In [1]:
from sklearn.datasets import fetch_mldata

mnist = fetch_mldata('MNIST original', transpose_data=True, data_home='datasets')
mnist['data'], mnist['target']



(array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
 array([0., 0., 0., ..., 9., 9., 9.]))

In [3]:
X = mnist["data"]
y = mnist["target"]

X_train = X[:60000]
y_train = y[:60000]
X_test = X[60000:]
y_test = y[60000:]

In [5]:
import numpy as np

np.random.seed(42)
rnd_idx = np.random.permutation(60000)
X_train = X_train[rnd_idx]
y_train = y_train[rnd_idx]

In [7]:
from sklearn.svm import LinearSVC

lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X_train, y_train)



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)

In [8]:
from sklearn.metrics import accuracy_score

y_pred = lin_clf.predict(X_train)
accuracy_score(y_train, y_pred)

0.85375

In [25]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train[:20000].astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

In [26]:
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X_train_scaled, y_train[:20000])



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)

In [27]:
X_train_scaled = scaler.fit_transform(X_train[20000:40000].astype(np.float32))

lin_clf.fit(X_train_scaled, y_train[20000:40000])



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)

In [28]:
X_train_scaled = scaler.fit_transform(X_train[40000:].astype(np.float32))

lin_clf.fit(X_train_scaled, y_train[40000:])



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=42, tol=0.0001,
     verbose=0)

In [31]:
y_pred = lin_clf.predict(X_train_scaled)
accuracy_score(y_train[40000:], y_pred)

0.94135

In [33]:
from sklearn.svm import SVC

svm_clf = SVC(decision_function_shape="ovr", gamma="auto")
svm_clf.fit(X_train_scaled, y_train[40000:])

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [34]:
y_pred = svm_clf.predict(X_train_scaled)
accuracy_score(y_train[40000:], y_pred)

0.98165

In [42]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

X_train_scaled = scaler.fit_transform(X_train[:10000].astype(np.float32))

param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(X_train_scaled, y_train[:10000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=1.39323209425849, gamma=0.005494878493960264 ..................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] ... C=1.39323209425849, gamma=0.005494878493960264, total= 1.1min
[CV] C=1.39323209425849, gamma=0.005494878493960264 ..................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  1.6min remaining:    0.0s


[CV] ... C=1.39323209425849, gamma=0.005494878493960264, total= 1.2min
[CV] C=1.39323209425849, gamma=0.005494878493960264 ..................
[CV] ... C=1.39323209425849, gamma=0.005494878493960264, total= 1.2min
[CV] C=7.144977357425403, gamma=0.01373214584730166 ..................
[CV] ... C=7.144977357425403, gamma=0.01373214584730166, total= 1.6min
[CV] C=7.144977357425403, gamma=0.01373214584730166 ..................
[CV] ... C=7.144977357425403, gamma=0.01373214584730166, total= 1.6min
[CV] C=7.144977357425403, gamma=0.01373214584730166 ..................
[CV] ... C=7.144977357425403, gamma=0.01373214584730166, total= 1.5min
[CV] C=1.6032271117801413, gamma=0.001330818719999577 ................
[CV] . C=1.6032271117801413, gamma=0.001330818719999577, total=  27.5s
[CV] C=1.6032271117801413, gamma=0.001330818719999577 ................
[CV] . C=1.6032271117801413, gamma=0.001330818719999577, total=  27.5s
[CV] C=1.6032271117801413, gamma=0.001330818719999577 ................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed: 52.8min finished


RandomizedSearchCV(cv=3, error_score='raise-deprecating',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params=None, iid='warn', n_iter=10, n_jobs=None,
          param_distributions={'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x05B80FD0>, 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x05C68930>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=2)

In [43]:
rnd_search_cv.best_estimator_

SVC(C=5.30215378783041, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0012467168846967123,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [44]:
rnd_search_cv.best_score_

0.9398

In [45]:
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

0.9507