In [1]:
import numpy as np

In [2]:
from sklearn.datasets import fetch_mldata

mnist = fetch_mldata("MNIST original")
mnist

{'DESCR': 'mldata.org dataset: mnist-original',
 'COL_NAMES': ['label', 'data'],
 'target': array([ 0.,  0.,  0., ...,  9.,  9.,  9.]),
 'data': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ..., 
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)}

In [3]:
X, y = mnist["data"], mnist["target"]

X_train = X[:60000]
y_train = y[:60000]
X_test = X[60000:]
y_test = y[60000:]

In [4]:
shuffle_index = np.random.permutation(60000)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

In [9]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.astype(np.float32))
X_test = scaler.transform(X_test.astype(np.float32))


In [6]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier

In [19]:
svm_clf = SVC(decision_function_shape="ovr")

In [31]:
from scipy.stats import reciprocal, uniform

param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, verbose=2)
rnd_search_cv.fit(X_train[:1000], y_train[:1000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=3.77214563707, gamma=0.00241566259991 .........................
[CV] .......... C=3.77214563707, gamma=0.00241566259991, total=   0.7s
[CV] C=3.77214563707, gamma=0.00241566259991 .........................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.0s remaining:    0.0s


[CV] .......... C=3.77214563707, gamma=0.00241566259991, total=   0.7s
[CV] C=3.77214563707, gamma=0.00241566259991 .........................
[CV] .......... C=3.77214563707, gamma=0.00241566259991, total=   0.6s
[CV] C=4.275568745, gamma=0.0292781767635 ............................
[CV] ............. C=4.275568745, gamma=0.0292781767635, total=   0.8s
[CV] C=4.275568745, gamma=0.0292781767635 ............................
[CV] ............. C=4.275568745, gamma=0.0292781767635, total=   0.8s
[CV] C=4.275568745, gamma=0.0292781767635 ............................
[CV] ............. C=4.275568745, gamma=0.0292781767635, total=   0.8s
[CV] C=9.44610153524, gamma=0.0441328306453 ..........................
[CV] ........... C=9.44610153524, gamma=0.0441328306453, total=   0.8s
[CV] C=9.44610153524, gamma=0.0441328306453 ..........................
[CV] ........... C=9.44610153524, gamma=0.0441328306453, total=   0.8s
[CV] C=9.44610153524, gamma=0.0441328306453 ..........................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:   35.7s finished


RandomizedSearchCV(cv=None, error_score='raise',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params=None, iid=True, n_iter=10, n_jobs=1,
          param_distributions={'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00FBB2F0>, 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00FBB630>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=2)

In [22]:
rnd_search_cv.best_estimator_

SVC(C=3.7416427221809392, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0017640368690158145,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [35]:
rnd_search_cv.best_estimator_.fit(X_train, y_train)

SVC(C=3.2751164500303753, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0013433921095616333,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [36]:
from sklearn.metrics import accuracy_score

In [38]:
y_pred = rnd_search_cv.best_estimator_.predict(X_train)

In [39]:
accuracy_score(y_pred, y_train)

0.99550000000000005