In [1]:
import numpy as np

from sklearn.datasets import fetch_mldata
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score

import scipy.stats as sci

In [2]:
mnist = fetch_mldata('MNIST original')
X, y = mnist["data"], mnist["target"]

In [3]:
XTrain, XTest, yTrain, yTest = X[:60000], X[60000:], y[:60000], y[60000:]

shuffleIndeces = np.random.permutation(60000)
XTrain, yTrain = XTrain[shuffleIndeces], yTrain[shuffleIndeces]

In [4]:
pipe = Pipeline([ ('normalizer', StandardScaler()) ])
prepTrain = pipe.fit_transform( XTrain.astype(np.float64) )

In [5]:
svc = SVC( kernel = "rbf" )

params = { "gamma" : sci.reciprocal(0.001, 0.1), "C" : sci.uniform(1, 10) }
randSVC = RandomizedSearchCV( svc, params, n_iter = 50, cv = 5, verbose = 1, n_jobs = 1 )

randSVC.fit( prepTrain[:1000], yTrain[:1000])

Fitting 5 folds for each of 50 candidates, totalling 250 fits


[Parallel(n_jobs=1)]: Done 250 out of 250 | elapsed:  6.3min finished


RandomizedSearchCV(cv=5, error_score='raise',
          estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
          fit_params=None, iid=True, n_iter=50, n_jobs=1,
          param_distributions={'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fe486fceef0>, 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7fe486af5550>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score='warn', scoring=None, verbose=1)

In [6]:
randSVC.best_estimator_.fit( prepTrain, yTrain )

SVC(C=2.0644885094833678, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.0012508727703554796,
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [7]:
test = pipe.transform( XTest.astype(np.float64) )
pred = randSVC.predict( test )

accuracy_score( yTest, pred )

0.97019999999999995