# MNIST with SVM

## Get Datasets

In [1]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784', version=1)
print(list(mnist))
X, y = mnist['data'], mnist['target']

['data', 'target', 'frame', 'feature_names', 'target_names', 'DESCR', 'details', 'categories', 'url']


In [5]:
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
%matplotlib inline

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)

## Train with LinearSVC

In [8]:
import numpy as np
from sklearn.svm import LinearSVC

# Linear SVC will automatically choose one vs all stratagy
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X_train, y_train)

from sklearn.metrics import accuracy_score

y_pred = lin_clf.predict(X_train)
accuracy_score(y_train, y_pred)



0.8735510204081632

## Scale Features

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.fit_transform(X_test.astype(np.float32))

In [10]:
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X_train_scaled, y_train)
y_pred = lin_clf.predict(X_train_scaled)
accuracy_score(y_train, y_pred)



0.9260204081632653

## Using SVM

In [11]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

svm_clf = SVC(decision_function_shape="ovr", gamma="auto")
param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] C=10.654661115596367, gamma=0.06984765985252415 .................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[CV] .. C=10.654661115596367, gamma=0.06984765985252415, total=   1.0s
[CV] C=10.654661115596367, gamma=0.06984765985252415 .................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.0s remaining:    0.0s


[CV] .. C=10.654661115596367, gamma=0.06984765985252415, total=   1.0s
[CV] C=10.654661115596367, gamma=0.06984765985252415 .................
[CV] .. C=10.654661115596367, gamma=0.06984765985252415, total=   1.0s
[CV] C=9.013088505273837, gamma=0.0013393214120953044 ................
[CV] . C=9.013088505273837, gamma=0.0013393214120953044, total=   0.7s
[CV] C=9.013088505273837, gamma=0.0013393214120953044 ................
[CV] . C=9.013088505273837, gamma=0.0013393214120953044, total=   0.7s
[CV] C=9.013088505273837, gamma=0.0013393214120953044 ................
[CV] . C=9.013088505273837, gamma=0.0013393214120953044, total=   0.7s
[CV] C=5.351231477520744, gamma=0.09188740614552866 ..................
[CV] ... C=5.351231477520744, gamma=0.09188740614552866, total=   0.9s
[CV] C=5.351231477520744, gamma=0.09188740614552866 ..................
[CV] ... C=5.351231477520744, gamma=0.09188740614552866, total=   1.0s
[CV] C=5.351231477520744, gamma=0.09188740614552866 ..................
[CV] .

[Parallel(n_jobs=1)]: Done  30 out of  30 | elapsed:   28.1s finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                 class_weight=None, coef0=0.0,
                                 decision_function_shape='ovr', degree=3,
                                 gamma='auto', kernel='rbf', max_iter=-1,
                                 probability=False, random_state=None,
                                 shrinking=True, tol=0.001, verbose=False),
                   iid='deprecated', n_iter=10, n_jobs=None,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x12ad8f750>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x12ad8ff90>},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring=None, verbose=2)

In [14]:
rnd_search_cv.best_estimator_

SVC(C=9.013088505273837, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3,
    gamma=0.0013393214120953044, kernel='rbf', max_iter=-1, probability=False,
    random_state=None, shrinking=True, tol=0.001, verbose=False)

In [13]:
rnd_search_cv.best_score_

0.898997800195405

## Scores

In [15]:
rnd_search_cv.best_estimator_.fit(X_train_scaled, y_train)
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
print(accuracy_score(y_train, y_pred))

y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
print(accuracy_score(y_test, y_pred))

0.9992857142857143
0.9698571428571429
