In [12]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import LinearSVC, SVC
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

In [3]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)

In [4]:
X = mnist["data"]
Y = mnist["target"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.125)

In [5]:
scaler = StandardScaler()
X_scaled_train = scaler.fit_transform(X_train)
X_scaled_test = scaler.transform(X_test)

In [6]:
linear_clf = LinearSVC()
linear_clf.fit(X_scaled_train, Y_train)
print("fitted")
predicted = linear_clf.predict(X_scaled_train)
accuracy_score(Y_train, predicted)

fitted




0.9249469387755102

In [20]:
svm_clf = SVC(decision_function_shape="ovr")
svm_clf.fit(X_scaled_train[:10000],Y_train[:10000])
print("trained");
Y_pred = svm_clf.predict(X_scaled_train)
accuracy_score(Y_train, Y_pred)

trained


0.948065306122449

In [8]:
Y_pred_test = svm_clf.predict(X_scaled_test)
accuracy_score(Y_test, Y_pred_test)

0.9398857142857143

In [27]:
params_dict = {"gamma": reciprocal(0.001, 1), "C": uniform(0.1, 100)}
random_search = RandomizedSearchCV(svm_clf, params_dict, n_iter = 128, verbose = 2, cv=3, n_jobs = -1)
random_search.fit(X_scaled_train[:1000], Y_train[:1000])

Fitting 3 folds for each of 128 candidates, totalling 384 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 12 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    2.7s
[Parallel(n_jobs=-1)]: Done 138 tasks      | elapsed:   14.9s
[Parallel(n_jobs=-1)]: Done 341 tasks      | elapsed:   37.5s
[Parallel(n_jobs=-1)]: Done 384 out of 384 | elapsed:   42.1s finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                                 class_weight=None, coef0=0.0,
                                 decision_function_shape='ovr', degree=3,
                                 gamma='scale', kernel='rbf', max_iter=-1,
                                 probability=False, random_state=None,
                                 shrinking=True, tol=0.001, verbose=False),
                   iid='deprecated', n_iter=128, n_jobs=-1,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f04b033e750>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x7f04a503b950>},
                   pre_dispatch='2*n_jobs', random_state=None, refit=True,
                   return_train_score=False, scoring=None, verbose=2)

In [28]:
random_search.best_estimator_

SVC(C=87.61897323990813, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3,
    gamma=0.0010210340667569833, kernel='rbf', max_iter=-1, probability=False,
    random_state=None, shrinking=True, tol=0.001, verbose=False)

In [30]:
random_search.best_score_

0.8820077562592532

In [31]:
random_search.best_estimator_.fit(X_scaled_train, Y_train)

SVC(C=87.61897323990813, break_ties=False, cache_size=200, class_weight=None,
    coef0=0.0, decision_function_shape='ovr', degree=3,
    gamma=0.0010210340667569833, kernel='rbf', max_iter=-1, probability=False,
    random_state=None, shrinking=True, tol=0.001, verbose=False)

In [32]:
Y_pred_train = random_search.best_estimator_.predict(X_scaled_train)
accuracy_score(Y_train, Y_pred_train)

1.0

In [34]:
Y_pred_test = random_search.best_estimator_.predict(X_scaled_test)
accuracy_score(Y_test, Y_pred_test)

0.9749714285714286