In [1]:
from sklearn.datasets import fetch_openml
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
# Fetch the 'mnist_784' dataset from OpenML (cached locally) as plain arrays
# rather than a pandas DataFrame.
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

# Feature matrix and labels; the labels are cast to unsigned 8-bit integers.
X, y = mnist["data"], mnist["target"].astype(np.uint8)

# First 60,000 rows are the training set, everything after is the test set.
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

In [2]:
from sklearn.svm import LinearSVC

# Seed NumPy's global RNG and the classifier's own random_state so the run is repeatable.
np.random.seed(42)
lin_clf = LinearSVC(random_state=42)
# Baseline: fit on the raw (unscaled) training features. Kept as the cell's last
# expression so the notebook displays the fitted estimator.
lin_clf.fit(X_train, y_train)



LinearSVC(random_state=42)

In [3]:
from sklearn.metrics import accuracy_score

# Accuracy of the baseline LinearSVC measured on the same data it was trained on
# (a training score, not a held-out estimate).
y_pred = lin_clf.predict(X_train)
accuracy_score(y_train, y_pred)

0.8348666666666666

In [2]:
from sklearn.preprocessing import StandardScaler

# Standardize every feature (zero mean, unit variance) before training the SVMs.
scaler = StandardScaler()

# Learn the scaling statistics from the training set ONLY, then apply that same
# fitted transform to the test set. Calling fit_transform on the test set would
# re-fit the scaler on test data: that leaks test-set statistics and puts the
# test features on a different scale from the one the models were trained on.
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

In [5]:
# Refit the same LinearSVC instance, this time on the standardized features
# (lin_clf's earlier fit on the raw features is overwritten).
lin_clf.fit(X_train_scaled, y_train)
y_pred = lin_clf.predict(X_train_scaled)
# Accuracy on the training set itself, not a held-out estimate.
accuracy_score(y_train, y_pred)



0.9214

In [4]:
from sklearn.svm import SVC

# RBF-kernel SVM classifier.
svm_clf = SVC(kernel="rbf", gamma="scale")
# Fit on only the first 10,000 standardized training instances
# (presumably to keep training time manageable -- confirm).
svm_clf.fit(X_train_scaled[:10000], y_train[:10000])


SVC()

In [7]:
# Score the SVC on the full 60,000-instance training set; it was fitted on only
# the first 10,000 of these, so most of the instances were unseen during fitting.
y_pred = svm_clf.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

0.9455333333333333

In [8]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Search space: gamma is drawn log-uniformly from [0.001, 0.1]; C uses SciPy's
# uniform(loc, scale), which spans [loc, loc + scale], i.e. [1, 11].
param_distributions = {
    "gamma": reciprocal(0.001, 0.1),
    "C": uniform(1, 10),
}

# Randomized search: 10 sampled candidates, each scored with 3-fold cross-validation.
rnd_search_cv = RandomizedSearchCV(
    estimator=svm_clf,
    param_distributions=param_distributions,
    n_iter=10,
    verbose=2,
    cv=3,
)

# Run the search on only the first 1,000 standardized training instances.
rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END ....C=8.965429868602328, gamma=0.002327392228062871; total time=   0.9s
[CV] END ....C=8.965429868602328, gamma=0.002327392228062871; total time=   0.3s
[CV] END ....C=8.965429868602328, gamma=0.002327392228062871; total time=   0.5s
[CV] END ....C=8.796910002727692, gamma=0.015620693675639861; total time=   0.6s
[CV] END ....C=8.796910002727692, gamma=0.015620693675639861; total time=   0.4s
[CV] END ....C=8.796910002727692, gamma=0.015620693675639861; total time=   0.8s
[CV] END ...C=5.458327528535912, gamma=0.0015847101210439089; total time=   0.2s
[CV] END ...C=5.458327528535912, gamma=0.0015847101210439089; total time=   0.4s
[CV] END ...C=5.458327528535912, gamma=0.0015847101210439089; total time=   0.6s
[CV] END ....C=5.592488919658671, gamma=0.004649617447336332; total time=   0.8s
[CV] END ....C=5.592488919658671, gamma=0.004649617447336332; total time=   0.5s
[CV] END ....C=5.592488919658671, gamma=0.004649

RandomizedSearchCV(cv=3, estimator=SVC(),
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000184839BFF70>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001849A0F5E50>},
                   verbose=2)

In [9]:
# Display the estimator (with its sampled C and gamma) that the search selected.
rnd_search_cv.best_estimator_

SVC(C=7.116531604882809, gamma=0.0010330768043240405)

In [10]:
# Cross-validated score of the selected candidate, measured on the 1,000-instance
# subset the search was run on.
rnd_search_cv.best_score_

0.8639897382412353

In [None]:
# Retrain the selected estimator on the full standardized training set
# (the search itself only saw the first 1,000 instances).
rnd_search_cv.best_estimator_.fit(X_train_scaled, y_train)

In [None]:
# Training-set accuracy of the retrained best estimator (not a held-out estimate).
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

In [None]:
# Accuracy of the retrained best estimator on the standardized test set.
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Search space: gamma is drawn log-uniformly from [0.001, 0.1]; C uses SciPy's
# uniform(loc, scale), which spans [loc, loc + scale], i.e. [1, 11].
param_distributions = {
    "gamma": reciprocal(0.001, 0.1),
    "C": uniform(1, 10),
}

# Same randomized search as before (10 candidates) but scored with 10-fold CV.
rnd_search_cv = RandomizedSearchCV(
    estimator=svm_clf,
    param_distributions=param_distributions,
    n_iter=10,
    verbose=2,
    cv=10,
)

# Run the search on only the first 1,000 standardized training instances.
rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])

In [None]:
# Cross-validated score of the candidate selected by the most recent search.
rnd_search_cv.best_score_

In [None]:
# Display the estimator (with its sampled C and gamma) that the search selected.
rnd_search_cv.best_estimator_

In [None]:
# Retrain the selected estimator on the full standardized training set
# (the search itself only saw the first 1,000 instances).
rnd_search_cv.best_estimator_.fit(X_train_scaled, y_train)

In [None]:
# Training-set accuracy of the retrained best estimator (not a held-out estimate).
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

In [None]:
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of the selected estimator over the full standardized
# training set; the displayed result holds one score per fold.
cross_val_score(rnd_search_cv.best_estimator_, X_train_scaled, y_train, cv=5)

In [None]:
# Accuracy of the retrained best estimator on the standardized test set.
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Search space: gamma is drawn log-uniformly from [0.001, 0.1]; C uses SciPy's
# uniform(loc, scale), which spans [loc, loc + scale], i.e. [1, 11].
param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}

# Wider randomized search: 20 sampled candidates, each scored with 3-fold CV,
# run on the first 1,000 standardized training instances.
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=20, verbose=3, cv=3)
rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])

# Retrain the selected estimator on the full training set. This must use the
# standardized features: the hyperparameters were tuned on scaled data and every
# later predict() call is given scaled inputs, so fitting on the raw X_train
# would train and evaluate the model on differently scaled features.
rnd_search_cv.best_estimator_.fit(X_train_scaled, y_train)

In [None]:
# Training-set accuracy of the best estimator on the standardized features
# (not a held-out estimate).
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

In [None]:
# Accuracy of the best estimator on the standardized test set.
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

In [None]:
# Display the estimator (with its sampled C and gamma) that the search selected.
rnd_search_cv.best_estimator_