In [7]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True, as_frame=False)

X = mnist["data"]
y = mnist["target"]

X_train = X[:60000]
y_train = y[:60000]
X_test = X[60000:]
y_test = y[60000:]

In [10]:
import numpy as np
np.random.seed(42)
rnd_idx = np.random.permutation(60000)
X_train = X_train[rnd_idx]
y_train = y_train[rnd_idx]

In [11]:
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X_train, y_train)

In [13]:
from sklearn.metrics import accuracy_score

y_pred = lin_clf.predict(X_train)
accuracy_score(y_train, y_pred)

0.9277333333333333

In [14]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

In [15]:
lin_clf = LinearSVC(random_state=42)
lin_clf.fit(X_train_scaled, y_train)



In [16]:
y_pred = lin_clf.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

0.9274833333333333

In [17]:
svm_clf = SVC(decision_function_shape="ovr", gamma="auto")
svm_clf.fit(X_train_scaled[:10000], y_train[:10000])

In [18]:
y_pred = svm_clf.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

0.9469

In [19]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(svm_clf, param_distributions, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(X_train_scaled[:1000], y_train[:1000])

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END ...C=8.852316058423087, gamma=0.0017660746504810706; total time=   0.1s
[CV] END ...C=8.852316058423087, gamma=0.0017660746504810706; total time=   0.1s
[CV] END ...C=8.852316058423087, gamma=0.0017660746504810706; total time=   0.1s
[CV] END ...C=1.8271960104746645, gamma=0.006364737055453387; total time=   0.1s
[CV] END ...C=1.8271960104746645, gamma=0.006364737055453387; total time=   0.1s
[CV] END ...C=1.8271960104746645, gamma=0.006364737055453387; total time=   0.1s
[CV] END .....C=9.875199193765326, gamma=0.05134983345187068; total time=   0.1s
[CV] END .....C=9.875199193765326, gamma=0.05134983345187068; total time=   0.1s
[CV] END .....C=9.875199193765326, gamma=0.05134983345187068; total time=   0.1s
[CV] END .....C=6.59992909281409, gamma=0.059916665784661816; total time=   0.1s
[CV] END .....C=6.59992909281409, gamma=0.059916665784661816; total time=   0.1s
[CV] END .....C=6.59992909281409, gamma=0.059916

In [20]:
rnd_search_cv.best_estimator_

In [21]:
rnd_search_cv.best_score_

np.float64(0.8419677162192132)

In [22]:
rnd_search_cv.best_estimator_.fit(X_train_scaled, y_train)

In [23]:
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
accuracy_score(y_train, y_pred)

0.99965

In [24]:
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
accuracy_score(y_test, y_pred)

0.971