# Exercises

## Train SVM on MNIST dataset

In [1]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
import pandas as pd

In [2]:
# Fetch version 1 of the "mnist_784" dataset from OpenML as plain
# (features, target) arrays rather than a pandas DataFrame.
X, y = fetch_openml(name="mnist_784", version=1, as_frame=False, return_X_y=True)

In [3]:
# Check the dataset dimensions as (n_samples, n_features).
X.shape

(70000, 784)

In [4]:
# Shuffle features and labels with one shared, seeded permutation so every
# row of X stays paired with its label in y.
# NOTE(review): this overwrites X and y, so re-running the cell shuffles the
# already-shuffled data again.
np.random.seed(42)
rand_idx = np.random.permutation(len(X))
X, y = X[rand_idx], y[rand_idx]

In [5]:
# Hold out 10,000 samples as the test set. The fixed random_state makes the
# split reproducible even when this cell is re-run on its own, instead of
# depending on how much of NumPy's global random stream was already consumed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=10000, random_state=42
)

In [6]:
# SVMs are sensitive to feature scale, so standardize the inputs.
# The scaler learns its statistics from the training set only and the same
# transformation is then applied to both sets.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [7]:
# Candidate values for the regularization strength C.
params = {'C': [0.5, 1, 2, 5]}

# The training set has far more samples than features, so solve the primal
# problem (dual=False) as the scikit-learn docs recommend for that case; the
# dual solver is much slower here and tends to stop at max_iter unconverged.
# random_state only matters for the dual solver; it is kept so results stay
# seeded if `dual` is ever switched back.
svc = LinearSVC(dual=False, random_state=42)

# 3-fold cross-validated grid search over C, scored by accuracy.
svc_grid = GridSearchCV(svc, params, scoring='accuracy', cv=3)

In [8]:
# Run the grid search on the scaled training data: every C value in `params`
# is scored by 3-fold cross-validated accuracy.
svc_grid.fit(X_train, y_train)



In [None]:
# Parameter setting (here, the value of C) that ranked best in the search.
svc_grid.best_params_

In [None]:
# Tabulate the cross-validation results, best-ranked candidates first.
(
    pd.DataFrame(svc_grid.cv_results_)
    .sort_values("rank_test_score")
    .head()
)