In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.svm import SVC
from sklearn.model_selection import validation_curve
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

# The ten class labels 0..9.
class_names = np.arange(10)

# Number of trailing training samples held out as a validation set.
VAL_SIZE = 10_000

(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Preprocess the data (these are NumPy arrays): flatten every image into a
# single feature row and divide by 255 (presumably 8-bit pixel intensities,
# which would put the features in [0, 1]).
# The sample count comes from the array itself and the feature width is
# inferred with -1, so nothing here depends on the literal sizes
# 60000 / 10000 / 784.
x_train = x_train.reshape(len(x_train), -1).astype('float32') / 255
x_test = x_test.reshape(len(x_test), -1).astype('float32') / 255

# Labels are cast to float32 too. NOTE(review): a classifier would also accept
# the original integer labels; the cast is kept because cells outside this
# view may rely on the float dtype.
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')

# Reserve the last VAL_SIZE samples for validation; the remainder stays as the
# training set. The slices are taken before x_train / y_train are rebound.
x_val = x_train[-VAL_SIZE:]
y_val = y_train[-VAL_SIZE:]
x_train = x_train[:-VAL_SIZE]
y_train = y_train[:-VAL_SIZE]
print(x_train.shape)

(50000, 784)


In [2]:
# Base estimator: support-vector classifier with an RBF kernel. Its gamma and
# C are the two hyperparameters tuned below.
model = SVC(kernel="rbf")

# Search space: 3 gamma values x 3 C values = 9 candidate settings.
hyper_params = [{"gamma": [1e-2, 1e-3, 1e-4], "C": [5, 10, 15]}]

# 10-fold cross-validation splitter (KFold defaults, i.e. no shuffling).
folds = KFold(n_splits=10)

# Exhaustive grid search scored by accuracy. Train-fold scores are recorded as
# well (return_train_score=True) and n_jobs=-1 uses every available core.
model_cv = GridSearchCV(
    estimator=model,
    param_grid=hyper_params,
    scoring="accuracy",
    cv=folds,
    verbose=1,
    n_jobs=-1,
    return_train_score=True,
)

# Run the search: 9 candidates x 10 folds = 90 fits. This is expensive -- the
# recorded run needed roughly 455 minutes on 8 workers.
model_cv.fit(x_train, y_train)

Fitting 10 folds for each of 9 candidates, totalling 90 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 206.6min
[Parallel(n_jobs=-1)]: Done  90 out of  90 | elapsed: 455.2min finished


GridSearchCV(cv=KFold(n_splits=10, random_state=None, shuffle=False),
             error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=-1,
             param_grid=[{'C': [5, 10, 15], 'gamma': [0.01, 0.001, 0.0001]}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
             scoring='accuracy', verbose=1)

In [4]:
# Per-candidate cross-validation results (fit/score times, per-split and mean
# scores, parameters) as a DataFrame, one row per hyperparameter combination.
cv_results = pd.DataFrame(model_cv.cv_results_)
print(cv_results)

# Best mean cross-validated accuracy and the hyperparameters that achieved it.
# best_score_ is averaged over the CV folds of the training data; it is NOT a
# score on the held-out x_test / y_test split, so the message says so
# explicitly instead of calling it a "test score".
best_score = model_cv.best_score_
best_hyperparams = model_cv.best_params_

print(f'The best mean cross-validation accuracy is {best_score} corresponding to hyperparameters {best_hyperparams}')

mean_fit_time  std_fit_time  mean_score_time  std_score_time param_C  \
0     967.099733    221.481038       197.927092       45.692427       5   
1     711.584545     28.583201       141.558314        4.753851       5   
2    1540.748403     61.248534       244.972260        7.632229       5   
3     533.799763     20.718123       111.443425        3.699602      10   
4     597.064827     21.881857       123.772812        4.604467      10   
5    1179.127373     40.329006       206.966229        7.223833      10   
6     532.865728     19.789864       110.977899        3.949826      15   
7     552.602026     19.356748       117.000862        4.243641      15   
8     955.475026    132.599049       160.718769       53.816893      15   

  param_gamma                      params  split0_test_score  \
0        0.01     {'C': 5, 'gamma': 0.01}             0.9832   
1       0.001    {'C': 5, 'gamma': 0.001}             0.9526   
2      0.0001   {'C': 5, 'gamma': 0.0001}             0.9322