In [66]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
%matplotlib inline
import warnings

from sklearn import svm
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')
mnist = datasets.load_digits()

In [67]:
X = mnist['data']
y = mnist['target']

# Data Splitting

In [68]:
train_size = 0.8

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

shuffle_index = np.random.permutation(len(X_train))
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.astype(np.float64))
X_train

array([[ 0.        , -0.33185831,  0.18273631, ..., -1.13120955,
        -0.49133762, -0.19407567],
       [ 0.        ,  0.77975099,  2.11198422, ...,  1.42575582,
         2.23952516,  0.33312042],
       [ 0.        , -0.33185831,  1.04017983, ...,  0.91436275,
        -0.24307737, -0.19407567],
       ...,
       [ 0.        , -0.33185831, -0.03162457, ..., -0.96074519,
        -0.49133762, -0.19407567],
       [ 0.        , -0.33185831,  1.68326247, ...,  0.91436275,
         0.00518289, -0.19407567],
       [ 0.        ,  4.11457888,  1.68326247, ...,  0.06204096,
        -0.49133762, -0.19407567]])

# Grid Search - Precision Scoring

In [69]:
svc_prec = svm.SVC(kernel='linear', gamma='auto', C=2, random_state=42)

In [70]:
# Remove 'precomputed' from your parameter space.
# kernel='precomputed'can only be used when passing a (n_samples, n_samples) data matrix that represents pairwise similarities for the samples instead of the traditional (n_samples, n_features) rectangular data matrix.

param_grid = [
    {
        'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 
        'gamma': [1e-3, 1e-4],
        'C': [1, 10, 100, 1000, 5000]
    }
]

In [71]:
grid_search_pres = GridSearchCV(svc_prec, param_grid, cv=5, scoring='precision_weighted')
grid_search_pres

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=2, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto', kernel='linear', max_iter=-1,
                           probability=False, random_state=42, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [1, 10, 100, 1000, 5000],
                          'gamma': [0.001, 0.0001],
                          'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='precision_weighted', verbose=0)

In [72]:
grid_search_pres.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=2, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto', kernel='linear', max_iter=-1,
                           probability=False, random_state=42, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [1, 10, 100, 1000, 5000],
                          'gamma': [0.001, 0.0001],
                          'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='precision_weighted', verbose=0)

In [73]:
grid_search_pres.best_params_

{'C': 1000, 'gamma': 0.0001, 'kernel': 'sigmoid'}

In [74]:
grid_search_pres.best_estimator_

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.0001, kernel='sigmoid',
    max_iter=-1, probability=False, random_state=42, shrinking=True, tol=0.001,
    verbose=False)

In [75]:
sgd_cvres = grid_search_pres.cv_results_
sgd_cvres

{'mean_fit_time': array([0.02975388, 0.16041036, 0.09225912, 0.10500488, 0.02608852,
        0.15199718, 0.19867883, 0.17484756, 0.02826238, 0.16015534,
        0.03948336, 0.04090471, 0.02626739, 0.18085518, 0.10185876,
        0.10784593, 0.02680879, 0.17956128, 0.03462615, 0.04089475,
        0.02977161, 0.17882605, 0.04117894, 0.04160562, 0.03461661,
        0.14178333, 0.03677182, 0.03211918, 0.03757434, 0.1721437 ,
        0.03440418, 0.04031048, 0.06651211, 0.14507589, 0.05453038,
        0.03739457, 0.02788601, 0.17486606, 0.04176736, 0.05546222]),
 'std_fit_time': array([0.00594097, 0.00302532, 0.00357459, 0.0036219 , 0.00275474,
        0.00350767, 0.00733643, 0.00641419, 0.00099962, 0.00489491,
        0.00105691, 0.00165826, 0.00093217, 0.01607248, 0.0164482 ,
        0.0038424 , 0.00055788, 0.02749189, 0.00303432, 0.00642777,
        0.00571827, 0.01005177, 0.00284252, 0.0023094 , 0.00807062,
        0.03908069, 0.00245091, 0.00108157, 0.00818336, 0.00697744,
        0.002

# Grid Search - Recall Scoring

In [76]:
svc_recall = svm.SVC(kernel='linear', gamma='auto', C=2, random_state=42)

In [77]:
grid_search_recall = GridSearchCV(svc_recall, param_grid, cv=5, scoring='recall_macro')

In [78]:
grid_search_recall.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=SVC(C=2, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto', kernel='linear', max_iter=-1,
                           probability=False, random_state=42, shrinking=True,
                           tol=0.001, verbose=False),
             iid='warn', n_jobs=None,
             param_grid=[{'C': [1, 10, 100, 1000, 5000],
                          'gamma': [0.001, 0.0001],
                          'kernel': ['linear', 'poly', 'rbf', 'sigmoid']}],
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='recall_macro', verbose=0)

In [79]:
grid_search_recall.best_params_

{'C': 1000, 'gamma': 0.0001, 'kernel': 'sigmoid'}

In [80]:
grid_search_recall.best_estimator_

SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.0001, kernel='sigmoid',
    max_iter=-1, probability=False, random_state=42, shrinking=True, tol=0.001,
    verbose=False)

In [81]:
cvres = grid_search_recall.cv_results_
cvres

{'mean_fit_time': array([0.04212451, 0.23351817, 0.12083087, 0.14213524, 0.02794867,
        0.18436584, 0.25995588, 0.23515201, 0.04429321, 0.19737568,
        0.04616261, 0.0470849 , 0.02796903, 0.18280797, 0.11319594,
        0.13219023, 0.03437524, 0.18181376, 0.03698001, 0.03113799,
        0.02783699, 0.171105  , 0.04197083, 0.04382472, 0.02822919,
        0.12486463, 0.04123302, 0.03456798, 0.02979927, 0.19082046,
        0.05278835, 0.04782491, 0.03599043, 0.09211149, 0.037748  ,
        0.03305879, 0.02817187, 0.24071789, 0.04862075, 0.0445786 ]),
 'std_fit_time': array([0.00252782, 0.05236961, 0.00837652, 0.01099661, 0.00081859,
        0.0094558 , 0.00962799, 0.00765199, 0.00722321, 0.02620757,
        0.00047707, 0.00222362, 0.0018383 , 0.01506021, 0.00492932,
        0.01107365, 0.00121565, 0.01515954, 0.00137631, 0.0009875 ,
        0.00131896, 0.00336343, 0.00275559, 0.00082307, 0.00099002,
        0.00096376, 0.00335838, 0.00212048, 0.00341016, 0.01401675,
        0.012

# Final Testing

In [82]:
shuffle_index = np.random.permutation(len(X_test))
X_test, y_test = X_test[shuffle_index], y_test[shuffle_index]

scaler = StandardScaler()
X_test = scaler.fit_transform(X_test.astype(np.float64))
X_test

array([[ 0.        , -0.3474534 ,  0.8980759 , ...,  0.12082506,
        -0.56231903, -0.20553692],
       [ 0.        ,  2.85980878,  1.68471173, ..., -1.21241698,
        -0.56231903, -0.20553692],
       [ 0.        , -0.3474534 ,  0.8980759 , ...,  0.45413557,
        -0.33016897, -0.20553692],
       ...,
       [ 0.        , -0.3474534 ,  0.50475799, ..., -0.04583019,
        -0.56231903, -0.20553692],
       [ 0.        , -0.3474534 , -1.06851367, ..., -1.21241698,
        -0.56231903, -0.20553692],
       [ 0.        , -0.3474534 , -1.06851367, ...,  1.4540671 ,
        -0.33016897, -0.20553692]])

In [83]:
final_model = grid_search_pres.best_estimator_

In [84]:
final_predictions = final_model.predict(X_test)

In [85]:
sum(final_predictions == y_test) / len(X_test)

0.9861111111111112