In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold, cross_val_score
from joblib import dump
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn import datasets

In [26]:
# Load the 10-class digits dataset that ships with scikit-learn.
digits_img = datasets.load_digits(n_class=10)

# Feature matrix (one row per sample) and the matching label vector.
X, y = digits_img.data, digits_img.target

# Sanity check: print both shapes, one per line.
print(X.shape, y.shape, sep='\n')

(1797, 64)
(1797,)


In [27]:
# Two-step model: rescale every feature into [0, 1], then classify with an
# RBF-kernel support vector machine.
pipe = Pipeline(steps=[
    ('scaler', MinMaxScaler(feature_range=(0, 1), copy=True)),
    ('svc', SVC(kernel='rbf')),
])

# Candidate values for the SVC's C and gamma parameters.
C_grid = [1.0, 10.0, 100.0, 1000.0]
g_grid = ['scale', 0.01, 0.1, 1.0]

# Hyper-parameter settings for the grid search; the `svc__` prefix routes
# each entry to the pipeline step named 'svc'.
param_grid = dict(svc__C=C_grid, svc__gamma=g_grid)

In [28]:
# Splitter for the grid search (hyper-parameter selection).
grid_cv = StratifiedKFold(
    n_splits=4,
    shuffle=True,
    random_state=13,
)

# Splitter for estimating generalization performance; it uses a different
# seed from the grid-search splitter.
gen_cv = StratifiedKFold(
    n_splits=4,
    shuffle=True,
    random_state=17,
)

In [29]:
# Exhaustive search over param_grid for the pipeline, scoring each candidate
# by accuracy on the grid_cv folds.
gs = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    scoring='accuracy',
    cv=grid_cv,
)


In [30]:
%%time
# Nested cross-validation: the grid-search object itself is the estimator
# being cross-validated, with gen_cv supplying the outer folds.
nested_score = cross_val_score(
    estimator=gs,
    X=X,
    y=y,
    scoring='accuracy',
    cv=gen_cv,
)

# Per-fold accuracies, followed by their mean.
print(nested_score)
print(nested_score.mean())

[0.98888889 0.99109131 0.99109131 0.99331849]
0.9910975006186589
Wall time: 25.3 s


In [31]:
%%time
# Run the grid search on the complete dataset and keep the winning pipeline.
# fit() returns the search object itself, so the attribute is read off directly.
gs_best = gs.fit(X, y).best_estimator_

Wall time: 10 s


In [32]:
# Display the pipeline stored in gs_best (the grid search's best estimator).
print(gs_best)

Pipeline(steps=[('scaler', MinMaxScaler()), ('svc', SVC(C=10.0))])


In [33]:
# Persist the tuned pipeline with joblib; the tag is embedded in the file name.
tag = 'ai-10-assign1'
model_file = 'svc_best_{}.joblib'.format(tag)

# Kept as the final expression so the notebook displays dump's return value.
dump(value=gs_best, filename=model_file)

['svc_best_ai-10-assign1.joblib']