In [88]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVR, SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold, cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from joblib import dump
from sklearn.metrics import accuracy_score
from sklearn import datasets

In [89]:
# Load scikit-learn's bundled handwritten-digits dataset, keeping all 10 classes.
digits_img = datasets.load_digits(n_class=10)

# X holds the feature matrix (one row per image), y the matching digit labels.
X, y = digits_img.data, digits_img.target

# Sanity check: one line per array, features first, then labels.
print(X.shape, y.shape, sep='\n')

(1797, 64)
(1797,)


In [90]:
# Two-step model: rescale every feature into [0, 1], then fit an RBF-kernel
# support vector regressor on the scaled features.
# NOTE(review): the target used later (y) is the digit class label, so this
# treats a classification task as regression - confirm that is intended.
pipe = Pipeline(steps=[
    ('scaler', MinMaxScaler(feature_range=(0, 1), copy=True)),
    ('svr', SVR(kernel='rbf')),
])

# Candidate values for each SVR hyperparameter.
C_grid = [1.0, 10.0, 100.0, 1000.0]   # regularization parameter C
g_grid = ['scale', 0.01, 0.1, 1.0]    # RBF kernel coefficient gamma
# NOTE(review): labels presumably range over 0-9, so epsilon=10.0 would make the
# insensitive tube wider than the whole target range - verify this candidate.
e_grid = [0.01, 0.1, 1.0, 10.0]       # epsilon-tube half-width

# Hyperparameter settings for the grid search; keys follow the
# '<pipeline step>__<parameter>' convention so they reach the 'svr' step.
param_grid = dict(
    svr__C=C_grid,
    svr__gamma=g_grid,
    svr__epsilon=e_grid,
)

In [91]:
# Two shuffled 4-fold stratified splitters with different seeds (13 vs 17):
# one for the hyperparameter search, one for the generalization estimate.
grid_cv = StratifiedKFold(
    n_splits=4,
    shuffle=True,
    random_state=13,
)  # used by the grid search
gen_cv = StratifiedKFold(
    n_splits=4,
    shuffle=True,
    random_state=17,
)  # used to estimate generalization performance

In [92]:
# Exhaustive search over param_grid, scoring each candidate by negative mean
# squared error on the grid_cv folds (higher is better, so the lowest MSE wins).
# n_jobs=-1 runs the independent (candidate, fold) fits on all available CPU
# cores; the scores and the selected parameters are the same as in a serial
# run, only the wall time drops.
gs = GridSearchCV(
    pipe,
    param_grid,
    cv=grid_cv,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)


In [93]:
%%time
# Nested cross-validation: each outer fold of gen_cv re-runs the whole grid
# search (gs) on its training part and scores the refit model on the held-out part.
nested_score = cross_val_score(
    gs,
    X,
    y,
    scoring='neg_mean_squared_error',
    cv=gen_cv,
)
print(nested_score)

# Scores are negated MSE values; flip the sign of their mean and take the
# square root to report an RMSE-style figure in label units.
nested_mse = -nested_score.mean()
print(np.sqrt(nested_mse))

[-0.56797658 -0.57361861 -0.52468216 -0.44417923]
0.7263705278421325
Wall time: 2min 25s


In [94]:
%%time
# Run the grid search on the full dataset and keep the pipeline that was refit
# with the best-scoring hyperparameters (fit() returns the fitted search object).
gs_best = gs.fit(X, y).best_estimator_

Wall time: 1min 6s


In [95]:
# Show the selected pipeline and its hyperparameters as plain text.
print(str(gs_best))

Pipeline(steps=[('scaler', MinMaxScaler()),
                ('svr', SVR(C=100.0, epsilon=0.01))])


In [96]:
# Persist the tuned pipeline to disk with joblib so it can be reloaded later.
# NOTE(review): the file name says "svc" but the saved estimator is the SVR
# pipeline selected above - confirm the name before anything depends on it.
tag = 'ai-10-assign1'
model_file = f'svc_best_{tag}.joblib'

# Left as the cell's last expression so the written path list is displayed.
dump(gs_best, model_file)

['svc_best_ai-10-assign1.joblib']