In [20]:
# 交差検証をしていない単純なグリッドサーチ
# テストケースをパラメータのチューニングに使ってしまっているため汎化性能が損なわれている
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

X = load_iris().data
y = load_iris().target
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

best_score = 0

for gamma in [0.001,0.01,0.1,1,10,100]:
    for C in [0.001,0.01,0.1,1,10,100]:
        svm = SVC(gamma=gamma,C=C)
        svm.fit(X_train,y_train)
        score = svm.score(X_test,y_test)
        if score > best_score:
            best_score = score
            best_parameters = {'C':C,'gamma':gamma}

print("Best score:{:.2f}".format(best_score))
print("Best parameters:{}".format(best_parameters))

Best score:1.00
Best parameters:{'C': 100, 'gamma': 0.01}


In [24]:
#CV GridSearch
from sklearn.model_selection import GridSearchCV
#探索するパラメータを辞書の形で指定する
param_grid = {'C':[0.001,0.01,0.1,1,10,100],
            'gamma':[0.001,0.01,0.1,1,10,100]}
#モデル(SVC),パラメターグリッド,交差検証の方法(この場合5分割層化検証)を指定する.
grid_search = GridSearchCV(SVC(),param_grid,cv=5)

X_train,X_test,y_train,y_test = train_test_split(load_iris().data,load_iris().target,random_state=0)

grid_search.fit(X_train,y_train)
#パタメータの選択にテストケースを用いていないことに注意
print("Test set score:{:.2f}".format(grid_search.score(X_test,y_test)))
print("Best parameters:{}".format(grid_search.best_params_))

Test set score:0.97
Best parameters:{'C': 100, 'gamma': 0.01}


