# KFold (StratifiedKFold)

In [2]:
import numpy as np
from sklearn import svm, datasets
from sklearn.model_selection import StratifiedKFold

# Dataset: iris feature matrix (X) and integer class labels (y).
iris = datasets.load_iris()
X = iris.data
y = iris.target

svc = svm.SVC()

# 10-fold stratified split without shuffling, so the folds are deterministic.
# random_state is intentionally not passed: it only has an effect together
# with shuffle=True, and recent scikit-learn releases raise a ValueError when
# it is set while shuffle is left at its default (False).
skf = StratifiedKFold(n_splits=10)

# Per-fold accuracy on the training part and on the held-out validation part.
scores_train = []
scores_val = []
for k, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    # Refit the same estimator on this fold's training samples.
    svc.fit(X[train_idx], y[train_idx])

    score_train = svc.score(X[train_idx], y[train_idx])
    scores_train.append(score_train)

    score_val = svc.score(X[val_idx], y[val_idx])
    scores_val.append(score_val)

    # np.bincount gives the number of training samples per class label,
    # which shows how the stratified split balanced the classes.
    print("Fold: %2d, Class dist.: %s, Acc train/val: %.3f/%.3f" % (k+1, np.bincount(y[train_idx]), score_train, score_val))

Fold:  1, Class dist.: [45 45 45], Acc train/val: 0.985/1.000
Fold:  2, Class dist.: [45 45 45], Acc train/val: 0.985/0.933
Fold:  3, Class dist.: [45 45 45], Acc train/val: 0.985/1.000
Fold:  4, Class dist.: [45 45 45], Acc train/val: 0.985/1.000
Fold:  5, Class dist.: [45 45 45], Acc train/val: 0.985/1.000
Fold:  6, Class dist.: [45 45 45], Acc train/val: 0.993/0.933
Fold:  7, Class dist.: [45 45 45], Acc train/val: 0.993/0.933
Fold:  8, Class dist.: [45 45 45], Acc train/val: 0.978/1.000
Fold:  9, Class dist.: [45 45 45], Acc train/val: 0.993/1.000
Fold: 10, Class dist.: [45 45 45], Acc train/val: 0.978/1.000


# GridSearchCV

In [7]:
from pprint import pprint
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

# Load the iris dataset; its features and labels are fed to the search below.
iris = datasets.load_iris()

# Base estimator whose hyperparameters are being tuned.
svc = svm.SVC()

# Search space: every combination of kernel and C is evaluated.
parameters = dict(
    kernel=("linear", "rbf"),
    C=[1, 10],
)

# Cross-validated grid search over the space above.
gs = GridSearchCV(
    svc,
    parameters,
    scoring="accuracy",
    cv=10,  # k-fold; picks e.g. StratifiedKFold automatically to suit the data
    n_jobs=-1,  # number of parallel jobs
    return_train_score=True,
)
gs.fit(iris.data, iris.target)

# Report the best mean validation score, then the full per-candidate results.
pprint(gs.best_score_)
pprint(gs.cv_results_)

0.98
{'mean_fit_time': array([0.00269082, 0.00134945, 0.0004746 , 0.0003952 ]),
 'mean_score_time': array([0.00091214, 0.00044131, 0.00020807, 0.0001796 ]),
 'mean_test_score': array([0.97333333, 0.98      , 0.98      , 0.97333333]),
 'mean_train_score': array([0.98814815, 0.98592593, 0.98      , 0.97703704]),
 'param_C': masked_array(data=[1, 1, 10, 10],
             mask=[False, False, False, False],
       fill_value='?',
            dtype=object),
 'param_kernel': masked_array(data=['linear', 'rbf', 'linear', 'rbf'],
             mask=[False, False, False, False],
       fill_value='?',
            dtype=object),
 'params': [{'C': 1, 'kernel': 'linear'},
            {'C': 1, 'kernel': 'rbf'},
            {'C': 10, 'kernel': 'linear'},
            {'C': 10, 'kernel': 'rbf'}],
 'rank_test_score': array([3, 1, 1, 3], dtype=int32),
 'split0_test_score': array([1., 1., 1., 1.]),
 'split0_train_score': array([0.98518519, 0.98518519, 0.97777778, 0.97037037]),
 'split1_test_score': array([