## KFold

In [1]:
import numpy as np
from sklearn.model_selection import KFold

# Toy dataset: four samples with two features each. The two distinct rows
# are repeated, so samples 0/2 and 1/3 are identical.
X = np.array([[1, 2], [3, 4]] * 2)
# One integer target per sample: 1, 2, 3, 4.
y = np.arange(1, 5)
X.shape

(4, 2)

In [2]:
# Split the four samples into two folds; each fold serves once as the
# test set while the other one is used for training.
kf = KFold(n_splits=2)
for train_idx, test_idx in kf.split(X):
    print("TRAIN:", train_idx, "TEST:", test_idx)
    # Index the arrays with the fold's positions to materialise the split.
    X_train = X[train_idx]
    X_test = X[test_idx]
    y_train = y[train_idx]
    y_test = y[test_idx]


TRAIN: [2 3] TEST: [0 1]
TRAIN: [0 1] TEST: [2 3]


In [3]:
# With as many folds as samples (4), every test set holds exactly one
# sample and the remaining three are used for training.
kf = KFold(n_splits=4)
for train_idx, test_idx in kf.split(X):
    print("TRAIN:", train_idx, "TEST:", test_idx)
    # Index the arrays with the fold's positions to materialise the split.
    X_train = X[train_idx]
    X_test = X[test_idx]
    y_train = y[train_idx]
    y_test = y[test_idx]

TRAIN: [1 2 3] TEST: [0]
TRAIN: [0 2 3] TEST: [1]
TRAIN: [0 1 3] TEST: [2]
TRAIN: [0 1 2] TEST: [3]


In [4]:
# Same four single-sample folds as before, but the samples are shuffled
# before being assigned to folds.
# A fixed `random_state` makes the shuffle reproducible: without it the
# fold order changes on every run, so the printed output could never be
# reproduced with Restart & Run All. Re-run the cell to refresh the output
# for this seed.
kf = KFold(n_splits=4, shuffle=True, random_state=42)
for train_index, test_index in kf.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    # Index the arrays with the fold's positions to materialise the split.
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

TRAIN: [0 1 3] TEST: [2]
TRAIN: [1 2 3] TEST: [0]
TRAIN: [0 2 3] TEST: [1]
TRAIN: [0 1 2] TEST: [3]


## GridSearchCV

In [5]:
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

# NOTE(review): the variable is named `iris` but it holds the *wine*
# dataset. The name is kept because the following cell reads `iris.data`
# and `iris.target`; confirm which dataset was intended.
iris = datasets.load_wine()

# Search grid: 3 kernels x 3 values of C x 3 values of gamma = 27 candidates.
parameters = {
    'kernel': ('linear', 'rbf', 'poly'),
    'C': [0.1, 1, 10],
    'gamma': [1, 10, 15],
}

# Exhaustively evaluate every combination on a fresh SVC; `fit` returns the
# search object itself, so the fitted search can be bound directly to `clf`.
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=parameters).fit(iris.data, iris.target)
clf.best_estimator_

In [6]:
# Train a single SVC with fixed hyper-parameters (linear kernel, C=1).
svc = svm.SVC(C=1, kernel='linear')
# Fit and score `svc` itself. Calling `clf.fit` / `clf.score` here would
# re-run the whole grid search and leave this estimator unused.
svc.fit(iris.data, iris.target)
# NOTE: this is the accuracy on the training data, so it is an optimistic
# estimate; use cross-validation or a held-out set for a fair score.
svc.score(iris.data, iris.target)


1.0

## cross_val_score

In [7]:
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_score

# Load the diabetes regression dataset and use all of its samples.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target

# Evaluate a default Lasso model with 3-fold cross-validation; one score is
# printed per fold (presumably R^2, the regressor's default score -- confirm
# against the scikit-learn documentation).
lasso = linear_model.Lasso()
print(cross_val_score(estimator=lasso, X=X, y=y, cv=3))

[0.34542953 0.34712348 0.36884641]
