In [3]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

# Dataset and classifier used by this cell; the classifier is built with an
# iteration cap of 400.
iris = load_iris()
logreg = LogisticRegression(max_iter=400)

# An integer `cv` asks for that many folds; one score comes back per fold.
scores = cross_val_score(estimator=logreg, X=iris.data, y=iris.target, cv=5)

# Per-fold scores, one per line, followed by their mean to two decimals.
print(*scores, sep="\n")
mean_score = scores.mean()
print("Average cross-validation score: {:.2f}".format(mean_score))

0.9666666666666667
1.0
0.9333333333333333
0.9666666666666667
1.0
Average cross-validation score: 0.97


In [4]:
from sklearn.model_selection import StratifiedKFold
# Cross-validate `logreg` on iris with an explicitly constructed splitter:
# five stratified folds.
skf = StratifiedKFold(n_splits=5)
scores_strat = cross_val_score(estimator=logreg, X=iris.data, y=iris.target, cv=skf)

# Per-fold scores, one per line, followed by their mean to two decimals.
print(*scores_strat, sep="\n")
strat_mean = scores_strat.mean()
print("Average cross-validation score: {:.2f}".format(strat_mean))

0.9666666666666667
1.0
0.9333333333333333
0.9666666666666667
1.0
Average cross-validation score: 0.97


In [5]:
from sklearn.model_selection import KFold
# Plain (non-stratified) KFold splitter with five folds; no `shuffle`
# argument is passed.
kfold = KFold(n_splits=5)
scores_standard = cross_val_score(estimator=logreg, X=iris.data, y=iris.target, cv=kfold)

# Per-fold scores, one per line, followed by their mean to two decimals.
print(*scores_standard, sep="\n")
standard_mean = scores_standard.mean()
print("Average cross-validation score: {:.2f}".format(standard_mean))

1.0
1.0
0.8666666666666667
0.9333333333333333
0.8333333333333334
Average cross-validation score: 0.93


In [6]:
from sklearn.model_selection import KFold
# Three folds from a plain KFold with no `shuffle` argument. The scores
# recorded for this cell are all 0.0, which looks intentional: it illustrates
# how an unshuffled k-fold split can fail on class-ordered data.
# NOTE(review): presumably iris is stored sorted by class in three equal
# blocks, so each test fold holds a class that is absent from training --
# confirm against the dataset documentation.
kfold = KFold(n_splits=3)
scores_standard = cross_val_score(estimator=logreg, X=iris.data, y=iris.target, cv=kfold)

# Per-fold scores, one per line, followed by their mean to two decimals.
print(*scores_standard, sep="\n")
standard_mean = scores_standard.mean()
print("Average cross-validation score: {:.2f}".format(standard_mean))

0.0
0.0
0.0
Average cross-validation score: 0.00


In [7]:
from sklearn.model_selection import LeaveOneOut
# Leave-one-out cross-validation of `logreg` on iris: the result holds one
# score per cv iteration.
loo = LeaveOneOut()
scores = cross_val_score(estimator=logreg, X=iris.data, y=iris.target, cv=loo)

# Report how many iterations were run and the mean score across them.
n_iterations = len(scores)
mean_accuracy = scores.mean()
print("Number of cv iterations: ", n_iterations)
print("Mean accuracy: {:.2f}".format(mean_accuracy))

Number of cv iterations:  150
Mean accuracy: 0.97


In [8]:
from sklearn.model_selection import ShuffleSplit
# Ten random splits, each using 50% of the samples for training and 50% for
# testing. `random_state` is fixed so the splits -- and therefore the printed
# scores -- are identical on every run (Restart Kernel -> Run All).
# The output previously recorded for this cell came from an unseeded run, so
# the individual numbers will differ once the cell is re-executed.
shuffle_split = ShuffleSplit(test_size=.5, train_size=.5, n_splits=10, random_state=0)
scores = cross_val_score(logreg, iris.data, iris.target, cv=shuffle_split)

# One score per split, one per line.
for score in scores:
    print(score)

0.92
0.9733333333333334
0.96
0.9733333333333334
0.9466666666666667
0.9733333333333334
0.9466666666666667
0.96
0.9466666666666667
0.9466666666666667


In [10]:
from sklearn.model_selection import GroupKFold
from sklearn.datasets import make_blobs
# Tiny synthetic dataset: 12 samples, generated with a fixed random_state.
X, y = make_blobs(n_samples=12, random_state=0)

# Group label for each sample, in sample order: the first three samples are
# group 0, the next four group 1, then two in group 2 and three in group 3.
groups = [0] * 3 + [1] * 4 + [2] * 2 + [3] * 3

# The per-sample group labels are handed to cross_val_score together with a
# three-split GroupKFold splitter.
group_cv = GroupKFold(n_splits=3)
scores = cross_val_score(logreg, X, y, groups=groups, cv=group_cv)
print("Cross-validation scores:\n{}".format(scores))

Cross-validation scores:
[0.75       0.6        0.66666667]
