In [1]:
import os
import sys
import numpy as np
from scipy import sparse
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import mglearn
plt.rcParams['font.family'] = 'Malgun Gothic'
plt.rcParams['axes.unicode_minus'] = False
%config InlineBackend.figure_format = 'svg'
from mpl_toolkits.mplot3d import Axes3D, axes3d
from scipy.cluster import hierarchy
import seaborn as sns
import spacy
import nltk
from konlpy.tag import Okt
import graphviz

In [48]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold
from sklearn.model_selection import RepeatedKFold, RepeatedStratifiedKFold
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit
from sklearn.linear_model import LogisticRegression

In [8]:
from sklearn.datasets import make_blobs
from sklearn.datasets import load_iris

In [4]:
# Build a synthetic classification dataset and hold out a test split.
# Both steps are seeded so the split is identical on every run.
X, y = make_blobs(random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [5]:
# Baseline: a single train/test evaluation of logistic regression.
# `fit` returns the estimator itself, so constructing and fitting in two
# steps binds `logreg` to the same fitted object as the chained form.
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
logreg.score(X=X_test, y=y_test)

0.88

In [12]:
# Unfitted estimator reused by the cross-validation cells below.
# NOTE(review): max_iter=1000 is presumably there to avoid convergence
# warnings on iris -- confirm.
logreg = LogisticRegression(max_iter=1000)
# Iris dataset bundle; features under 'data', class labels under 'target'.
iris = load_iris()

In [15]:
# 10-fold cross-validation; show the per-fold scores together with their mean.
scores = cross_val_score(
    logreg,
    X=iris.data,
    y=iris.target,
    cv=10,
)
(scores, scores.mean())

(array([1.        , 0.93333333, 1.        , 1.        , 0.93333333,
        0.93333333, 0.93333333, 1.        , 1.        , 1.        ]),
 0.9733333333333334)

In [19]:
# cross_validate returns a dict of per-fold arrays (timings and scores);
# return_train_score=True adds the 'train_score' entry to that dict.
res = cross_validate(
    logreg,
    X=iris.data,
    y=iris.target,
    return_train_score=True,
)
res

{'fit_time': array([0.01701808, 0.02201962, 0.01701641, 0.01901674, 0.01501369]),
 'score_time': array([0.00100112, 0.        , 0.        , 0.00100088, 0.        ]),
 'test_score': array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ]),
 'train_score': array([0.96666667, 0.96666667, 0.98333333, 0.98333333, 0.975     ])}

In [21]:
# One column per metric, one row per fold -> column-wise mean over the folds.
pd.DataFrame(data=res).mean(axis=0)

fit_time       0.018017
score_time     0.000400
test_score     0.973333
train_score    0.975000
dtype: float64

In [22]:
# Show the raw label vector: the samples are stored sorted by class, which is
# why the splitting strategy (shuffling / stratification) matters below.
iris.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [27]:
# Plain 3-fold CV, shuffled with a fixed seed before splitting.
kfold = KFold(
    n_splits=3,
    shuffle=True,
    random_state=0,
)
cross_val_score(logreg, X=iris.data, y=iris.target, cv=kfold)

array([0.98, 0.96, 0.96])

In [30]:
# Stratified 3-fold CV. Note that this rebinds `kfold`, replacing the
# shuffled KFold splitter created in the previous cell.
kfold = StratifiedKFold(n_splits=3)
cross_val_score(
    logreg,
    X=iris.data,
    y=iris.target,
    cv=kfold,
)

array([0.98, 0.96, 0.98])

In [33]:
# Leave-one-out CV: every sample is used as a test set exactly once,
# so `scores` holds one entry per sample.
loo = LeaveOneOut()
scores = cross_val_score(
    logreg,
    X=iris.data,
    y=iris.target,
    cv=loo,
)

In [35]:
# Number of leave-one-out evaluations (length of the 1-D score array).
scores.shape[0]

150

In [40]:
# Stratified shuffle-split CV: 10 independent random 50/50 train/test splits.
# The splitter is seeded so the scores are reproducible under
# Restart & Run All; without random_state every run draws different splits.
# The output stored below this cell came from an unseeded run and should be
# regenerated after re-executing.
ss = StratifiedShuffleSplit(
    n_splits=10,
    train_size=0.5,
    test_size=0.5,
    random_state=0,
)
scores = cross_val_score(logreg, iris['data'], iris['target'], cv=ss)
scores

array([0.98666667, 0.93333333, 0.97333333, 0.97333333, 0.96      ,
       0.98666667, 0.97333333, 0.94666667, 0.93333333, 0.94666667])

In [49]:
# Group-aware CV on a tiny 12-sample toy set. Note that this rebinds X and y.
X, y = make_blobs(n_samples=12, random_state=0)
# One group id per sample: group sizes are 3, 4, 2 and 3 (12 in total).
groups = [0] * 3 + [1] * 4 + [2] * 2 + [3] * 3
scores = cross_val_score(
    logreg,
    X,
    y,
    groups=groups,
    cv=StratifiedGroupKFold(n_splits=3),
)
scores

array([0.75      , 0.66666667, 0.8       ])

In [52]:
# Repeated stratified k-fold with a fixed seed. n_splits=5 and n_repeats=10
# are the library defaults written out explicitly; together they account for
# the 50 scores shown in the output.
rskfold = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=10,
    random_state=42,
)
scores = cross_val_score(logreg, X=iris.data, y=iris.target, cv=rskfold)
scores

array([1.        , 0.96666667, 0.93333333, 1.        , 0.93333333,
       0.96666667, 0.96666667, 0.93333333, 1.        , 0.96666667,
       0.93333333, 1.        , 1.        , 0.96666667, 0.96666667,
       0.9       , 1.        , 1.        , 0.93333333, 0.96666667,
       0.93333333, 0.96666667, 0.96666667, 1.        , 0.96666667,
       1.        , 0.96666667, 0.96666667, 0.9       , 1.        ,
       0.96666667, 0.96666667, 0.96666667, 0.96666667, 0.93333333,
       0.96666667, 0.96666667, 1.        , 1.        , 0.9       ,
       0.96666667, 1.        , 0.9       , 0.96666667, 0.96666667,
       0.9       , 0.96666667, 0.96666667, 1.        , 0.96666667])