In [9]:
from sklearn.datasets import load_digits

# Load the scikit-learn digits dataset; later cells read its .data and .target arrays.
digits = load_digits()

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Single 70/30 hold-out split of the digits data.
# random_state is pinned so the same rows land in train/test on every
# Restart & Run All; without it the split changes on each execution.
# NOTE: the K-fold loops further down rebind these four names on every fold.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=42
)

In [11]:
# Bring in the three classifier classes first, then build one
# default-configured instance of each.
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

lr = LogisticRegression()
svm = SVC()
rf = RandomForestClassifier()

In [12]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``score(X, y)``
        The object passed in is fitted directly; no copy is made.
    X_train, y_train :
        Features and labels handed to ``model.fit``.
    X_test, y_test :
        Features and labels handed to ``model.score``.

    Returns
    -------
    Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return test_score

In [13]:
from sklearn.model_selection import KFold
# 3-fold splitter; shuffle is left at its default (off, per the repr shown below).
kf = KFold(n_splits=3)
# Bare expression so the notebook displays the splitter's configuration.
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [14]:
# Toy run on nine items to show which positions each of the 3 folds holds out.
toy_items = list(range(1, 10))
for train_index, test_index in kf.split(toy_items):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [29]:
# Per-fold score accumulators for the plain KFold run, one independent list per model.
score_lr, score_svm, score_rf = [], [], []

In [30]:
# Score every model on each plain K-fold split of the digits data.
# The accumulators are re-created here so that re-running this cell yields exactly
# one score per fold instead of appending to results left over from an earlier run.
score_lr, score_svm, score_rf = [], [], []

for train_index, test_index in kf.split(digits.data):
    # These four module-level names are rebound on every fold.
    X_train, X_test = digits.data[train_index], digits.data[test_index]
    y_train, y_test = digits.target[train_index], digits.target[test_index]

    # LR and RF get a fresh estimator per fold; `svm` is the shared instance, refit by get_score.
    score_lr.append(get_score(LogisticRegression(max_iter=10000), X_train, X_test, y_train, y_test))
    score_svm.append(get_score(svm, X_train, X_test, y_train, y_test))
    score_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

In [31]:
# Per-fold LogisticRegression scores from the plain KFold run.
score_lr

[0.9248747913188647, 0.9432387312186978, 0.9148580968280468]

In [32]:
# Per-fold SVC scores from the plain KFold run.
score_svm

[0.9666110183639399, 0.9816360601001669, 0.9549248747913188]

In [33]:
# Per-fold RandomForestClassifier (40 trees) scores from the plain KFold run.
score_rf

[0.9382303839732888, 0.9432387312186978, 0.9248747913188647]

In [35]:
# Using StratifiedKFold instead of plain KFold: its split() also receives the labels,
# so each fold can keep roughly the same class mix as the full dataset.

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=3)

In [40]:
# Per-fold score accumulators for the StratifiedKFold run, one independent list per model.
score_skf_lr, score_skf_svm, score_skf_rf = [], [], []

In [42]:
# Score every model on each stratified fold of the digits data.
# The accumulators are re-created here so that re-running this cell yields exactly
# one score per fold instead of appending to results left over from an earlier run.
score_skf_lr, score_skf_svm, score_skf_rf = [], [], []

for train_index, test_index in skf.split(digits.data, digits.target):
    # These four module-level names are rebound on every fold.
    X_train, X_test = digits.data[train_index], digits.data[test_index]
    y_train, y_test = digits.target[train_index], digits.target[test_index]

    # LR and RF get a fresh estimator per fold; `svm` is the shared instance, refit by get_score.
    score_skf_lr.append(get_score(LogisticRegression(max_iter=10000), X_train, X_test, y_train, y_test))
    score_skf_svm.append(get_score(svm, X_train, X_test, y_train, y_test))
    score_skf_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

In [43]:
# Per-fold LogisticRegression scores from the StratifiedKFold run.
score_skf_lr

[0.9198664440734557, 0.9432387312186978, 0.9165275459098498]

In [44]:
# Per-fold SVC scores from the StratifiedKFold run.
score_skf_svm

[0.9649415692821369, 0.9799666110183639, 0.9649415692821369]

In [45]:
# Per-fold RandomForestClassifier (40 trees) scores from the StratifiedKFold run.
score_skf_rf

[0.9332220367278798, 0.9415692821368948, 0.9232053422370617]

In [46]:
# Using cross_val_score, which runs the split / fit / score loop in a single call

from sklearn.model_selection import cross_val_score

In [47]:
# No cv argument is passed; the output below holds five scores, one per fold.
cross_val_score(LogisticRegression(max_iter=10000), digits.data, digits.target)

array([0.92222222, 0.87222222, 0.94150418, 0.94150418, 0.89693593])

In [48]:
# Same call for the shared SVC instance `svm` (again no cv argument).
cross_val_score(svm, digits.data, digits.target)

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [49]:
# Same call for a 40-tree random forest (again no cv argument).
cross_val_score(RandomForestClassifier(n_estimators=40), digits.data, digits.target)

array([0.93333333, 0.88333333, 0.95264624, 0.96657382, 0.91922006])

# IRIS Classification Cross Validation

In [69]:
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

# Load the scikit-learn iris dataset; later cells read its .data and .target arrays.
iris = load_iris()

In [70]:
# Re-created for the iris section with the same 3-fold configuration used for digits.
skf = StratifiedKFold(n_splits=3)

In [71]:
# Quick look at the attribute names available on the loaded dataset object.
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [72]:
# Per-fold score accumulators for the iris StratifiedKFold run, one independent list per model.
iris_score_lr, iris_score_svm, iris_score_rf, iris_score_dt = [], [], [], []

In [73]:
# Score every model on each stratified fold of the iris data.
# The accumulators are re-created here so that re-running this cell yields exactly
# one score per fold instead of appending to results left over from an earlier run.
# `get_score` and `svm` are the ones defined in the digits section above.
iris_score_lr, iris_score_svm, iris_score_rf, iris_score_dt = [], [], [], []

for train_index, test_index in skf.split(iris.data, iris.target):
    # These four module-level names are rebound on every fold.
    X_train, X_test = iris.data[train_index], iris.data[test_index]
    y_train, y_test = iris.target[train_index], iris.target[test_index]

    # LR, RF and DT get a fresh estimator per fold; `svm` is the shared instance, refit by get_score.
    iris_score_lr.append(get_score(LogisticRegression(max_iter=10000), X_train, X_test, y_train, y_test))
    iris_score_svm.append(get_score(svm, X_train, X_test, y_train, y_test))
    iris_score_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))
    iris_score_dt.append(get_score(DecisionTreeClassifier(), X_train, X_test, y_train, y_test))

In [74]:
# Per-fold scores followed by their mean, one line per model (LR, SVM, RF, DT).
for fold_scores in (iris_score_lr, iris_score_svm, iris_score_rf, iris_score_dt):
    print(fold_scores, np.mean(fold_scores))

[0.98, 0.96, 0.98] 0.9733333333333333
[0.96, 0.98, 0.94] 0.96
[0.98, 0.94, 0.94] 0.9533333333333333
[0.98, 0.92, 1.0] 0.9666666666666667


In [75]:
# cross_val_score on iris for each model (no cv argument is passed).
# Estimators are scored in the order LR, SVM, RF, DT and unpacked in that same order.
iris_score_cv_lr, iris_score_cv_svm, iris_score_cv_rf, iris_score_cv_dt = [
    cross_val_score(estimator, iris.data, iris.target)
    for estimator in (
        LogisticRegression(max_iter=10000),
        svm,
        RandomForestClassifier(n_estimators=40),
        DecisionTreeClassifier(),
    )
]

In [77]:
# Labelled cross_val_score results with their means, printed in the order RF, LR, SVM, DT.
for label, cv_scores in (
    ("RF: ", iris_score_cv_rf),
    ("LR: ", iris_score_cv_lr),
    ("SVM: ", iris_score_cv_svm),
    ("DT: ", iris_score_cv_dt),
):
    print(label, cv_scores, np.mean(cv_scores))

RF:  [0.96666667 0.96666667 0.93333333 0.9        1.        ] 0.9533333333333334
LR:  [0.96666667 1.         0.93333333 0.96666667 1.        ] 0.9733333333333334
SVM:  [0.96666667 0.96666667 0.96666667 0.93333333 1.        ] 0.9666666666666666
DT:  [0.96666667 0.96666667 0.9        0.96666667 1.        ] 0.9600000000000002
