In [13]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.datasets import load_digits

In [2]:
digits = load_digits()
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [4]:
x_train,x_test,y_train,y_test = train_test_split(digits.data,digits.target,test_size=0.2)
x_train.shape,x_test.shape

((1437, 64), (360, 64))

### This is manually you can perform KFold

In [9]:
kf = StratifiedKFold(n_splits=5)
kf

StratifiedKFold(n_splits=5, random_state=None, shuffle=False)

In [7]:
def get_score(model,x_train,x_test,y_train,y_test):
  model.fit(x_train,y_train)
  return model.score(x_test,y_test)

In [10]:
scores_l = []
scores_svm = []
scores_rf = []
for train_index,test_index in kf.split(digits.data,digits.target):
  x_train,x_test,y_train,y_test = digits.data[train_index],digits.data[test_index],digits.target[train_index],digits.target[test_index]
  scores_l.append(get_score(LogisticRegression(),x_train,x_test,y_train,y_test))
  scores_svm.append(get_score(SVC(),x_train,x_test,y_train,y_test))
  scores_rf.append(get_score(RandomForestClassifier(),x_train,x_test,y_train,y_test))

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [11]:
print('Logistic Regression Scores:'+str(scores_l))
print('SVC Scores:'+str(scores_svm))
print('Random Forest Scores:'+str(scores_rf))

Logistic Regression Scores:[0.9222222222222223, 0.8694444444444445, 0.9415041782729805, 0.9387186629526463, 0.8969359331476323]
SVC Scores:[0.9611111111111111, 0.9444444444444444, 0.9832869080779945, 0.9888579387186629, 0.9387186629526463]
Random Forest Scores:[0.9361111111111111, 0.9138888888888889, 0.9526462395543176, 0.9665738161559888, 0.935933147632312]


### Now with ready made method

In [15]:
cross_val_score(LogisticRegression(max_iter=10000),digits.data,digits.target)

array([0.92222222, 0.87222222, 0.94150418, 0.94150418, 0.89693593])

In [16]:
cross_val_score(SVC(),digits.data,digits.target)

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [17]:
cross_val_score(RandomForestClassifier(n_estimators=40),digits.data,digits.target)

array([0.93611111, 0.91944444, 0.95543175, 0.9637883 , 0.93593315])

## We can use KFold Cross Validation For Parameter Tunning by changing parameter values on same Algorithm

In [18]:
cross_val_score(RandomForestClassifier(n_estimators=30),digits.data,digits.target)

array([0.93055556, 0.90277778, 0.94150418, 0.96935933, 0.92479109])

## Exercise

In [19]:
from sklearn.datasets import load_iris

In [20]:
flowers = load_iris()
dir(flowers)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

In [21]:
score1=cross_val_score(LogisticRegression(),flowers.data,flowers.target)
score2=cross_val_score(SVC(),flowers.data,flowers.target)
score3=cross_val_score(RandomForestClassifier(),flowers.data,flowers.target)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [22]:
score1,score1.mean()

(array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ]),
 np.float64(0.9733333333333334))

In [23]:
score2,score2.mean()

(array([0.96666667, 0.96666667, 0.96666667, 0.93333333, 1.        ]),
 np.float64(0.9666666666666666))

In [24]:
score3,score3.mean()

(array([0.96666667, 0.96666667, 0.9       , 0.93333333, 1.        ]),
 np.float64(0.9533333333333334))