In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

In [51]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

In [5]:
# Load the digits dataset via scikit-learn's load_digits helper.
digits = load_digits()

In [6]:
# Inspect which attributes the loaded dataset object exposes.
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [7]:
# Wrap the feature matrix in a DataFrame; no column names are given, so
# pandas assigns default integer column labels.
df = pd.DataFrame(digits.data)

In [8]:
# Attach the labels as a 'target' column next to the feature columns
# (this mutates df in place).
df['target'] = digits.target

In [9]:
# Preview the first rows to sanity-check the features and the new 'target' column.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4


In [10]:
# Feature matrix: every column of df except the label column.
x = df.drop(columns=['target'])

In [11]:
# Label vector: the 'target' column on its own.
y = df['target']

In [12]:
# Hold out 20% of the rows for testing. random_state is pinned so the split,
# and every score derived from it, is identical on each Restart & Run All;
# without it each run draws a different random split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [13]:
# Logistic regression: fit on the training split (fit returns the estimator,
# so it can be chained), then display the score on the held-out split.
lr = LogisticRegression(solver='lbfgs', max_iter=100000).fit(x_train, y_train)
lr.score(x_test, y_test)

0.9694444444444444

In [14]:
# Support vector classifier with default settings: fit on the training
# split, then display the score on the held-out split.
svc = SVC().fit(x_train, y_train)
svc.score(x_test, y_test)

0.9888888888888889

In [15]:
# Decision tree with default settings: fit on the training split, then
# display the score on the held-out split.
dt = tree.DecisionTreeClassifier().fit(x_train, y_train)
dt.score(x_test, y_test)

0.8583333333333333

In [16]:
# Random forest built from 30 trees: fit on the training split, then
# display the score on the held-out split.
rf = RandomForestClassifier(n_estimators=30).fit(x_train, y_train)
rf.score(x_test, y_test)

0.9694444444444444

In [17]:
# KFold yields train/test index arrays: it splits the dataset into k
# consecutive folds (without shuffling by default). Here k = 3.
kf = KFold(n_splits = 3)

In [18]:
# Display the splitter's repr to confirm its configuration.
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [19]:
# Demonstrate the splitter on nine dummy items (1..9): each pass prints the
# training indices followed by the test indices of one fold.
for train_index, test_index in kf.split(list(range(1, 10))):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [20]:
def get_score(model, x_train, x_test, y_train, y_test):
    """Train ``model`` on one split and evaluate it on another.

    Args:
        model: Object exposing ``fit(X, y)`` and ``score(X, y)``.
        x_train: Training features handed to ``fit``.
        x_test: Evaluation features handed to ``score``.
        y_train: Training labels handed to ``fit``.
        y_test: Evaluation labels handed to ``score``.

    Returns:
        Whatever ``model.score(x_test, y_test)`` returns.

    Note:
        ``fit`` is called on the object passed in, not on a copy.
    """
    training_split = (x_train, y_train)
    held_out_split = (x_test, y_test)
    model.fit(*training_split)
    held_out_score = model.score(*held_out_split)
    return held_out_score

In [21]:
# Fit and score a logistic regression on the hold-out split via get_score.
get_score(
    model=LogisticRegression(solver='lbfgs', max_iter=100000),
    x_train=x_train,
    x_test=x_test,
    y_train=y_train,
    y_test=y_test,
)

0.9694444444444444

In [22]:
# Fit and score a default random forest on the hold-out split via get_score.
get_score(
    model=RandomForestClassifier(),
    x_train=x_train,
    x_test=x_test,
    y_train=y_train,
    y_test=y_test,
)

0.9722222222222222

In [23]:
# Stratified variant of the 3-fold splitter.
# NOTE(review): no cell visible in this notebook uses `folds`; the manual
# cross-validation loop iterates `kf` instead -- confirm which was intended.
folds = StratifiedKFold(n_splits=3)

In [24]:
# Display the stratified splitter's repr to confirm its configuration.
folds

StratifiedKFold(n_splits=3, random_state=None, shuffle=False)

In [25]:
# Number of rows in the feature matrix, i.e. how many items get divided among the folds.
len(digits.data)

1797

In [35]:
# Empty score accumulators, one independent list per model family
# (as suggested by the name prefixes).
lscore = []
rfscore = []
dtscore = []
svcscore = []

In [50]:
# n_splits (k) is 3, so the loop below runs three times -- once per fold.
# The accumulators are reset here so re-running this cell never double-appends.
# NOTE(review): `folds` (StratifiedKFold) is defined earlier but this loop
# iterates the plain `kf`; confirm whether stratified folds were intended.
lscore, rfscore, dtscore, svcscore = [], [], [], []
for train_index, test_index in kf.split(digits.data, digits.target):
    # Fold-local names: do not overwrite the notebook-level hold-out split
    # (x_train, x_test, y_train, y_test) with per-fold subsets.
    fold_x_train, fold_x_test = digits.data[train_index], digits.data[test_index]
    fold_y_train, fold_y_test = digits.target[train_index], digits.target[test_index]

    # Fresh, unfitted estimators each fold, paired with the list that
    # collects their scores. Order matches the printed output.
    candidates = (
        (rfscore, RandomForestClassifier(n_estimators=40)),
        (lscore, LogisticRegression(solver='lbfgs', max_iter=100000)),
        (svcscore, SVC()),
        (dtscore, tree.DecisionTreeClassifier()),
    )
    for scores, model in candidates:
        fold_score = get_score(model, fold_x_train, fold_x_test, fold_y_train, fold_y_test)
        scores.append(fold_score)
        print(fold_score)
    print('next iteration')

0.9232053422370617
0.9282136894824707
0.9666110183639399
0.7729549248747913
next iteration
0.9532554257095158
0.9415692821368948
0.9816360601001669
0.8046744574290484
next iteration
0.9198664440734557
0.9165275459098498
0.9549248747913188
0.7846410684474123
next iteration


In [53]:
# Cross-validated scores for logistic regression on the full dataset;
# cv is left at its default.
cross_val_score(
    estimator=LogisticRegression(solver='lbfgs', max_iter=100000),
    X=digits.data,
    y=digits.target,
)

array([0.925     , 0.87777778, 0.93871866, 0.93593315, 0.89693593])

In [54]:
# Cross-validated scores for a default SVC on the full dataset;
# cv is left at its default.
cross_val_score(
    estimator=SVC(),
    X=digits.data,
    y=digits.target,
)

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [55]:
# Cross-validated scores for a default random forest on the full dataset;
# cv is left at its default.
cross_val_score(
    estimator=RandomForestClassifier(),
    X=digits.data,
    y=digits.target,
)

array([0.93888889, 0.91111111, 0.95264624, 0.96100279, 0.93314763])

In [56]:
# Cross-validated scores for a default decision tree on the full dataset;
# cv is left at its default.
cross_val_score(
    estimator=tree.DecisionTreeClassifier(),
    X=digits.data,
    y=digits.target,
)

array([0.76944444, 0.72777778, 0.79387187, 0.8356546 , 0.79387187])