In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_digits

In [2]:
# Load the digits dataset that ships with scikit-learn; the cells below read
# its `.data` (features) and `.target` (labels) attributes.
digits = load_digits()

In [8]:
# Inspect the feature matrix (the array repr is truncated for large arrays).
digits.data

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [10]:
# Hold out 30% of the samples for testing. random_state pins the shuffle so the
# split -- and every score computed from it -- is the same after a kernel
# restart instead of changing on each run.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=42
)

In [14]:
# Baseline 1: logistic regression trained on the training split and scored on
# the held-out split. max_iter is raised to 10000 -- presumably so the solver
# converges on these features (TODO confirm).
lr = LogisticRegression(max_iter=10000).fit(X_train, y_train)
lr.score(X_test, y_test)

0.9611111111111111

In [16]:
# Baseline 2: support vector classifier with default settings, trained and
# scored on the same hold-out split as the other baselines.
svm = SVC().fit(X_train, y_train)
svm.score(X_test, y_test)

0.9907407407407407

In [19]:
# Baseline 3: random forest with 1000 estimators. random_state fixes the
# forest's internal randomness so this score is reproducible between runs.
rf = RandomForestClassifier(n_estimators=1000, random_state=42)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9722222222222222

In [20]:
from sklearn.model_selection import KFold

In [22]:
# Plain 3-fold splitter; displaying it shows the defaults it was built with.
kf = KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [23]:
# Toy demonstration on nine items: print which positions land in the training
# part and which in the test part for each of the three folds.
for train_index, test_index in kf.split(list(range(1, 10))):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [24]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Any object exposing ``fit(X, y)`` and ``score(X, y)``.
            It is fitted in place.
        X_train: Training features, passed to ``model.fit``.
        X_test: Test features, passed to ``model.score``.
        y_train: Training labels, passed to ``model.fit``.
        y_test: Test labels, passed to ``model.score``.

    Returns:
        Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    held_out_score = model.score(X_test, y_test)
    return held_out_score

In [26]:
# Sanity check of the helper on the hold-out split with a fresh logistic regression.
get_score(
    LogisticRegression(max_iter=10000),
    X_train,
    X_test,
    y_train,
    y_test,
)

0.9611111111111111

In [27]:
from sklearn.model_selection import StratifiedKFold

In [29]:
# 3-fold stratified splitter; it is given both features and labels when splitting.
folds = StratifiedKFold(n_splits=3)

In [32]:
# Score every model on each stratified fold. For each fold three numbers are
# printed, in this order: logistic regression, SVC, random forest.
for train_index, test_index in folds.split(digits.data, digits.target):
    # Fold-local names: assigning to X_train/X_test/y_train/y_test here would
    # silently overwrite the hold-out split produced by train_test_split.
    X_fold_train, X_fold_test = digits.data[train_index], digits.data[test_index]
    y_fold_train, y_fold_test = digits.target[train_index], digits.target[test_index]

    print(get_score(LogisticRegression(max_iter=10000),
                    X_fold_train, X_fold_test, y_fold_train, y_fold_test))
    print(get_score(SVC(),
                    X_fold_train, X_fold_test, y_fold_train, y_fold_test))
    # random_state fixes the forest's randomness so re-running the cell
    # reproduces the same score.
    print(get_score(RandomForestClassifier(random_state=42),
                    X_fold_train, X_fold_test, y_fold_train, y_fold_test))

0.9248747913188647
0.9649415692821369
0.9382303839732888
0.9398998330550918
0.9799666110183639
0.9532554257095158
0.9232053422370617
0.9649415692821369
0.9265442404006677


In [37]:
# Per-fold scores for each model, one list entry per stratified fold.
scores_l = []
scores_svm = []
scores_rf = []

for train_index, test_index in folds.split(digits.data, digits.target):
    # Fold-local names: assigning to X_train/X_test/y_train/y_test here would
    # silently overwrite the hold-out split produced by train_test_split.
    X_fold_train, X_fold_test = digits.data[train_index], digits.data[test_index]
    y_fold_train, y_fold_test = digits.target[train_index], digits.target[test_index]

    scores_l.append(get_score(LogisticRegression(max_iter=10000),
                              X_fold_train, X_fold_test, y_fold_train, y_fold_test))
    scores_svm.append(get_score(SVC(),
                                X_fold_train, X_fold_test, y_fold_train, y_fold_test))
    # random_state fixes the forest's randomness so the collected scores are
    # identical on every run of this cell.
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=100, random_state=42),
                               X_fold_train, X_fold_test, y_fold_train, y_fold_test))

In [38]:
# One line per model: logistic regression, SVC, random forest.
print(scores_l, scores_svm, scores_rf, sep="\n")

[0.9248747913188647, 0.9398998330550918, 0.9232053422370617]
[0.9649415692821369, 0.9799666110183639, 0.9649415692821369]
[0.9315525876460768, 0.9632721202003339, 0.9298831385642737]


In [39]:
# NOTE(review): the output recorded for this cell is an AttributeError saying a
# list has no attribute 'mean', which these print calls cannot raise. The cell
# source was presumably edited after the failing run -- TODO confirm and re-run
# so that the stored output matches the code.
print(scores_l)
print(scores_svm)
print(scores_rf)

AttributeError: 'list' object has no attribute 'mean'

In [43]:
# The score containers are plain lists, so NumPy does the averaging.
# One averaged value per model is printed on its own line.
print(*map(np.average, (scores_l, scores_svm, scores_rf)), sep="\n")

0.9293266555370061
0.9699499165275459
0.9415692821368947


In [44]:
# Mean of the per-fold scores for each model, one value per line.
print("\n".join(
    str(np.mean(fold_scores))
    for fold_scores in (scores_l, scores_svm, scores_rf)
))

0.9293266555370061
0.9699499165275459
0.9415692821368947


# Method 2 (best): let `cross_val_score` do the splitting, fitting and scoring

In [45]:
from sklearn.model_selection import cross_val_score

In [49]:
# A single call replaces the manual fold loop; no `cv` argument is passed, and
# the displayed result holds one score per fold.
cross_val_score(
    LogisticRegression(max_iter=10000),
    digits.data,
    digits.target,
)

array([0.925     , 0.87777778, 0.93871866, 0.93314763, 0.89693593])

In [50]:
# Cross-validated scores for logistic regression, kept for the comparison below.
scores_l_method2 = cross_val_score(
    LogisticRegression(max_iter=10000),
    digits.data,
    digits.target,
)

In [52]:
# Cross-validated scores for the default SVC.
scores_svm_method2 = cross_val_score(
    SVC(),
    digits.data,
    digits.target,
)

In [53]:
# Cross-validated scores for a 100-estimator random forest. random_state fixes
# the forest's randomness so the scores are reproducible between runs.
scores_rf_method2 = cross_val_score(
    RandomForestClassifier(n_estimators=100, random_state=42),
    digits.data,
    digits.target,
)

In [54]:
# One line per model: logistic regression, SVC, random forest.
print(scores_l_method2, scores_svm_method2, scores_rf_method2, sep="\n")

[0.925      0.87777778 0.93871866 0.93314763 0.89693593]
[0.96111111 0.94444444 0.98328691 0.98885794 0.93871866]
[0.93333333 0.91111111 0.96100279 0.95821727 0.93314763]


# Parameter tuning: effect of `n_estimators` on the random forest

In [55]:
# Try a smaller forest (n_estimators=50). Fixing random_state keeps the
# forest's own randomness constant, so differences between n_estimators
# settings are not just seed noise.
scores_rf_method2 = cross_val_score(
    RandomForestClassifier(n_estimators=50, random_state=42),
    digits.data,
    digits.target,
)

In [56]:
# Per-fold scores of the most recent run (n_estimators=50 in the cell above).
print(scores_rf_method2)

[0.92777778 0.89166667 0.96100279 0.97771588 0.91643454]


In [57]:
# Try a larger forest (n_estimators=500). Fixing random_state keeps the
# forest's own randomness constant, so differences between n_estimators
# settings are not just seed noise.
scores_rf_method2 = cross_val_score(
    RandomForestClassifier(n_estimators=500, random_state=42),
    digits.data,
    digits.target,
)

In [58]:
# Per-fold scores of the most recent run (n_estimators=500 in the cell above).
print(scores_rf_method2)

[0.93611111 0.91944444 0.9637883  0.96100279 0.92200557]
