In [1]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
digits=load_digits()

In [2]:
dir(digits)

['DESCR', 'data', 'images', 'target', 'target_names']

In [3]:
xtrain,xtest,ytrain,ytest=train_test_split(digits.data,digits.target,test_size=0.3)

#### Logistic Regression

In [4]:
lr=LogisticRegression()
lr.fit(xtrain,ytrain)
lr.score(xtest,ytest)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


0.9629629629629629

#### SVM  

In [5]:
svm=SVC(gamma='auto')
svm.fit(xtrain,ytrain)
svm.score(xtest,ytest)

0.31296296296296294

#### Random Forest Classifier

In [6]:
rf=RandomForestClassifier(n_estimators=70)
rf.fit(xtrain,ytrain)
rf.score(xtest,ytest)

0.9777777777777777

### K-Fold Classifier

In [7]:
from sklearn.model_selection import KFold
kf=KFold()
kf

KFold(n_splits=5, random_state=None, shuffle=False)

In [9]:
for train_index,test_index in kf.split([1,2,3,4,5,6,7,8,9,10]):
    print(train_index,test_index)

[2 3 4 5 6 7 8 9] [0 1]
[0 1 4 5 6 7 8 9] [2 3]
[0 1 2 3 6 7 8 9] [4 5]
[0 1 2 3 4 5 8 9] [6 7]
[0 1 2 3 4 5 6 7] [8 9]


In [10]:
def get_score(model,xtrain,xtest,ytrain,ytest):
    model.fit(xtrain,ytrain)
    return model.score(xtest,ytest)

In [13]:
get_score(RandomForestClassifier(n_estimators=40),xtrain,xtest,ytrain,ytest)

0.9703703703703703

In [14]:
from sklearn.model_selection import StratifiedKFold
fold=StratifiedKFold()
fold

StratifiedKFold(n_splits=5, random_state=None, shuffle=False)

In [78]:
score_l=[]
score_svm=[]
score_rf=[]
for train_index,test_index in fold.split(digits.data,digits.target):
    xtrain,xtest,ytrain,ytest=digits.data[train_index],digits.data[test_index],digits.target[train_index],digits.target[test_index]
    score_l.append(get_score(LogisticRegression(solver='liblinear',multi_class='ovr'),xtrain,xtest,ytrain,ytest))
    score_svm.append(get_score(SVC(gamma='auto'),xtrain,xtest,ytrain,ytest))
    score_rf.append(get_score(RandomForestClassifier(n_estimators=70),xtrain,xtest,ytrain,ytest))

In [79]:
score_l

[0.9222222222222223,
 0.8833333333333333,
 0.9526462395543176,
 0.958217270194986,
 0.8941504178272981]

In [80]:
score_svm

[0.4111111111111111,
 0.45,
 0.45403899721448465,
 0.44846796657381616,
 0.479108635097493]

In [81]:
score_rf

[0.9277777777777778,
 0.8916666666666667,
 0.958217270194986,
 0.9610027855153204,
 0.9220055710306406]

In [87]:
import numpy as np

In [88]:
np.average(score_l)

0.9221138966264315

In [89]:
np.average(score_svm)

0.448545341999381

In [90]:
np.average(score_rf)

0.9321340142370783

## By using cross_val_score function

In [91]:
from sklearn.model_selection import cross_val_score

In [92]:
cross_val_score(LogisticRegression(solver='liblinear',multi_class='ovr'),digits.data,digits.target,cv=5)

array([0.92222222, 0.88333333, 0.95264624, 0.95821727, 0.89415042])

In [95]:
cross_val_score(SVC(gamma='auto'),digits.data,digits.target,cv=5)

array([0.41111111, 0.45      , 0.454039  , 0.44846797, 0.47910864])

In [94]:
cross_val_score(RandomForestClassifier(n_estimators=40,),digits.data,digits.target,cv=5)

array([0.93611111, 0.90833333, 0.95264624, 0.94986072, 0.93036212])

In [96]:
np.average(cross_val_score(LogisticRegression(solver='liblinear',multi_class='ovr'),digits.data,digits.target,cv=5))

0.9221138966264315

In [97]:
np.average(cross_val_score(SVC(gamma='auto'),digits.data,digits.target,cv=5))

0.448545341999381

In [99]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=40),digits.data,digits.target,cv=5))

0.9237852058186318

In [100]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=5),digits.data,digits.target,cv=5))

0.870939337666357

In [101]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=10),digits.data,digits.target,cv=5))

0.903201795109873

In [102]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=15),digits.data,digits.target,cv=5))

0.9193330238316311

In [103]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=20),digits.data,digits.target,cv=5))

0.9137759207675643

In [104]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=30),digits.data,digits.target,cv=5))

0.9354627050448778

In [105]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=40),digits.data,digits.target,cv=5))

0.9438068709377901

In [106]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=50),digits.data,digits.target,cv=5))

0.9382544103992572

In [107]:
np.average(cross_val_score(RandomForestClassifier(n_estimators=45),digits.data,digits.target,cv=5))

0.9354673475704116