In [91]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
digits =load_digits()

In [92]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(digits.data,digits.target,test_size=0.5)

# logistic regression

In [93]:
lr = LogisticRegression(solver='liblinear',multi_class='ovr')
lr.fit(X_train, y_train)
lr.score(X_test, y_test)

0.9577308120133482

# svm (support vector machine)

In [94]:
svm = SVC(gamma='auto')
svm.fit(X_train, y_train)
svm.score(X_test, y_test)


0.18687430478309233

# randomforest classifier

In [86]:
rf = RandomForestClassifier(n_estimators=40)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)


0.9592592592592593

#### K FOLD VALIDATION

In [95]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=10)
kf


KFold(n_splits=10, random_state=None, shuffle=False)

In [97]:
for train_index, test_index in kf.split([1,2,3,4,5,6,7,8,9,10]):
    print(train_index, test_index)


[1 2 3 4 5 6 7 8 9] [0]
[0 2 3 4 5 6 7 8 9] [1]
[0 1 3 4 5 6 7 8 9] [2]
[0 1 2 4 5 6 7 8 9] [3]
[0 1 2 3 5 6 7 8 9] [4]
[0 1 2 3 4 6 7 8 9] [5]
[0 1 2 3 4 5 7 8 9] [6]
[0 1 2 3 4 5 6 8 9] [7]
[0 1 2 3 4 5 6 7 9] [8]
[0 1 2 3 4 5 6 7 8] [9]


In [64]:
def get_score(model, X_train, X_test, y_train, y_test):
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)


In [108]:
from sklearn.model_selection import StratifiedKFold
folds = StratifiedKFold(n_splits=10)

scores_logistic = []
scores_svm = []
scores_rf = []

for train_index, test_index in folds.split(digits.data,digits.target):
    X_train, X_test, y_train, y_test = digits.data[train_index], digits.data[test_index],digits.target[train_index], digits.target[test_index]
    scores_logistic.append(get_score(LogisticRegression(solver='liblinear',multi_class='ovr'), X_train, X_test, y_train, y_test))  
    scores_svm.append(get_score(SVC(gamma='auto'), X_train, X_test, y_train, y_test))
    scores_rf.append(get_score(RandomForestClassifier(n_estimators=40), X_train, X_test, y_train, y_test))

In [109]:
scores_logistic


[0.9111111111111111,
 0.9388888888888889,
 0.8944444444444445,
 0.8666666666666667,
 0.9444444444444444,
 0.9722222222222222,
 0.9777777777777777,
 0.9497206703910615,
 0.8603351955307262,
 0.9441340782122905]

In [110]:
scores_svm

[0.4388888888888889,
 0.5777777777777777,
 0.4666666666666667,
 0.3888888888888889,
 0.4722222222222222,
 0.4,
 0.5055555555555555,
 0.5754189944134078,
 0.5586592178770949,
 0.43575418994413406]

In [111]:
scores_rf

[0.9,
 0.9666666666666667,
 0.9444444444444444,
 0.9222222222222223,
 0.9555555555555556,
 0.9777777777777777,
 0.9777777777777777,
 0.9553072625698324,
 0.9273743016759777,
 0.9217877094972067]

# cross_val_score function

In [66]:
from sklearn.model_selection import cross_val_score

# for logistic regression

In [113]:
cross_val_score(LogisticRegression(solver='liblinear',multi_class='ovr'), digits.data, digits.target,cv=10)


array([0.91111111, 0.93888889, 0.89444444, 0.86666667, 0.94444444,
       0.97222222, 0.97777778, 0.94972067, 0.8603352 , 0.94413408])

# for svm 

In [114]:
cross_val_score(SVC(gamma='auto'), digits.data, digits.target,cv=10)

array([0.43888889, 0.57777778, 0.46666667, 0.38888889, 0.47222222,
       0.4       , 0.50555556, 0.57541899, 0.55865922, 0.43575419])

# randomforestclassifier

In [76]:
cross_val_score(RandomForestClassifier(n_estimators=40),digits.data, digits.target,cv=9)


array([0.91      , 0.965     , 0.94      , 0.945     , 0.94      ,
       0.975     , 0.95979899, 0.94472362, 0.93467337])

# parameter turning

In [115]:
scores1 = cross_val_score(RandomForestClassifier(n_estimators=5),digits.data, digits.target, cv=10)
np.average(scores1)


0.8692209807572937

In [80]:
scores2 = cross_val_score(RandomForestClassifier(n_estimators=20),digits.data, digits.target, cv=10)
np.average(scores2)

0.9304345127250155

In [81]:
scores3 = cross_val_score(RandomForestClassifier(n_estimators=30),digits.data, digits.target, cv=10)
np.average(scores3)


0.9410148975791433

In [82]:
scores4 = cross_val_score(RandomForestClassifier(n_estimators=40),digits.data, digits.target, cv=10)
np.average(scores4)


0.9482340161390441

In [117]:
scores5 = cross_val_score(RandomForestClassifier(n_estimators=50),digits.data, digits.target, cv=10)
np.average(scores5)


0.9460055865921786

In [118]:
scores6= cross_val_score(RandomForestClassifier(n_estimators=60),digits.data, digits.target, cv=10)
np.average(scores6)


0.9504748603351955