In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.datasets import load_digits
 
# Load the digits dataset; its `.data` and `.target` attributes feed every experiment below.
digits = load_digits()

In [2]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the digits for testing.
# The split is seeded so the hold-out scores reported in the following cells
# can be reproduced after a kernel restart (an unseeded split reshuffles on every run).
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.3,
    random_state=10,
)

In [3]:
# Logistic-regression baseline: fit on the training part, score on the held-out part.
# `fit` returns the estimator itself, so `lr` is the fitted model.
lr = LogisticRegression(max_iter=10000).fit(X_train, y_train)
lr.score(X_test, y_test)

0.9722222222222222

In [4]:
# Support-vector classifier with default settings, scored on the same hold-out split.
# `fit` returns the estimator itself, so `svm` is the fitted model.
svm = SVC().fit(X_train, y_train)
svm.score(X_test, y_test)

0.9814814814814815

In [5]:
# Random-forest baseline on the hold-out split.
# A forest is built with randomness, so it is seeded here to make the
# reported score repeatable from run to run.
rf = RandomForestClassifier(random_state=10)
rf.fit(X_train, y_train)
rf.score(X_test, y_test)

0.9740740740740741

In [6]:
from sklearn.model_selection import KFold
# Five shuffled folds; the fixed seed keeps the fold assignment repeatable.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=10,
)
kf

KFold(n_splits=5, random_state=10, shuffle=True)

In [7]:
# Toy illustration: print which positions of a 9-element list end up on the
# train side and on the test side of each fold.
for train_index, test_index in kf.split(list(range(1, 10))):
    print(train_index, test_index)

[0 1 3 4 5 6 7] [2 8]
[0 1 2 3 4 7 8] [5 6]
[0 2 4 5 6 7 8] [1 3]
[1 2 3 4 5 6 8] [0 7]
[0 1 2 3 5 6 7 8] [4]


In [8]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training data and return its score on the test data.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``score(X, y)``
        The estimator to train; it is fitted in place.
    X_train, y_train
        Features and labels passed to ``model.fit``.
    X_test, y_test
        Features and labels passed to ``model.score``.

    Returns
    -------
    Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return test_score

In [9]:
# Same logistic-regression experiment as before, now routed through the helper.
get_score(
    model=LogisticRegression(max_iter=10000),
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

0.9722222222222222

In [10]:
# Default SVC scored through the helper on the hold-out split.
get_score(
    model=SVC(),
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
)

0.9814814814814815

In [11]:
# 40-tree random forest scored through the helper.
# The forest is seeded so this score is repeatable across runs.
get_score(
    RandomForestClassifier(n_estimators=40, random_state=10),
    X_train,
    X_test,
    y_train,
    y_test,
)

0.9722222222222222

In [12]:
from sklearn.model_selection import StratifiedKFold
# Stratified splitter with 3 folds.
# NOTE(review): the manual scoring loop that follows iterates `kf`, not `folds`,
# so this object appears unused — confirm which splitter was intended.
folds = StratifiedKFold(n_splits = 3)

In [13]:
scores_lr = []
scores_svm = []
scores_rf = []

# Score each model on every split produced by `kf`.
# The per-fold arrays use their own names so the notebook-level hold-out split
# (X_train / X_test / y_train / y_test) is not overwritten: re-running an
# earlier hold-out cell after this loop still uses the 70/30 split instead of
# silently picking up the last fold.
for train_index, test_index in kf.split(digits.data):
    X_fold_train, X_fold_test = digits.data[train_index], digits.data[test_index]
    y_fold_train, y_fold_test = digits.target[train_index], digits.target[test_index]

    scores_lr.append(
        get_score(LogisticRegression(max_iter=10000), X_fold_train, X_fold_test, y_fold_train, y_fold_test)
    )
    scores_svm.append(
        get_score(SVC(), X_fold_train, X_fold_test, y_fold_train, y_fold_test)
    )
    # Seeded so the forest's per-fold scores are repeatable across runs.
    scores_rf.append(
        get_score(
            RandomForestClassifier(n_estimators=40, random_state=10),
            X_fold_train,
            X_fold_test,
            y_fold_train,
            y_fold_test,
        )
    )

In [14]:
# Per-fold test scores of logistic regression collected by the manual K-fold loop.
scores_lr

[0.9527777777777777,
 0.9638888888888889,
 0.9749303621169917,
 0.9665738161559888,
 0.9637883008356546]

In [15]:
# Per-fold test scores of the SVC collected by the manual K-fold loop.
scores_svm

[0.9833333333333333,
 0.9861111111111112,
 0.9805013927576601,
 0.9916434540389972,
 0.9860724233983287]

In [16]:
# Per-fold test scores of the random forest collected by the manual K-fold loop.
scores_rf

[0.9777777777777777,
 0.9722222222222222,
 0.9693593314763231,
 0.9860724233983287,
 0.9721448467966574]

In [17]:
from sklearn.model_selection import cross_val_score

In [18]:
# Library shortcut for the manual loop: fits and scores the estimator once per fold.
# No `cv` argument is given, so the library's default splitting is used.
cross_val_score(
    estimator=LogisticRegression(max_iter=10000),
    X=digits.data,
    y=digits.target,
)

array([0.925     , 0.875     , 0.93871866, 0.93593315, 0.89693593])

In [19]:
# Cross-validated scores for the default SVC on the full digits data
# (no `cv` argument, so the library's default splitting is used).
cross_val_score(
    estimator=SVC(),
    X=digits.data,
    y=digits.target,
)

array([0.96111111, 0.94444444, 0.98328691, 0.98885794, 0.93871866])

In [20]:
# Cross-validated scores for a 40-tree random forest on the full digits data.
# The forest is seeded so the per-fold scores are repeatable across runs.
cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=10),
    digits.data,
    digits.target,
)

array([0.91944444, 0.88888889, 0.95264624, 0.96100279, 0.91643454])

## Exercise


In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np

In [2]:
# Load the iris dataset; its `.data` and `.target` attributes are used by the exercise cells below.
iris = load_iris()

In [3]:
# List the attributes available on the loaded dataset object (exploration only).
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

## Logistic Regression

In [17]:
# Cross-validated scores of logistic regression on iris
# (no `cv` argument, so the library's default splitting is used).
l_scores = cross_val_score(
    estimator=LogisticRegression(max_iter=10000),
    X=iris.data,
    y=iris.target,
)
l_scores

array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

In [18]:
# Mean of the per-fold logistic-regression scores (unweighted average).
l_scores.mean()

0.9733333333333334

## Decision Tree

In [19]:
# Cross-validated scores of a decision tree on iris.
# Tree construction involves randomness, so the estimator is seeded to keep
# the per-fold scores (and the average computed from them) repeatable.
d_scores = cross_val_score(DecisionTreeClassifier(random_state=10), iris.data, iris.target)
d_scores

array([0.96666667, 0.96666667, 0.9       , 0.93333333, 1.        ])

In [20]:
# Mean of the per-fold decision-tree scores (unweighted average).
d_scores.mean()

0.9533333333333334

## Support Vector Machine (SVM)

In [21]:
# Cross-validated scores of the default SVC on iris
# (no `cv` argument, so the library's default splitting is used).
s_scores = cross_val_score(
    estimator=SVC(),
    X=iris.data,
    y=iris.target,
)
s_scores

array([0.96666667, 0.96666667, 0.96666667, 0.93333333, 1.        ])

In [22]:
# Mean of the per-fold SVC scores (unweighted average).
s_scores.mean()

0.9666666666666666

## Random Forest

In [23]:
# Cross-validated scores of a 40-tree random forest on iris.
# The forest is seeded so the per-fold scores (and the average computed from
# them) are repeatable across runs.
r_scores = cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=10),
    iris.data,
    iris.target,
)
r_scores

array([0.96666667, 0.96666667, 0.93333333, 0.96666667, 1.        ])

In [24]:
# Mean of the per-fold random-forest scores (unweighted average).
r_scores.mean()

0.9666666666666668

#### Best mean cross-validation score so far: Logistic Regression, 0.9733333333333334