In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression,LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
sns.set()
%matplotlib inline

import warnings

# Silence warning output for the rest of the notebook (presumably the FutureWarning /
# convergence chatter emitted by the estimators below -- TODO confirm which ones).
# This goes through the official filter API instead of overwriting ``warnings.warn``:
# the stdlib function stays intact for every other library in the kernel, and the
# suppression can be undone with ``warnings.resetwarnings()`` or scoped with
# ``warnings.catch_warnings()``.
warnings.filterwarnings("ignore")

In [20]:
# Load the digits dataset that ships with scikit-learn.
digits = load_digits()
# List the attributes available on the returned object (data, images, target, ...).
dir(digits)

['DESCR', 'data', 'images', 'target', 'target_names']

In [21]:
# Hold out 20% of the samples for evaluation. Pinning random_state makes the split,
# and therefore every hold-out score computed from it, repeatable across kernel
# restarts; without it each run shuffles differently.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data, digits.target, test_size=0.20, random_state=42
)

In [22]:
# Logistic regression with default settings: train on the training split, then
# report its score on the held-out split as the cell output.
model_log = LogisticRegression().fit(X_train, y_train)
model_log.score(X_test, y_test)

0.9611111111111111

In [23]:
# Random forest on the same hold-out split. Bound to its own name so it neither
# masquerades as, nor overwrites, the logistic-regression model fitted above.
# random_state pins the forest's internal randomness so the score is repeatable.
model_rf = RandomForestClassifier(random_state=42)
model_rf.fit(X_train, y_train)
model_rf.score(X_test, y_test)



0.9527777777777777

In [24]:
# Linear-kernel support vector classifier on the same hold-out split. Uses its own
# name instead of recycling ``model_log``, which refers to the logistic regression.
model_svc = SVC(C=1.0, kernel='linear')
model_svc.fit(X_train, y_train)
model_svc.score(X_test, y_test)

0.9861111111111112

In [25]:
from sklearn.model_selection import KFold

# Three-way K-fold splitter, plus one fresh score list per model
# (logistic regression, SVC, random forest).
kfold = KFold(n_splits=3)
le_score, svc_score, ran_score = [], [], []

def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training data and return its score on the test data.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``score(X, y)``
        The estimator to train; it is fitted in place.
    X_train, y_train
        Features and targets passed to ``model.fit``.
    X_test, y_test
        Features and targets passed to ``model.score``.

    Returns
    -------
    Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return test_score

# Each split yields the row indices of the training part and of the held-out part of
# one fold (they are train/test indices, not x/y indices). The fold's arrays are kept
# in a local tuple so the hold-out X_train/X_test/y_train/y_test created by
# train_test_split earlier are not silently overwritten.
for train_index, test_index in kfold.split(digits.data, digits.target):
    fold = (
        digits.data[train_index],
        digits.data[test_index],
        digits.target[train_index],
        digits.target[test_index],
    )
    le_score.append(get_score(LogisticRegression(), *fold))
    svc_score.append(get_score(SVC(C=1.0, kernel='linear'), *fold))
    # random_state pins the forest's randomness so the per-fold scores are repeatable.
    ran_score.append(get_score(RandomForestClassifier(n_estimators=40, random_state=42), *fold))

In [26]:
# Per-fold score() results for LogisticRegression from the 3-fold loop above.
le_score

[0.8964941569282137, 0.9515859766277128, 0.9115191986644408]

In [27]:
# Per-fold score() results for the linear-kernel SVC from the 3-fold loop above.
svc_score

[0.9348914858096828, 0.9565943238731218, 0.9398998330550918]

In [28]:
# Per-fold score() results for the 40-tree random forest from the 3-fold loop above.
ran_score

[0.9432387312186978, 0.9415692821368948, 0.9232053422370617]

In [29]:
from sklearn.model_selection import StratifiedKFold

# Three-way stratified splitter. The score lists are re-created here so this cell
# starts from empty accumulators rather than appending to an earlier run's results.
kfold = StratifiedKFold(n_splits=3)
le_score, svc_score, ran_score = [], [], []

# get_score(model, X_train, X_test, y_train, y_test) is reused from the KFold cell
# earlier in the notebook rather than being defined a second time here.
# Each split yields train/test row indices; the fold's arrays live in a local tuple so
# the hold-out X_train/X_test/y_train/y_test globals are left untouched.
for train_index, test_index in kfold.split(digits.data, digits.target):
    fold = (
        digits.data[train_index],
        digits.data[test_index],
        digits.target[train_index],
        digits.target[test_index],
    )
    le_score.append(get_score(LogisticRegression(), *fold))
    svc_score.append(get_score(SVC(C=1.0, kernel='linear'), *fold))
    # random_state pins the forest's randomness so the per-fold scores are repeatable.
    ran_score.append(get_score(RandomForestClassifier(n_estimators=40, random_state=42), *fold))

In [30]:
# One line per model, in order: logistic regression, SVC, random forest.
for fold_scores in (le_score, svc_score, ran_score):
    print(fold_scores)

[0.8953488372093024, 0.9499165275459098, 0.9093959731543624]
[0.9352159468438538, 0.9582637729549248, 0.9379194630872483]
[0.9302325581395349, 0.9532554257095158, 0.912751677852349]


In [31]:
from sklearn.model_selection import StratifiedShuffleSplit

# Three stratified random train/test splits (test size left at the library default).
# random_state makes the shuffles, and therefore the scores, repeatable across runs.
# NOTE(review): the variable keeps the name ``kfold`` used by the previous cells even
# though this splitter draws random splits rather than partitioning into folds.
kfold = StratifiedShuffleSplit(n_splits=3, random_state=42)
le_score, svc_score, ran_score = [], [], []

# get_score(model, X_train, X_test, y_train, y_test) is reused from the KFold cell
# earlier in the notebook rather than being defined a third time here.
# Each split yields train/test row indices; the split's arrays live in a local tuple so
# the hold-out X_train/X_test/y_train/y_test globals are left untouched.
for train_index, test_index in kfold.split(digits.data, digits.target):
    fold = (
        digits.data[train_index],
        digits.data[test_index],
        digits.target[train_index],
        digits.target[test_index],
    )
    le_score.append(get_score(LogisticRegression(), *fold))
    svc_score.append(get_score(SVC(C=1.0, kernel='linear'), *fold))
    # random_state pins the forest's randomness so the per-split scores are repeatable.
    ran_score.append(get_score(RandomForestClassifier(n_estimators=40, random_state=42), *fold))

In [32]:
# One line per model, in order: logistic regression, SVC, random forest.
for split_scores in (le_score, svc_score, ran_score):
    print(split_scores)

[0.9722222222222222, 0.9666666666666667, 0.9666666666666667]
[0.9777777777777777, 0.9722222222222222, 0.9611111111111111]
[0.9666666666666667, 0.9722222222222222, 0.9666666666666667]


In [33]:
from sklearn.model_selection import cross_val_score

# Same 3-way evaluation of logistic regression, delegated to scikit-learn's helper
# instead of the hand-written split loop.
cross_val_score(estimator=LogisticRegression(), X=digits.data, y=digits.target, cv=3)

array([0.89534884, 0.94991653, 0.90939597])

In [34]:
# 3-way cross-validated scores for the linear-kernel SVC.
cross_val_score(estimator=SVC(kernel='linear', C=1.0), X=digits.data, y=digits.target, cv=3)

array([0.93521595, 0.95826377, 0.93791946])

In [35]:
# 3-way cross-validated scores for a 40-tree random forest. random_state pins the
# forest's internal randomness so re-running the cell reproduces the same scores.
cross_val_score(
    RandomForestClassifier(n_estimators=40, random_state=42),
    digits.data,
    digits.target,
    cv=3,
)

array([0.93521595, 0.94323873, 0.92785235])