In [70]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import sklearn.datasets as ds
import sklearn.svm as sv
import sklearn.ensemble as en
import sklearn.model_selection as ms
import sklearn.linear_model as lm

In [71]:
# Load the scikit-learn digits dataset and hold out 30% of it for testing.
# A fixed random_state makes the shuffled split (and therefore every score
# computed from it) reproducible across kernel restarts.
digits = ds.load_digits()
X_train, X_test, y_train, y_test = ms.train_test_split(
    digits.data, digits.target, test_size=0.3, random_state=42
)

## Logistic Regression

In [72]:
# Fit a logistic-regression classifier on the training split, then display
# its score on the test split as the cell output.
l_model = lm.LogisticRegression(solver='lbfgs', multi_class='auto')
l_model.fit(X_train, y_train)
l_model.score(X_test, y_test)



0.9666666666666667

## SVM

In [83]:
# Support-vector classifier; only gamma is set explicitly here.
# The fitted estimator is the last expression, so its repr is displayed.
svc = sv.SVC(gamma='scale')
svc.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

## Random Forest

In [74]:
# Random forest built from 40 trees.
# NOTE: no random_state is passed, so the fitted forest can differ between runs.
rf = en.RandomForestClassifier(n_estimators=40)
rf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=40,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [75]:
def get_score(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training data and return its score on the test data.

    Parameters
    ----------
    model : object exposing ``fit(X, y)`` and ``score(X, y)``
        The estimator to evaluate. It is fitted in place, so any previous
        fit held by the same object is replaced.
    X_train, y_train
        Features and targets passed to ``model.fit``.
    X_test, y_test
        Features and targets passed to ``model.score``.

    Returns
    -------
    Whatever ``model.score(X_test, y_test)`` returns.
    """
    model.fit(X_train, y_train)
    test_score = model.score(X_test, y_test)
    return test_score

## KFold cross validation

In [76]:
# Plain K-fold splitter with three folds; the bare name displays its repr.
kf = ms.KFold(n_splits=3)
kf

KFold(n_splits=3, random_state=None, shuffle=False)

In [77]:
# Show how the splitter partitions nine samples: each iteration prints the
# index array used for training followed by the held-out index array.
for train_index, test_index in kf.split(list(range(1, 10))):
    print(train_index, test_index)

[3 4 5 6 7 8] [0 1 2]
[0 1 2 6 7 8] [3 4 5]
[0 1 2 3 4 5] [6 7 8]


In [78]:
# Five-fold stratified splitter used for the manual cross-validation loop;
# the bare name displays its repr.
stratified_kfold = ms.StratifiedKFold(n_splits=5)
stratified_kfold

StratifiedKFold(n_splits=5, random_state=None, shuffle=False)

In [84]:
# Per-fold scores for each of the three models.
logistic_scores = []
svm_scores = []
rf_scores = []

# Refit and score every model on each stratified fold of the full dataset.
# Fold-local names are used for the slices so the hold-out split created
# earlier (X_train, X_test, y_train, y_test) is not silently overwritten
# with the contents of the last fold.
for train_index, test_index in stratified_kfold.split(digits.data, digits.target):
    X_fold_train, X_fold_test = digits.data[train_index], digits.data[test_index]
    y_fold_train, y_fold_test = digits.target[train_index], digits.target[test_index]

    logistic_scores.append(
        get_score(l_model, X_fold_train, X_fold_test, y_fold_train, y_fold_test)
    )
    svm_scores.append(
        get_score(svc, X_fold_train, X_fold_test, y_fold_train, y_fold_test)
    )
    rf_scores.append(
        get_score(rf, X_fold_train, X_fold_test, y_fold_train, y_fold_test)
    )



In [92]:
# Mean of the per-fold logistic-regression scores.
sum(logistic_scores) / len(logistic_scores)

0.9149169383167782

In [93]:
# Mean of the per-fold SVM scores (divide by this list's own length rather
# than the logistic list's, so the average stays correct if they ever differ).
sum(svm_scores) / len(svm_scores)

0.9638434678923486

In [94]:
# Mean of the per-fold random-forest scores (divide by this list's own length
# rather than the logistic list's, so the average stays correct if they ever differ).
sum(rf_scores) / len(rf_scores)

0.9305262092983376

## Cross Validation

In [96]:
# Same evaluation via the library helper: one score per fold (cv=5) for the
# logistic-regression model on the full dataset.
ms.cross_val_score(l_model, X=digits.data, y=digits.target, cv=5)



array([0.92032967, 0.87016575, 0.94428969, 0.93557423, 0.90422535])

In [97]:
# One score per fold (cv=5) for the SVM on the full dataset.
ms.cross_val_score(svc, X=digits.data, y=digits.target, cv=5)

array([0.96153846, 0.94475138, 0.98328691, 0.98879552, 0.94084507])

In [98]:
# One score per fold (cv=5) for the random forest on the full dataset.
ms.cross_val_score(rf, X=digits.data, y=digits.target, cv=5)

array([0.93131868, 0.90331492, 0.95264624, 0.9719888 , 0.90140845])