## Importing the digits data set---------------------------------------------------------

In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_digits
# Load the digits dataset object; later cells read its .data, .target and
# .target_names attributes.
my_digits = load_digits()

In [None]:
# List the attribute names exposed by the loaded dataset object.
dir(my_digits)

In [None]:
# Inspect the label names stored with the dataset.
my_digits.target_names

In [None]:
# Peek at the labels at positions 5, 6 and 7.
my_digits.target[5:8]

In [None]:
# Peek at the feature rows at positions 5, 6 and 7.
my_digits.data[5:8]

## Creating the DataFrame and adding the target column-------------------------------

In [None]:
# Wrap the feature array in a DataFrame. No column names are passed, so the
# feature columns get pandas' default labels.
df_digits = pd.DataFrame(my_digits.data)
# Preview the first rows.
df_digits.head()

In [None]:
# Append the labels as a 'target' column (mutates df_digits in place).
df_digits['target']=my_digits.target
# Preview again to confirm the new column is present.
df_digits.head()

## Creating input and output variables---------------------------------------------

In [None]:
# Separate the label column (model output) from the remaining feature
# columns (model input). df_digits itself is left unchanged.
y = df_digits['target']
x = df_digits.drop(columns=['target'])

In [None]:
# Preview the first 5 rows of the input features.
x.head(5)

In [None]:
# Preview the first 5 labels.
y.head(5)

## Split data set into train and test set---------------------------------------------

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing. random_state is fixed so that the split
# is the same on every run; without it the rows are re-drawn each time the cell
# executes and the scores computed from this split cannot be reproduced.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

In [None]:
# Slice five rows (positions 50-54) out of the training features to eyeball the split.
x_train[50:55]

## Logistic Regression model---------------------------------------------------------

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression classifier, explicitly configured with the 'newton-cg' solver.
model_lr = LogisticRegression(solver='newton-cg')

In [None]:
# Train the logistic regression model on the training portion of the split.
model_lr.fit(x_train,y_train)

## Support Vector Machine model----------------------------------------------------------------

In [None]:
from sklearn.svm import SVC
# Support vector classifier with all library-default hyperparameters.
model_svm = SVC()

In [None]:
# Train the support vector classifier on the training portion of the split.
model_svm.fit(x_train,y_train)

## Random Forest Classifier-----------------------------------------------------------

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Random forest classifier with all library-default hyperparameters.
model_rfc = RandomForestClassifier()

In [None]:
# Train the random forest on the training portion of the split.
model_rfc.fit(x_train,y_train)

## Comparing scores---------------------------------------------------------------------

In [None]:
# Score the fitted logistic regression model on the held-out test rows.
model_lr.score(x_test,y_test)

In [None]:
# Score the fitted support vector classifier on the same held-out test rows.
model_svm.score(x_test,y_test)

In [None]:
# Score the fitted random forest on the same held-out test rows.
model_rfc.score(x_test,y_test)

## Learning to use KFold----------------------------------------------------------------

In [None]:
from sklearn.model_selection import KFold
# Plain 3-fold splitter, echoed to display its configuration.
# NOTE(review): `kf` is not used by any later cell visible here; the manual
# cross-validation loop iterates over `my_kfolds` instead.
kf = KFold(n_splits = 3)
kf

In [None]:
def get_score(model, x_train, x_test, y_train, y_test):
    """Fit ``model`` on the training data and return its score on the test data.

    Args:
        model: Object exposing ``fit(X, y)`` and ``score(X, y)``; it is fitted
            in place.
        x_train: Training inputs passed to ``model.fit``.
        x_test: Evaluation inputs passed to ``model.score``.
        y_train: Training labels passed to ``model.fit``.
        y_test: Evaluation labels passed to ``model.score``.

    Returns:
        Whatever ``model.score(x_test, y_test)`` returns.
    """
    model.fit(x_train, y_train)
    test_score = model.score(x_test, y_test)
    return test_score

In [None]:
# Sanity-check the helper: fit a fresh logistic regression on the current
# x_train/y_train and score it on the current x_test/y_test.
get_score(LogisticRegression(solver='newton-cg'),x_train,x_test,y_train,y_test)

In [None]:
from sklearn.model_selection import StratifiedKFold
# 3-fold stratified splitter; its split() is called with both the features and
# the labels in the cross-validation loop.
my_kfolds = StratifiedKFold(n_splits = 3)

In [None]:
# Per-fold test scores for each model, appended in fold order.
scores_lr = []
scores_svm = []
scores_rfc = []

# Fold-local names (fold_*) are used on purpose: rebinding x_train / x_test /
# y_train / y_test here would overwrite the hold-out split created by
# train_test_split earlier in the notebook, so re-running the earlier scoring
# cells afterwards would silently evaluate on the last fold instead.
for train_index, test_index in my_kfolds.split(my_digits.data, my_digits.target):
    fold_x_train = my_digits.data[train_index]
    fold_x_test = my_digits.data[test_index]
    fold_y_train = my_digits.target[train_index]
    fold_y_test = my_digits.target[test_index]
    fold_data = (fold_x_train, fold_x_test, fold_y_train, fold_y_test)

    # A fresh, unfitted estimator is created for every fold so that no state
    # carries over from one fold to the next.
    scores_lr.append(get_score(LogisticRegression(solver='newton-cg'), *fold_data))
    scores_svm.append(get_score(SVC(), *fold_data))
    scores_rfc.append(get_score(RandomForestClassifier(n_estimators=5), *fold_data))

In [None]:
# Report the per-fold scores: one heading line followed by the score list for
# each model, with a blank line between models.
print("Logistic Regression:", scores_lr, sep="\n")
print("\nSupport vector Machine:", scores_svm, sep="\n")
print("\nRandom Forest Classifier:", scores_rfc, sep="\n")

## Using cross_val_score---------------------------------------------------------------

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# Cross-validate logistic regression on the full dataset using 10 folds (cv=10).
cross_val_score(LogisticRegression(solver="newton-cg"), my_digits.data, my_digits.target, cv=10)

#### Change `cv` to set the number of folds used for cross-validation

In [None]:
# Cross-validate the support vector classifier on the full dataset. No cv is
# passed, so the library default applies rather than the 10 folds used for the
# logistic regression cell.
cross_val_score(SVC(), my_digits.data, my_digits.target)

In [None]:
# Cross-validate the random forest on the full dataset. No cv is passed, so the
# library default number of folds applies.
cross_val_score(RandomForestClassifier(), my_digits.data, my_digits.target)