In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split,KFold,StratifiedKFold,cross_val_score,GridSearchCV
from sklearn.preprocessing import MinMaxScaler,StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB

from sklearn.datasets import load_digits

# Dataset

In [2]:
digits = load_digits()
digits.keys()

dict_keys(['data', 'target', 'frame', 'feature_names', 'target_names', 'images', 'DESCR'])

In [3]:
df = pd.DataFrame(digits.data, columns=digits.feature_names)
df.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0


# Normalization

In [4]:
scaler = MinMaxScaler()
df2 = pd.DataFrame(scaler.fit_transform(df),columns=df.columns)
df2.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,0.3125,0.8125,0.5625,0.0625,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.375,0.8125,0.625,0.0,0.0,0.0
1,0.0,0.0,0.0,0.75,0.8125,0.3125,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.6875,1.0,0.625,0.0,0.0
2,0.0,0.0,0.0,0.25,0.9375,0.75,0.0,0.0,0.0,0.0,...,0.3125,0.0,0.0,0.0,0.0,0.1875,0.6875,1.0,0.5625,0.0
3,0.0,0.0,0.4375,0.9375,0.8125,0.0625,0.0,0.0,0.0,0.5,...,0.5625,0.0,0.0,0.0,0.4375,0.8125,0.8125,0.5625,0.0,0.0
4,0.0,0.0,0.0,0.0625,0.6875,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.125,1.0,0.25,0.0,0.0


# GridSearchCV

In [7]:
algos = {
    'Logistic_Regression' : [LogisticRegression(max_iter=1000),{
        'C' : np.arange(1,10)
    }],
    'SVM' : [SVC(),{
        'C' : np.arange(1,10),
        'kernel' : ['rbf','linear','poly','sigmoid']
    }],
    'DescisionTree' : [DecisionTreeClassifier(),{
        'criterion' : ['gini','entropy']
    }],
    'RandomForest' : [RandomForestClassifier(),{
        'criterion' : ['gini','entropy'],
        'n_estimators' : np.arange(100,500,50)
    }] 
}

In [8]:
algorithm = []
best_parameters = []
best_score = []

for i,j in algos.items():
    gscv = GridSearchCV(j[0],j[1],cv = 10)
    gscv.fit(df2,digits.target)
    
    algorithm.append(i)
    best_score.append(gscv.best_score_)
    best_parameters.append(gscv.best_params_)

In [9]:
for i in range(4):
    print(f'{algorithm[i]}---{best_parameters[i]}---{best_score[i]}')

Logistic_Regression---{'C': 1}---0.9409900682805711
SVM---{'C': 5, 'kernel': 'rbf'}---0.9827374301675977
DescisionTree---{'criterion': 'gini'}---0.8241185599006828
RandomForest---{'criterion': 'entropy', 'n_estimators': 300}---0.9538081936685288
