## Exercise

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB

In [2]:
# Load the digits dataset as a single container object (not an (X, y) tuple);
# the following cell reads its .data, .feature_names and .target attributes.
data = load_digits(return_X_y=False)

In [3]:
# Target labels, one per sample row of the feature matrix.
Y = data.target

# Feature matrix wrapped in a DataFrame so every column carries its feature name.
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Preview the first five rows.
X.head(n=5)

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0


In [4]:
# Fixed seed handed to every estimator below that accepts `random_state`, so
# that re-running the notebook from a fresh kernel yields the same
# cross-validation scores instead of slightly different ones each time.
RANDOM_STATE = 42

# Candidate classifiers for the grid search.
# Each entry maps a display name to:
#   "model":  an unfitted estimator instance
#   "params": the hyper-parameter grid to search for that estimator; an empty
#             dict means the estimator is scored once with the settings given
#             in its constructor.
model_with_hyperparams = {
    'Logistic Regression': {
        "model": LogisticRegression(
            solver='liblinear',
            multi_class='auto',
            random_state=RANDOM_STATE,
        ),
        "params": {'C': [1, 5, 10, 20]},
    },
    'Random Forest': {
        "model": RandomForestClassifier(random_state=RANDOM_STATE),
        "params": {'n_estimators': [1, 5, 10, 20]},
    },
    'SVC': {
        "model": SVC(gamma='auto'),
        "params": {
            'kernel': ['rbf', 'linear'],
            'C': [5, 10, 15, 20],
        },
    },
    'Decision Tree': {
        "model": DecisionTreeClassifier(random_state=RANDOM_STATE),
        "params": {'criterion': ['gini', 'entropy']},
    },
    'Multinomial Naive Bayes': {
        "model": MultinomialNB(),
        "params": {},
    },
    'Gaussion Naive Bayes': {
        "model": GaussianNB(),
        "params": {},
    },
    'Bernoulli Naive Bayes': {
        "model": BernoulliNB(),
        "params": {},
    },
}

In [6]:
# One summary record per candidate: display name, best mean CV score, and the
# parameter combination that achieved it.
best_model = []

for model, config in model_with_hyperparams.items():
    # 5-fold cross-validated search over this candidate's parameter grid.
    clf = GridSearchCV(
        estimator=config['model'],
        param_grid=config['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(X, Y)

    record = {
        'model': model,
        'score': clf.best_score_,
        'params': clf.best_params_,
    }
    best_model.append(record)

In [7]:
# Tabulate the per-model search results: one row per candidate with its
# 'model', 'score' and 'params' columns.
df = pd.DataFrame(data=best_model)
df

Unnamed: 0,model,score,params
0,Logistic Regression,0.922114,{'C': 1}
1,Random Forest,0.923779,{'n_estimators': 20}
2,SVC,0.947697,"{'C': 5, 'kernel': 'linear'}"
3,Decision Tree,0.807479,{'criterion': 'entropy'}
4,Multinomial Naive Bayes,0.87035,{}
5,Gaussion Naive Bayes,0.806928,{}
6,Bernoulli Naive Bayes,0.824174,{}
