In [16]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits

In [17]:
digits = load_digits()

In [18]:

dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [19]:
data = pd.DataFrame(digits.data, columns=digits.feature_names)

In [20]:
digits.target_names

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


data.head()


In [21]:
data['target'] = digits.target

In [22]:
#add ftarget name with atrget

data['t_name'] = data.target.apply(lambda x : digits.target_names[x] )
data.head(5)

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target,t_name
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4,4


In [23]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [24]:
model_params = {
    'svm': {
        'model': SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear']
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': [1, 5, 10]
        }
    },
    'logistic_regression': {
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'C': [1, 5, 10]
        }
    },
    'naive_bayes_gaussian': {
        'model': GaussianNB(),
        'params': {}
    },
    'naive_bayes_multinomial': {
        'model': MultinomialNB(),
        'params': {}
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini', 'entropy'],

        }
    }
}

In [27]:
#train te model
from sklearn.model_selection import GridSearchCV


scores =[]
for model_name,md in model_params.items():
    clf= GridSearchCV(md['model'],md['params'], cv=5, return_train_score = False)
    clf.fit(digits.data,digits.target)
    scores.append({
        'model':model_name,
        'best_score':clf.best_score_ ,
        'best_params':clf.best_params_   })
    print("The score of all is {}".format(scores))

The score of all is [{'model': 'svm', 'best_score': np.float64(0.9476973073351903), 'best_params': {'C': 1, 'kernel': 'linear'}}]
The score of all is [{'model': 'svm', 'best_score': np.float64(0.9476973073351903), 'best_params': {'C': 1, 'kernel': 'linear'}}, {'model': 'random_forest', 'best_score': np.float64(0.8965242958836275), 'best_params': {'n_estimators': 10}}]




The score of all is [{'model': 'svm', 'best_score': np.float64(0.9476973073351903), 'best_params': {'C': 1, 'kernel': 'linear'}}, {'model': 'random_forest', 'best_score': np.float64(0.8965242958836275), 'best_params': {'n_estimators': 10}}, {'model': 'logistic_regression', 'best_score': np.float64(0.9221138966264315), 'best_params': {'C': 1}}]
The score of all is [{'model': 'svm', 'best_score': np.float64(0.9476973073351903), 'best_params': {'C': 1, 'kernel': 'linear'}}, {'model': 'random_forest', 'best_score': np.float64(0.8965242958836275), 'best_params': {'n_estimators': 10}}, {'model': 'logistic_regression', 'best_score': np.float64(0.9221138966264315), 'best_params': {'C': 1}}, {'model': 'naive_bayes_gaussian', 'best_score': np.float64(0.8069281956050759), 'best_params': {}}]
The score of all is [{'model': 'svm', 'best_score': np.float64(0.9476973073351903), 'best_params': {'C': 1, 'kernel': 'linear'}}, {'model': 'random_forest', 'best_score': np.float64(0.8965242958836275), 'best

In [28]:
data= pd.DataFrame(scores)
data

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.896524,{'n_estimators': 10}
2,logistic_regression,0.922114,{'C': 1}
3,naive_bayes_gaussian,0.806928,{}
4,naive_bayes_multinomial,0.87035,{}
5,decision_tree,0.813052,{'criterion': 'entropy'}
