In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [39]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB

In [55]:
models=[{
    'name':'logisticregression',
    'clf':LogisticRegression(),
    'pipeline':Pipeline([('scaler',StandardScaler()),('clf',LogisticRegression())]),
     'params':{
         'clf__C':[0.25,0.5,0.75,1.0]
     }
},
{   
    'name':'supportvectormachine',
    'clf':SVC(),
    'pipeline':Pipeline([('scaler',StandardScaler()),('clf',SVC())]),
    'params':{
    'clf__kernel':('rbf','poly','linear','sigmoid'),
    'clf__C':[0.5,1,5,8,10,15,20,30,40]
    }
},
{
    'name':'Randomforestclassifier',
    'clf':RandomForestClassifier(),
    'pipeline':Pipeline([('scaler',StandardScaler()),('clf',RandomForestClassifier())]),
    'params':{'clf__n_estimators':[50,75,100]}
},
{
    'name':'decisiontree',
    'clf':DecisionTreeClassifier(),
    'pipeline':Pipeline([('scaler',StandardScaler()),('clf',DecisionTreeClassifier())]),
    'params':{
        'clf__criterion':('gini','entropy','log_loss'),
        'clf__max_depth':[2,4,5,8,9]
    }
},
{
    'name':'multinomial',
    'clf':MultinomialNB(),
    'pipeline':Pipeline([('clf',MultinomialNB())]),
    'params':{
        'clf__alpha':[0.2,0.3,0.5,0.7,1.0]
    }
}
]

In [4]:
from sklearn.datasets import load_digits

In [5]:
data=load_digits()
dir(data)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [6]:
target=data['target']
len(target)

1797

In [7]:
input=data['data']
input

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train,X_test,y_train,y_test=train_test_split(input,target,test_size=0.2)

In [42]:
from sklearn.model_selection import GridSearchCV

Support vector machines(svm)

In [34]:
# params={'kernel':('linear', 'rbf','poly'), 'C':[1,5,8,10,15]}
# clf=GridSearchCV(SVC(),params,cv=5)
# clf.fit(X_train,y_train)
# clf.cv_results_

In [56]:
result=[]
for model in models:
    res=GridSearchCV(model['pipeline'],model['params'])
    res.fit(X_train,y_train)
    result.append({
            'Model':model['name'],
        'Best score':res.best_score_,
        'Best params':res.best_params_
    })
    

In [59]:
model_res=pd.DataFrame(result)
display=model_res[['Model','Best score','Best params']]
display

Unnamed: 0,Model,Best score,Best params
0,logisticregression,0.96032,{'clf__C': 0.25}
1,supportvectormachine,0.983991,"{'clf__C': 15, 'clf__kernel': 'poly'}"
2,Randomforestclassifier,0.972863,{'clf__n_estimators': 100}
3,decisiontree,0.8671,"{'clf__criterion': 'entropy', 'clf__max_depth'..."
4,multinomial,0.899804,{'clf__alpha': 0.2}
