In [40]:
from sklearn import svm,datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

In [41]:
# Load the iris dataset bundled with scikit-learn's `datasets` module.
iris = datasets.load_iris()

In [42]:
import pandas as pd

# Wrap the iris feature matrix in a DataFrame whose columns carry the
# dataset's feature names, then show it as the cell output.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [43]:
# Candidate estimators for the grid search. Each entry maps a label to the
# estimator instance ('model') and the hyperparameter grid to try ('params').
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        # Fixed seed: without it the forest is re-randomised on every run, so
        # the reported best score / best n_estimators would not be reproducible.
        'model': RandomForestClassifier(random_state=42),
        'params': {'n_estimators': [1, 5, 10]},
    },
    'logistic_regression': {
        # `multi_class` is intentionally left at its default. 'auto' already is
        # the default, and passing the argument explicitly is deprecated in
        # recent scikit-learn releases.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1, 10, 20],
        },
    },
}

In [44]:
# Run a 5-fold cross-validated grid search for every candidate model on the
# iris data and keep one summary record (label, best score, best params) each.
scores = []
for label, spec in model_params.items():
    clf = GridSearchCV(
        estimator=spec['model'],
        param_grid=spec['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(iris.data, iris.target)
    scores.append(dict(
        model=label,
        best_score=clf.best_score_,
        best_params=clf.best_params_,
    ))

In [45]:
# Tabulate the per-model grid-search summaries and show the table.
df = pd.DataFrame(
    data=scores,
    columns=['model', 'best_score', 'best_params'],
)
df

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.966667,{'n_estimators': 5}
2,logistic_regression,0.966667,{'C': 10}


### Exercise: Finding the optimal model and hyperparameters for the handwritten digits dataset

In [46]:
from sklearn import svm, ensemble, linear_model, tree, neighbors, naive_bayes, datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [47]:
# Load the handwritten digits dataset bundled with scikit-learn's `datasets` module.
digits = datasets.load_digits()

In [48]:
# Inspect which attributes the loaded digits object exposes.
dir(digits)

['DESCR', 'data', 'feature_names', 'frame', 'images', 'target', 'target_names']

In [49]:
# Wrap the flattened digit pixel values in a DataFrame labelled with the
# dataset's feature names.
digits_df = pd.DataFrame(data=digits.data, columns=digits.feature_names)


In [50]:
# Display the digits feature frame as the cell output.
digits_df

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_6,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,5.0,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,9.0,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,4.0,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,2.0,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0


In [51]:
# Candidate estimators for the digits grid search. Each entry maps a label to
# the estimator instance ('model') and the hyperparameter grid to try ('params').
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        # Fixed seed: without it the forest is re-randomised on every run, so
        # the reported best score / best n_estimators would not be reproducible.
        'model': RandomForestClassifier(random_state=42),
        'params': {'n_estimators': [1, 5, 10]},
    },
    'logistic_regression': {
        # `multi_class` is intentionally left at its default. 'auto' already is
        # the default, and passing the argument explicitly is deprecated in
        # recent scikit-learn releases.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1, 10, 20],
        },
    },
    'naive_bayes_gaussian': {
        # Empty grid: the estimator is only cross-validated as constructed.
        'model': naive_bayes.GaussianNB(),
        'params': {},
    },
    'naive_bayes_multinomial': {
        'model': naive_bayes.MultinomialNB(alpha=1.0, fit_prior=True),
        'params': {
            'alpha': [1.0, 0.1, 0.01],
            'fit_prior': [True, False],
        },
    },
    'decision_tree': {
        # Seeded for the same reason as the random forest: tree construction
        # is randomised, and an unseeded tree makes the results drift per run.
        'model': tree.DecisionTreeClassifier(
            criterion='gini', max_depth=None, random_state=42
        ),
        'params': {
            'max_depth': [None, 10, 20],
            'min_samples_split': [2, 5],
            'criterion': ['gini', 'entropy'],
        },
    },
}

In [52]:

# Run a 5-fold cross-validated grid search for every candidate model on the
# digits data and keep one summary record (label, best score, best params) each.
scores = []
for label, spec in model_params.items():
    clf = GridSearchCV(
        estimator=spec['model'],
        param_grid=spec['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(digits.data, digits.target)
    scores.append(dict(
        model=label,
        best_score=clf.best_score_,
        best_params=clf.best_params_,
    ))

In [53]:
# Tabulate the per-model grid-search summaries and show the table.
df = pd.DataFrame(
    data=scores,
    columns=['model', 'best_score', 'best_params'],
)
df

Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.905432,{'n_estimators': 10}
2,logistic_regression,0.922114,{'C': 1}
3,naive_bayes_gaussian,0.806928,{}
4,naive_bayes_multinomial,0.870907,"{'alpha': 0.1, 'fit_prior': True}"
5,decision_tree,0.817501,"{'criterion': 'entropy', 'max_depth': 20, 'min..."
