In [8]:
import numpy as np
import sklearn
from sklearn import datasets
from sklearn.linear_model import LogisticRegression

# Selecting Best model using searching methods

## GridSearchCV or Exhaustive Search

In [78]:
iris = datasets.load_iris()

In [79]:
X, y = iris.data, iris.target

In [80]:
logreg = LogisticRegression(max_iter=10000)

In [81]:
penalty = ['l1','l2']

In [82]:
C = [0.001,0.003,0.01,0.03,0.1,0.3,1,3,10]

In [83]:
hyperparams = dict(C=C,penalty=penalty)

In [84]:
hyperparams

{'C': [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10], 'penalty': ['l1', 'l2']}

In [85]:
from sklearn.model_selection import GridSearchCV

In [86]:
gridscv = GridSearchCV(logreg,param_grid=hyperparams,cv=5,n_jobs=-1,verbose=3)

In [87]:
best_model = gridscv.fit(X,y)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  90 out of  90 | elapsed:    0.5s finished


In [88]:
best_model.best_params_

{'C': 1, 'penalty': 'l2'}

In [89]:
best_model.best_score_

0.9733333333333334

## RandomizedSearchCV

In [90]:
import scipy
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV

In [91]:
X,y = iris.data, iris.target

In [92]:
logreg = LogisticRegression(max_iter=1000)

In [93]:
C = uniform(loc=0, scale=4)

In [94]:
penalty = ['l1','l2']

In [95]:
hyperparams = dict(C=C,penalty=penalty)

In [96]:
randomsearchcv = RandomizedSearchCV(logreg,
                                    param_distributions=hyperparams,
                                    cv=5,
                                    n_jobs=-1,
                                    verbose=3)

In [100]:
best_model = randomsearchcv.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  50 out of  50 | elapsed:    0.6s finished


In [101]:
best_model.best_params_

{'C': 1.3398073351700628, 'penalty': 'l2'}

In [102]:
best_model.best_score_

0.9733333333333334

In [103]:
# help on scipy.stats.uniform

In [105]:
uniform(loc=0,scale=4).rvs(10)

array([2.57139285, 1.3364068 , 2.80656928, 2.70116635, 0.64032357,
       0.79213691, 3.03076017, 1.79064331, 0.17893473, 2.97429337])

# Selecting best learning algorithm

In [109]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [110]:
X,y = iris.data, iris.target

In [114]:
pipe = Pipeline([('classifier',RandomForestClassifier())])

In [119]:
search_space = [{
    'classifier':[LogisticRegression(max_iter=1000)],
    'classifier__penalty':['l1','l2'],
    'classifier__C':[0.001,0.003,0.01,0.03,0.1,0.3,1,3,10]
},
{
    'classifier':[RandomForestClassifier()],
    'classifier__n_estimators':[10,100,1000],
    'classifier__max_features':[1,2,3]
}]

In [120]:
gridsearchcv = GridSearchCV(pipe,search_space,cv=5,n_jobs=-1,verbose=3)

In [121]:
best_model = gridsearchcv.fit(X,y)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-1)]: Done 135 out of 135 | elapsed:   14.8s finished


In [122]:
best_model.best_estimator_

Pipeline(steps=[('classifier', LogisticRegression(C=1, max_iter=1000))])

In [123]:
best_model.best_params_

{'classifier': LogisticRegression(C=1, max_iter=1000),
 'classifier__C': 1,
 'classifier__penalty': 'l2'}

In [124]:
best_model.best_score_

0.9733333333333334