In [1]:
import pandas as pd
import numpy as np

In [22]:
import warnings
warnings.filterwarnings(action='ignore')

<h4 class="text-center"> Grid Search CV </h4>

In [2]:
from sklearn import datasets

In [3]:
#creating the dataframe
iris = datasets.load_iris()

In [4]:
feature = iris.data
target = iris.target

In [5]:
#### Creating a Logistic Regression
from sklearn.linear_model import LogisticRegression

In [6]:
logit = LogisticRegression(random_state=42)

<span class="badge"> HyperParameter </span>

In [10]:
param_dict = {
    'penalty': ['l1','l2'],
    'C': np.logspace(0,4,10)
}

In [11]:
param_dict

{'penalty': ['l1', 'l2'],
 'C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
        5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
        3.59381366e+03, 1.00000000e+04])}

<span class="badge"> Grid Search CV </span>

In [12]:
from sklearn.model_selection import GridSearchCV

In [19]:
grid_search = GridSearchCV(logit,param_grid=param_dict, cv=5,verbose=False)

In [23]:
grid_search.fit(feature,target)

GridSearchCV(cv=5, estimator=LogisticRegression(random_state=42),
             param_grid={'C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
       5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
       3.59381366e+03, 1.00000000e+04]),
                         'penalty': ['l1', 'l2']},
             verbose=False)

In [24]:
grid_search.best_params_ # LogisticRegression(C:7.8, penalty:'l2')

{'C': 7.742636826811269, 'penalty': 'l2'}

In [29]:
grid_search.best_estimator_.get_params()['penalty']

'l2'

In [30]:
grid_search.best_estimator_.get_params()['C']

7.742636826811269

In [31]:
grid_search.best_estimator_.get_params() 

{'C': 7.742636826811269,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 42,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

<h4 class="text-center"> Randomized Search CV </h4>

In [35]:
from sklearn.model_selection import RandomizedSearchCV

In [39]:
random_search = RandomizedSearchCV(logit,param_distributions=param_dict,cv=5,n_iter=100,verbose=True)

In [40]:
random_search.fit(feature,target)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    2.3s finished


RandomizedSearchCV(cv=5, estimator=LogisticRegression(random_state=42),
                   n_iter=100,
                   param_distributions={'C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
       5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
       3.59381366e+03, 1.00000000e+04]),
                                        'penalty': ['l1', 'l2']},
                   verbose=True)

In [41]:
random_search.best_params_

{'penalty': 'l2', 'C': 7.742636826811269}

In [43]:
random_search.best_estimator_.get_params()

{'C': 7.742636826811269,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': 42,
 'solver': 'lbfgs',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

<h4 class="text-center"> Multiple Algorithm </h4>

In [44]:
from sklearn.ensemble import RandomForestClassifier

In [46]:
#creating a pipeline
from sklearn.pipeline import Pipeline

In [47]:
make_pipe = Pipeline( [ ('classifier', RandomForestClassifier()) ] )

In [49]:
### Creating a dictionary for logiticRegression and RandomForestClassifier

In [50]:
search_model = [
    {
        'classifier':[LogisticRegression()],
        'classifier__penalty':['l1','l2'],
        'classifier__C': np.logspace(0,4,10)
    },
    {
        'classifier':[RandomForestClassifier()],
        'classifier__n_estimators':[100,500,1000],
        'classifier__max_features':[1,2,3]
    }
]

In [51]:
multi_grid_search = GridSearchCV(make_pipe,search_model,cv=5,verbose=0)

In [53]:
multi_grid_search.fit(feature,target)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('classifier',
                                        RandomForestClassifier())]),
             param_grid=[{'classifier': [LogisticRegression(C=7.742636826811269)],
                          'classifier__C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
       5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
       3.59381366e+03, 1.00000000e+04]),
                          'classifier__penalty': ['l1', 'l2']},
                         {'classifier': [RandomForestClassifier()],
                          'classifier__max_features': [1, 2, 3],
                          'classifier__n_estimators': [100, 500, 1000]}])

In [54]:
multi_grid_search.best_params_

{'classifier': LogisticRegression(C=7.742636826811269),
 'classifier__C': 7.742636826811269,
 'classifier__penalty': 'l2'}

In [58]:
multi_grid_search.predict(feature)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [59]:
target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

<h4 class="text-center"> Preprocessing to Model select </h4>

In [60]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import FeatureUnion

In [61]:
#preprocess
preprocess = FeatureUnion([
    ('std',StandardScaler()),
    ('pca',PCA())
])

In [62]:
#create a pipeline
pipe_two = Pipeline([
    ('preprocess',preprocess),
    ('classifier',LogisticRegression())
])

In [63]:
search_model_two = [
    {
        'preprocess__pca__n_components':[1,2,3],
        'classifier__penalty':['l1','l2'],
        'classifier__C':np.logspace(0,4,10)
    }
]

In [64]:
#grid Search Cv
logit_grid = GridSearchCV(pipe_two,search_model_two,cv=5,verbose=0,n_jobs=-1)

In [65]:
logit_grid.fit(feature,target)

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('preprocess',
                                        FeatureUnion(transformer_list=[('std',
                                                                        StandardScaler()),
                                                                       ('pca',
                                                                        PCA())])),
                                       ('classifier', LogisticRegression())]),
             n_jobs=-1,
             param_grid=[{'classifier__C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
       5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
       3.59381366e+03, 1.00000000e+04]),
                          'classifier__penalty': ['l1', 'l2'],
                          'preprocess__pca__n_components': [1, 2, 3]}])

In [66]:
logit_grid.best_params_

{'classifier__C': 7.742636826811269,
 'classifier__penalty': 'l2',
 'preprocess__pca__n_components': 2}