# Selecting the Best Model from Multiple Learning Algorithms

You want to select the best model by searching over a range of learning algorithms and their respective hyperparameters.

Create a dictionary of candidate learning algorithms and their hyperparameters.

### Multiple Learning Algorithms

Include some learning algorithms as part of the search space. 

In this example there are two models, each with its own hyperparameters, and we define their candidate values using the format `classifier__[hyperparameter name]`.

In [16]:
# Load libraries

import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [17]:
# Set random seed: seed NumPy's global random number generator so that
# repeated runs of the notebook draw the same random numbers
# (presumably so the randomized steps below give repeatable results)

np.random.seed(0)

In [18]:
# Load the iris dataset bundled with scikit-learn

iris = datasets.load_iris()

# Split the dataset into the feature matrix and the target vector

features, target = iris.data, iris.target

In [19]:
# Create a pipeline with a single step named "classifier"; the step name is
# what the "classifier" and "classifier__..." keys in search_space refer to.
# The RandomForestClassifier here is only the initial estimator for that step.
pipe = Pipeline([("classifier", RandomForestClassifier())])

In [20]:
# Create one dictionary per candidate learning algorithm, each holding the
# algorithm itself and the hyperparameter values to try for it

# Logistic regression: candidate values for the regularization strength, C,
# and the type of regularization penalty, "penalty"
logistic_space = {
    "classifier": [LogisticRegression(max_iter=1000, solver="liblinear")],
    "classifier__penalty": ["l1", "l2"],
    "classifier__C": np.logspace(0, 4, 10),
}

# Random forest: candidate values for the number of trees, "n_estimators",
# and the number of features considered per split, "max_features"
forest_space = {
    "classifier": [RandomForestClassifier()],
    "classifier__n_estimators": [10, 100, 1000],
    "classifier__max_features": [1, 2, 3],
}

# The full search space is the list of per-algorithm dictionaries
search_space = [logistic_space, forest_space]

# Display the search space
search_space

[{'classifier': [LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                      intercept_scaling=1, l1_ratio=None, max_iter=1000,
                      multi_class='auto', n_jobs=None, penalty='l2',
                      random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                      warm_start=False)],
  'classifier__penalty': ['l1', 'l2'],
  'classifier__C': array([1.00000000e+00, 2.78255940e+00, 7.74263683e+00, 2.15443469e+01,
         5.99484250e+01, 1.66810054e+02, 4.64158883e+02, 1.29154967e+03,
         3.59381366e+03, 1.00000000e+04])},
 {'classifier': [RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                          criterion='gini', max_depth=None, max_features='auto',
                          max_leaf_nodes=None, max_samples=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
           

In [21]:
# Create the grid search: 5-fold cross-validation over every candidate in
# search_space, with progress messages turned off

gridsearch = GridSearchCV(
    estimator=pipe,
    param_grid=search_space,
    cv=5,
    verbose=0,
)
gridsearch

GridSearchCV(cv=5, error_score=nan,
             estimator=Pipeline(memory=None,
                                steps=[('classifier',
                                        RandomForestClassifier(bootstrap=True,
                                                               ccp_alpha=0.0,
                                                               class_weight=None,
                                                               criterion='gini',
                                                               max_depth=None,
                                                               max_features='auto',
                                                               max_leaf_nodes=None,
                                                               max_samples=None,
                                                               min_impurity_decrease=0.0,
                                                               min_impurity_split=None,
                                       

In [22]:
# Fit the grid search on the feature matrix and target vector; the result is
# kept as best_model and queried in the cells below

best_model = gridsearch.fit(X=features, y=target)

After the search is completed, we can use `best_estimator_` to view the best model's learning algorithm and hyperparameters.

In [23]:
# Show the winning learning algorithm together with its hyperparameters

best_pipeline = best_model.best_estimator_
winning_classifier = best_pipeline.get_params()["classifier"]
print("Best Model: ", winning_classifier)


Best Model:  LogisticRegression(C=7.742636826811269, class_weight=None, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=1000, multi_class='auto', n_jobs=None, penalty='l1',
                   random_state=None, solver='liblinear', tol=0.0001, verbose=0,
                   warm_start=False)


We can use this model to predict values just like any other scikit-learn model.

In [24]:
# Predict the target vector for the feature matrix using the fitted search
# object (note: these are the same features the search was fitted on)


best_model.predict(features)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])