<a href="https://colab.research.google.com/github/rahulrajpr/AdvancedHyperParameterOptimisation/blob/HyperOpt/HyperOpt_MultiModel_Search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Hyperopt MultiModel Optimization**

 - Optimizing multiple models, each with its own hyperparameter space, in a single search using the customisability of the hyperopt library

 - The hyperparameter space design includes the following

 1. The Model
 2. The Model - Hyperparameters

 3. The search finds the best model as well as the best hyperparameters for that model

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
from sklearn.datasets import load_breast_cancer

In [3]:
# Load the breast-cancer dataset and gather the features and the label
# into a single DataFrame, then print a summary of its columns.
dataset = load_breast_cancer()
data, target, columns = dataset.data, dataset.target, dataset.feature_names
df = pd.DataFrame(data = data, columns = columns).assign(target = target)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

In [4]:
# Separate the label column from the feature columns.
y = df['target']
X = df.drop(columns = ['target'])

In [5]:
from sklearn.model_selection import train_test_split

In [6]:
# Keep 70% of the rows for training and the rest for the final evaluation;
# stratify on the label and fix the seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size = 0.7,
    random_state = 100,
    stratify = y,
)

Define the parameter space

In [7]:
from hyperopt import hp

In [28]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

Let's define the hyperparameter space with the candidate models and their hyperparameters

In [32]:
# Conditional search space: hyperopt first picks one of the candidate models
# ('classifier') and then samples only that model's own hyperparameters.
# Each entry carries the estimator class under 'model' and its keyword
# arguments under 'params'.
# hp.quniform samples floats (e.g. 1300.0), so n_estimators / max_depth must
# be cast to int before they are handed to scikit-learn.
param_grid = hp.choice('classifier', [

    # algo 1
    {'model': LogisticRegression,
    'params': {
        'penalty': hp.choice('penalty', ['l1','l2']),
        'C' : hp.uniform('C', 0.001, 10),
        'solver': 'saga', # the only solver that works with both penalties
    }},

    # algo 2
    {'model': RandomForestClassifier,
    'params': {
        'n_estimators': hp.quniform('n_estimators_rf', 50, 1500, 50),
        'max_depth': hp.quniform('max_depth_rf', 1, 5, 1),
        'criterion': hp.choice('criterion_rf', ['gini', 'entropy']),
    }},

    # algo 3
    {'model': GradientBoostingClassifier,
    'params': {
        'n_estimators': hp.quniform('n_estimators_gbm', 50, 1500, 50),
        'max_depth': hp.quniform('max_depth_gbm', 1, 5, 1),
        # 'mse' was deprecated in scikit-learn 1.0 and removed in 1.2;
        # 'squared_error' is the equivalent, supported spelling.
        'criterion': hp.choice('criterion_gbm', ['friedman_mse', 'squared_error']),
    }},
])

Define the objective Function

In [13]:
from sklearn.model_selection import cross_val_score

In [33]:
def objective(params):
  """Loss function minimised by hyperopt.

  Parameters
  ----------
  params : dict
      One sample from the search space, with two keys:
      'model'  - the estimator class to instantiate;
      'params' - keyword arguments for that estimator.

  Returns
  -------
  float
      The negated mean 5-fold cross-validated accuracy on
      (X_train, y_train). It is negated because hyperopt minimises.
  """
  # Work on a copy so the sampled configuration owned by hyperopt is not
  # modified in place.
  model_params = dict(params['params'])

  # hp.quniform yields floats; these estimator arguments must be integers.
  # Each key is handled on its own, so a model that lacks one of them
  # (e.g. LogisticRegression) is simply left untouched and real errors are
  # no longer swallowed.
  for name in ('n_estimators', 'max_depth'):
    if name in model_params:
      model_params[name] = int(model_params[name])

  model = params['model'](**model_params)

  scores = cross_val_score(estimator = model,
                           X = X_train,
                           y = y_train,
                           scoring = 'accuracy',
                           cv = 5,
                           n_jobs = -1)

  return -np.mean(scores)

Define and run the search

In [16]:
from hyperopt import fmin
from hyperopt import anneal
from hyperopt import Trials

In [34]:
trials = Trials()
search = fmin(fn = objective,
              space = param_grid,
              algo = anneal.suggest,
              max_evals = 50,
              rstate = np.random.RandomState(100),
              trials = trials)
%time

100%|██████████| 50/50 [05:08<00:00,  6.17s/it, best loss: -0.9597151898734178]
CPU times: user 6 µs, sys: 1 µs, total: 7 µs
Wall time: 10.7 µs


Explore the search results

In [35]:
# Raw result returned by fmin. Judging by the values shown, hp.choice
# dimensions are reported as option indices and hp.quniform ones as floats.
search

{'classifier': 2,
 'criterion_gbm': 1,
 'max_depth_gbm': 1.0,
 'n_estimators_gbm': 1300.0}

In [42]:
# best parameters, in the same label -> raw value form as the fmin result
trials.argmin

{'classifier': 2,
 'criterion_gbm': 1,
 'max_depth_gbm': 1.0,
 'n_estimators_gbm': 1300.0}

In [44]:
# best loss found; objective returns the negated mean CV accuracy, so the
# best accuracy is this value with the sign flipped
trials.average_best_error()

-0.9597151898734178

In [45]:
# full record of the best trial: its loss and status plus the value sampled
# for each search-space label (empty lists for labels of the other models)
trials.best_trial

{'state': 2,
 'tid': 35,
 'spec': None,
 'result': {'loss': -0.9597151898734178, 'status': 'ok'},
 'misc': {'tid': 35,
  'cmd': ('domain_attachment', 'FMinIter_Domain'),
  'workdir': None,
  'idxs': {'C': [],
   'classifier': [35],
   'criterion_gbm': [35],
   'criterion_rf': [],
   'max_depth_gbm': [35],
   'max_depth_rf': [],
   'n_estimators_gbm': [35],
   'n_estimators_rf': [],
   'penalty': []},
  'vals': {'C': [],
   'classifier': [2],
   'criterion_gbm': [1],
   'criterion_rf': [],
   'max_depth_gbm': [1.0],
   'max_depth_rf': [],
   'n_estimators_gbm': [1300.0],
   'n_estimators_rf': [],
   'penalty': []}},
 'exp_key': None,
 'owner': None,
 'version': 0,
 'book_time': datetime.datetime(2023, 1, 3, 17, 37, 12, 998000),
 'refresh_time': datetime.datetime(2023, 1, 3, 17, 37, 20, 284000)}

Let's train the final model

In [39]:
# best algorithm is Gradient boosting classifier
best_params = {'n_estimators' : 1300,
               'max_depth' : 1,
               'criterion':'friedman_mse'}

In [37]:
# build an unfitted gradient boosting classifier from the chosen hyperparameters
best_model = GradientBoostingClassifier(**best_params)

In [38]:
# fit the final model on the whole training split
best_model.fit(X_train,y_train)

GradientBoostingClassifier(max_depth=1, n_estimators=1300)

Let's evaluate the model

In [46]:
from sklearn.metrics import accuracy_score

In [47]:
# Score the fitted model on both splits; the test split was never seen by
# the search or by the final fit.
train_accuracy = accuracy_score(y_train, best_model.predict(X_train))
test_accuracy = accuracy_score(y_test, best_model.predict(X_test))

print('accuracy of train set : ', train_accuracy)
print('accuracy of test set : ', test_accuracy)

accuracy of train set :  1.0
accuracy of test set :  0.9824561403508771
