## Grid Search

### Self Implementation

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [2]:
iris = load_iris()
X, y = iris.data, iris.target

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

### We are using Support vector machine to train a classifier

Checkout more hyperparameters for SVC here: https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

Define more hyperparameters in `param_grid` and play around with search space.

In [4]:
# Defining Search Space

# Input your code here to play around with Search Spaces
# define more hyperparameters

c = 0.001
gamma = 1e-10

param_grid = {
              "C": [c*(10**i) for i in range(1,14)],
              "gamma": [gamma*(10**i) for i in range(1,14)]
             }

In [5]:
from itertools import product

def make_sets(grid):
    """function makes all possible set from the grid above"""
    sets = list()
    all_hps_vals = [lst for lst in param_grid.values()]
    hp_keys = [hp for hp in param_grid.keys()]
    val_sets = product(*all_hps_vals)
    for val in val_sets:
        hp_set = dict()
        for idx, hp_key in enumerate(hp_keys):
            hp_set[hp_key] = val[idx]
        sets.append(hp_set)
    return sets

make_sets(param_grid)[:5]

[{'C': 0.01, 'gamma': 1e-09},
 {'C': 0.01, 'gamma': 1e-08},
 {'C': 0.01, 'gamma': 1.0000000000000001e-07},
 {'C': 0.01, 'gamma': 1e-06},
 {'C': 0.01, 'gamma': 1e-05}]

In [6]:
def grid_search(clf, grid, X_train, y_train, X_test, y_test):
    # iterates over all the sets
    all_sets = make_sets(grid)
    logs = list()
    best_hp_set = {
        "best_test_score": 0.0
    }
    for hp_set in all_sets:
        log = dict()
        model = clf(**hp_set)
        model.fit(X_train, y_train)
        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)
        
        log["hp"] = hp_set
        log["train_score"] = train_score
        log["test_score"] = test_score
        
        if best_hp_set["best_test_score"]<test_score:
            best_hp_set["best_test_score"] = test_score
            best_hp_set["hp_set"] = hp_set
        
        logs.append(log)
        
    return logs, best_hp_set

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [8]:
logs, best = grid_search(SVC, param_grid, X_train, y_train, X_test, y_test)

In [9]:
# print(best)

### Scikit Learn Implementaion

In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:
# using k fold cross validation, here k=3

clf = GridSearchCV(SVC(), param_grid, cv=3)
clf.fit(X_train, y_train)

In [12]:
clf.best_estimator_

In [13]:
# print(f'Test Score: {clf.score(X_test, y_test)}')
# print(f'Train Score: {clf.score(X_train, y_train)}')

## Random Search

### Self Implementation

#### Change values here in `param_grid` to play with search space

In [14]:
from sklearn.svm import SVC


In [15]:
import random
import numpy as np

def loguniform(low=0, high=1, size=100, base=10):
    """function creates a log uniform distribution with 
    random values."""
    return np.power(base, np.random.uniform(low, high, size))

param_grid = {
              "gamma": loguniform(low=-10, high=4, base=10),
              "C": loguniform(low=-3, high=11, base=10)
             }

In [16]:
def get_random_hp_set(grid):
      # function chooses a random value for each from grid
    hp_set = dict()
    for key, param in grid.items():
             hp_set[key] = np.random.choice(param) 
    return hp_set

In [17]:
def random_search(clf, grid, n_iterations, X_train, y_train, X_test, y_test):
# defining function for random search    
    logs = list()
    best_hp_set = {
    "best_test_score": 0.0
    }

    for iteration in range(n_iterations):
        log = dict()

        # selecting the set of hyperparameters from function defined
        # for random search.
        hp_set = get_random_hp_set(grid)
        # print(hp_set)
        model = clf(**hp_set)
        model.fit(X_train, y_train)
        train_score = model.score(X_train, y_train)
        test_score = model.score(X_test, y_test)

        log["hp"] = hp_set
        log["train_score"] = train_score
        log["test_score"] = test_score

        if best_hp_set["best_test_score"]<test_score:
            best_hp_set["best_test_score"] = test_score
            best_hp_set["hp_set"] = hp_set

        logs.append(log)

    return logs, best_hp_set

In [18]:
logs, best = random_search(SVC, param_grid, 20, X_train, y_train, X_test, y_test)

In [19]:
best

{'best_test_score': 0.9666666666666667,
 'hp_set': {'gamma': 8.30932889992501e-05, 'C': 5476.079575132255}}

### Scikit Learn Implementation

In [20]:
from sklearn.model_selection import RandomizedSearchCV

In [21]:
clf = RandomizedSearchCV(SVC(), param_grid, n_iter=20, cv=3)
clf.fit(X_train, y_train)

In [22]:
clf.best_estimator_

In [23]:
# print(f'Test Score: {clf.score(X_test, y_test)}')
# print(f'Train Score: {clf.score(X_train, y_train)}')

## Dask

### Initialize the client first

### Define client such that you don't hang your system

In [24]:
from dask.distributed import Client
client = Client(processes=False, threads_per_worker=4, memory_limit='4GB', n_workers=1)

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [25]:
client

0,1
Client  Scheduler: inproc://192.168.244.37/288814/1  Dashboard: http://192.168.244.37:8787/status,Cluster  Workers: 1  Cores: 4  Memory: 4.00 GB


#### Use `client.dashboard_link` to monitor the distribution over your system cores

In [26]:
client.dashboard_link

'http://192.168.244.37:8787/status'

### Now we'll distribute a huge dataset, using Dask package

### Let's simply train a model first

In [27]:
from dask_ml import datasets
from dask_ml.model_selection import train_test_split
import dask.array as da
from dask_ml.wrappers import Incremental
from sklearn.linear_model import SGDClassifier

  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


In [28]:
X, y = datasets.make_classification(n_samples=100000000,
                                         n_features=7,
                                         random_state=0,
                                         chunks=100000)

In [29]:
X

Unnamed: 0,Array,Chunk
Bytes,5.60 GB,5.60 MB
Shape,"(100000000, 7)","(100000, 7)"
Count,1000 Tasks,1000 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 5.60 GB 5.60 MB Shape (100000000, 7) (100000, 7) Count 1000 Tasks 1000 Chunks Type float64 numpy.ndarray",7  100000000,

Unnamed: 0,Array,Chunk
Bytes,5.60 GB,5.60 MB
Shape,"(100000000, 7)","(100000, 7)"
Count,1000 Tasks,1000 Chunks
Type,float64,numpy.ndarray


In [30]:
y

Unnamed: 0,Array,Chunk
Bytes,800.00 MB,800.00 kB
Shape,"(100000000,)","(100000,)"
Count,12001 Tasks,1000 Chunks
Type,int64,numpy.ndarray
"Array Chunk Bytes 800.00 MB 800.00 kB Shape (100000000,) (100000,) Count 12001 Tasks 1000 Chunks Type int64 numpy.ndarray",100000000  1,

Unnamed: 0,Array,Chunk
Bytes,800.00 MB,800.00 kB
Shape,"(100000000,)","(100000,)"
Count,12001 Tasks,1000 Chunks
Type,int64,numpy.ndarray


In [31]:
classes = da.unique(y).compute()

In [32]:
classes

array([0, 1])

In [33]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

#### Note that we can only use modeling algorithms which supports Batch Training, to train model in batches

In [34]:
clf = SGDClassifier(loss='log', penalty='l2', tol=0.01)
# wrapping in Incremental
clf = Incremental(clf, scoring='accuracy')
clf.fit(X_train, y_train, classes=classes)
# while training check Client Dashboard



### For algorithms like SVC we can not train data in batches.

### But hyperparameter Optimization can be distributed, let's see how

In [35]:
from sklearn.datasets import load_digits
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC
import joblib
import time

In [36]:
# define a simple classifier with GridSearch

X, y = load_digits().data, load_digits().target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, shuffle=True)

c = 0.001
gamma = 1e-10
param_grid = {
              "C": [c*(10**i) for i in range(1,14)],
              "gamma": [gamma*(10**i) for i in range(1,14)]
             }

clf = SVC(kernel='rbf')
search = GridSearchCV(clf, param_grid, cv=3)

#### monitor dashboard after executing this next piece of code, and see how grid search is distributed over different cores

In [37]:
since = time.time()
with joblib.parallel_backend('dask', scatter=[X_train, y_train]):
    model = search.fit(X_train, y_train)
print(time.time()-since)

21.529071807861328


### Unfortunately for a large dataset, you cannot use both, an algorithm which train in batches and optimize hyperparameters as well

### Something like this would not work! Why?

In [38]:
# from dask_ml import datasets
# from dask_ml.wrappers import Incremental
# from dask_ml.model_selection import train_test_split, GridSearchCV
# from dask_ml.metrics import accuracy_score

# from sklearn.metrics import make_scorer
# from sklearn.linear_model import SGDClassifier

# import joblib

# import dask.array as da
# # from dask.distributed import Client
# # client = Client(processes=False)

# param_grid = {
#               "loss": ['hinge', 'log'],
#               "tol": [1e-2, 1e-3]
#              }

# X, y = datasets.make_classification(n_samples=100000,
#                                     n_features=7,
#                                     random_state=0,
#                                     chunks=10000)

# # providing an accuracy metrics from 'dask_ml'
# scorer = make_scorer(accuracy_score)

# X_train, X_test, y_train, y_test = train_test_split(X, y)

# clf = SGDClassifier(loss='log')
# clf_wrap = Incremental(clf, scoring=scorer)
# searh_clf = GridSearchCV(clf_wrap, param_grid, cv=3)

# with joblib.parallel_backend('dask'):
#     model = searh_clf.fit(X_train, y_train)

## HyperOpt

### A basic where we are optimizing, f(a,b) = a\*\*2 - b\*\*2

In [39]:
from hyperopt import tpe, fmin, hp

def objective_func(args):
    a = args['a']
    b = args['b']    
    f = a**2 - b**2
    return f

range_a = hp.uniform('a', -2, 3)
range_b = hp.uniform('b', -1, 2)

space = {'a': range_a,
            'b': range_b}

best = fmin(objective_func, space, algo=tpe.suggest, max_evals=100)

100%|██████████| 100/100 [00:00<00:00, 265.17trial/s, best loss: -3.9646943731079674]


In [40]:
best

{'a': 0.10372706482801239, 'b': 1.9938539758682936}

### Using Hyperopt to find algorithm and hyperparameters

In [41]:
from hyperopt import tpe, fmin, hp
import math as m

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [42]:
space = hp.choice('classifier',[
        {'model': 'KNeighborsClassifier',
        'param': {'n_neighbors':
                        hp.choice('n_neighbors',range(3,11)),
        'algorithm':hp.choice('algorithm', ['ball_tree', 'kd_tree']),
        'leaf_size':hp.choice('leaf_size', range(1,50)),
        'metric':hp.choice('metric', ["euclidean", "manhattan",
                           "chebyshev", "minkowski"
                           ])}
        },
        {'model': 'SVC',
        'param':{'C':hp.loguniform('C', -2*m.log(10), 11*m.log(10)),
        'kernel':hp.choice('kernel',['rbf', 'poly', 'sigmoid']),
        'degree':hp.choice('degree', range(1,6)),
        'gamma':hp.loguniform('gamma', -9*m.log(10), 3*m.log(10))}
        }
        ])

In [43]:
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data,
                                                    digits.target,
                                                    test_size=0.3)

logs = {'args':list(),
        'train_score': list(),
        'val_score': list()}

def objective_func(args):
    clf_func = args["model"]
    params = args["param"]
    
    # debugging with hyperopt is a pain, so be smart and use these statements when stuck
#     print(args)
    clf = eval(clf_func)(**params)
    clf.fit(X_train, y_train)
    
    val_score = clf.score(X_test, y_test)
    train_score = clf.score(X_train, y_train)
    
    logs['args'].append(args)
    logs['train_score'].append(train_score)
    logs['val_score'].append(val_score)
    
    return -val_score

best = fmin(objective_func, space, algo=tpe.suggest, max_evals=100)

100%|██████████| 100/100 [00:22<00:00,  4.53trial/s, best loss: -0.9925925925925926]


In [44]:
best

{'C': 32.84635448123946,
 'classifier': 1,
 'degree': 1,
 'gamma': 0.0027732611555239942,
 'kernel': 0}

### Let's try and optimize a Neural Network

In [45]:
from hyperopt import hp, tpe, fmin
from keras.datasets import mnist
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils
import numpy as np

# load and preprocess the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
classes = 10
input_shape = 784
y_train = np_utils.to_categorical(y_train, classes)
y_test = np_utils.to_categorical(y_test, classes)

#logs
logs = {'model_summary':list(),
        'val_acc': list()}

def obj_func(args):
    
    model = Sequential()
    
    #defining first hidden layer
    model.add(Dense(units=args['units']['layer_units_1'], 
                    input_shape=(input_shape, ),
                    name='layer_units_1'))
    
    #defining number of remaining hidden layer
    number_of_layers = len(args['units'])
    for layer in range(2, number_of_layers):
        model.add(Dense(units=args['units'][f'layer_units_{layer}'], 
                        name=f'layer_units_{layer}'))
        model.add(Dropout(args['dropout'][f'dropout_p_{layer}'], 
                          name=f'dropout_p_{layer}'))
        model.add(Activation(activation=args['activation'][f'activation_{layer}'], 
                             name=f'activation_{layer}'))
        
    model.add(Dense(classes, name=f'layer_unit_{layer+1}'))
    model.add(Activation(activation='softmax', name=f'activation_{layer+1}'))
    
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer='adam')
    
    result = model.fit(x_train, y_train,
                      batch_size=2,
                      epochs=1,
                      verbose=3,
                      validation_split=0.2)
    
    validation_acc = np.amax(result.history['val_accuracy'])
    print(validation_acc)
    logs['model_summary'].append(model.summary())
    logs['val_acc'].append(validation_acc)
    
    return -validation_acc

def each_layer(number_of_layers):
    params = {'units': dict(),
              'dropout': dict(),
              'activation': dict()}
    number_of_nodes = [16,36,64,128,256,512]
    for layer in range(number_of_layers):
        params['units'][f'layer_units_{layer}'] = hp.choice(f'layer_{number_of_layers}_{layer}', 
                                                            number_of_nodes)
        params['dropout'][f'dropout_p_{layer}'] = hp.uniform(f'dropout_{number_of_layers}_{layer}', 
                                                             0, 
                                                             0.8)
        params['activation'][f'activation_{layer}'] = hp.choice(f'activation_{number_of_layers}_{layer}', 
                                                                ['relu', 'elu'])
    return params

number_of_layers = [3, 5, 7, 9]
space = hp.choice('layers', [each_layer(n) for n in number_of_layers])
best = fmin(obj_func, space, algo=tpe.suggest, max_evals=5)

0.10599999874830246                                  
Model: "sequential"                                  

_________________________________________________________________

 Layer (type)                Output Shape              Param #   


 layer_units_1 (Dense)       (None, 128)               100480    

 layer_units_2 (Dense)       (None, 256)               33024     

 dropout_p_2 (Dropout)       (None, 256)               0         

 activation_2 (Activation)   (None, 256)               0         

 layer_units_3 (Dense)       (None, 36)                9252      

 dropout_p_3 (Dropout)       (None, 36)                0         

 activation_3 (Activation)   (None, 36)                0         

 layer_units_4 (Dense)       (None, 36)                1332      

 dropout_p_4 (Dropout)       (None, 36)                0         

 activation_4 (Activation)   (None, 36)                0         

 layer_units_5 (Dense)       (None, 16)                592       

 dropout_p_5 (Dropo

In [46]:
best

{'activation_5_0': 1,
 'activation_5_1': 1,
 'activation_5_2': 0,
 'activation_5_3': 1,
 'activation_5_4': 0,
 'dropout_5_0': 0.4600061311697963,
 'dropout_5_1': 0.16514474977751653,
 'dropout_5_2': 0.13413586890484872,
 'dropout_5_3': 0.3693425515016359,
 'dropout_5_4': 0.370320416042322,
 'layer_5_0': 1,
 'layer_5_1': 0,
 'layer_5_2': 1,
 'layer_5_3': 2,
 'layer_5_4': 4,
 'layers': 1}

### number of trials are too low here, but we can definitly perform an architecture search using this by increasing the number of trials

### Now go ahead and explore this amazing wrapper `Hyperas`

## Optuna

### Let's train a scikit-learn model with this library

In [47]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

import optuna
from optuna.samplers import TPESampler

digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data,
                                                    digits.target,
                                                    test_size=0.3)

def log(study, trial):
    print(f"Trial No.={trial.number}, HP_Set={trial.params}, Score={trial.value}")
    print(f"Best Value ={study.best_value}")

def objective_func(trial):
    
    classifier_name = trial.suggest_categorical("classifier", ["SVC", "RandomForest"])
    if classifier_name == "SVC":
        c = trial.suggest_loguniform("svc_c", 1e-2, 1e+11)
        gamma = trial.suggest_loguniform("svc_gamma", 1e-9, 1e+3)
        kernel = trial.suggest_categorical("svc_kernel", ['rbf','poly','rbf','sigmoid'])
        degree = trial.suggest_categorical("svc_degree", range(1,15))
        clf = SVC(C=c, gamma=gamma, kernel=kernel, degree=degree)
    else:
        algorithm = trial.suggest_categorical("algorithm", ['ball_tree', "kd_tree"])
        leaf_size = trial.suggest_categorical("leaf_size", range(1,50))
        metric = trial.suggest_categorical("metic", ["euclidean","manhattan", "chebyshev","minkowski"])
        clf = KNeighborsClassifier(algorithm=algorithm, leaf_size=leaf_size, metric=metric)
        
    clf.fit(X_train, y_train)
    val_acc = clf.score(X_test, y_test)
    
    return val_acc

study = optuna.create_study(direction='maximize', sampler=TPESampler())
study.optimize(objective_func, n_trials=3, callbacks=[log])
best_trial = study.best_trial.value

print(f"Best trial  accuracy: {best_trial}")
print("parameters for best trail are :")
for key, value in study.best_trial.params.items():
    print(f"{key}: {value}")

[32m[I 2023-03-13 03:39:50,840][0m A new study created in memory with name: no-name-78f0363a-91df-48e0-a1d9-4d9bc6fa5069[0m
[32m[I 2023-03-13 03:39:50,912][0m Trial 0 finished with value: 0.9833333333333333 and parameters: {'classifier': 'RandomForest', 'algorithm': 'ball_tree', 'leaf_size': 16, 'metic': 'minkowski'}. Best is trial 0 with value: 0.9833333333333333.[0m
  c = trial.suggest_loguniform("svc_c", 1e-2, 1e+11)
  gamma = trial.suggest_loguniform("svc_gamma", 1e-9, 1e+3)


Trial No.=0, HP_Set={'classifier': 'RandomForest', 'algorithm': 'ball_tree', 'leaf_size': 16, 'metic': 'minkowski'}, Score=0.9833333333333333
Best Value =0.9833333333333333


[32m[I 2023-03-13 03:39:51,126][0m Trial 1 finished with value: 0.8203703703703704 and parameters: {'classifier': 'SVC', 'svc_c': 11865.165644029325, 'svc_gamma': 0.00858615894716739, 'svc_kernel': 'rbf', 'svc_degree': 4}. Best is trial 0 with value: 0.9833333333333333.[0m
  c = trial.suggest_loguniform("svc_c", 1e-2, 1e+11)
  gamma = trial.suggest_loguniform("svc_gamma", 1e-9, 1e+3)
[32m[I 2023-03-13 03:39:51,165][0m Trial 2 finished with value: 0.9611111111111111 and parameters: {'classifier': 'SVC', 'svc_c': 193392380.36786938, 'svc_gamma': 0.00032461410484564403, 'svc_kernel': 'poly', 'svc_degree': 9}. Best is trial 0 with value: 0.9833333333333333.[0m


Trial No.=1, HP_Set={'classifier': 'SVC', 'svc_c': 11865.165644029325, 'svc_gamma': 0.00858615894716739, 'svc_kernel': 'rbf', 'svc_degree': 4}, Score=0.8203703703703704
Best Value =0.9833333333333333
Trial No.=2, HP_Set={'classifier': 'SVC', 'svc_c': 193392380.36786938, 'svc_gamma': 0.00032461410484564403, 'svc_kernel': 'poly', 'svc_degree': 9}, Score=0.9611111111111111
Best Value =0.9833333333333333
Best trial  accuracy: 0.9833333333333333
parameters for best trail are :
classifier: RandomForest
algorithm: ball_tree
leaf_size: 16
metic: minkowski


### and now a Neural Network

In [48]:
from keras.datasets import mnist
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils
import numpy as np

import optuna
from optuna.samplers import TPESampler

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
classes = 10
input_shape = 784
y_train = np_utils.to_categorical(y_train, classes)
y_test = np_utils.to_categorical(y_test, classes)
x_train, y_train, x_test, y_test, input_shape, classes

def log(study, trial):
    print(f"Trial No.={trial.number}, HP_Set={trial.params}, \
          Score={trial.value}")
    print(f"Best Value ={study.best_value}")

def objective_func(trial):
 
    model = Sequential()

    hidden_layer_unit_choice = [32, 64, 256, 512, 1024]

    hidden_layers = trial.suggest_int('hidden_layers', 1, 6)
    
    model.add(Dense(units=trial.suggest_categorical('layer1', [8, 16]), 
                    input_shape=(input_shape, ),
                    name='dense1'))
    model.add(Activation(activation=trial.suggest_categorical(f'activation1',
                                                               ['relu', 'elu'])))

    for i in range(1, hidden_layers):
        
        model.add(Dense(units=trial.suggest_categorical(f'layer{i+1}', 
                                                        hidden_layer_unit_choice)))
        model.add(Dropout(trial.suggest_uniform(f'dropout{i+1}', 0, 0.8)))
        model.add(Activation(activation=trial.suggest_categorical(f'activation{i+1}', 
                                                                  ['relu', 'elu'])))

    model.add(Dense(classes))
    model.add(Activation(activation='softmax'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer=trial.suggest_categorical('optimizer', ['rmsprop', 'adam', 'sgd']))

    result = model.fit(x_train, y_train,
                      batch_size=4,
                      epochs=1,
                      verbose=3,
                      validation_split=0.2)

    validation_acc = np.amax(result.history['val_accuracy'])
    print('Validation accuracy:', validation_acc)

    return validation_acc


study = optuna.create_study(direction='maximize', sampler=TPESampler())
# increase the number of trials
study.optimize(objective_func, n_trials=5, callbacks=[log])
best_trial = study.best_trial.value

print(f"Best trial  accuracy: {best_trial}")
print("parameters for best trail are :")
for key, value in study.best_trial.params.items():
    print(f"{key}: {value}")

[32m[I 2023-03-13 03:39:51,477][0m A new study created in memory with name: no-name-786fb32d-4c77-40c1-8374-e50dc05eaa6d[0m
  model.add(Dropout(trial.suggest_uniform(f'dropout{i+1}', 0, 0.8)))
[32m[I 2023-03-13 03:40:13,204][0m Trial 0 finished with value: 0.9300000071525574 and parameters: {'hidden_layers': 4, 'layer1': 16, 'activation1': 'elu', 'layer2': 1024, 'dropout2': 0.253108118573581, 'activation2': 'elu', 'layer3': 512, 'dropout3': 0.27827391653624706, 'activation3': 'relu', 'layer4': 32, 'dropout4': 0.13852709093383445, 'activation4': 'relu', 'optimizer': 'sgd'}. Best is trial 0 with value: 0.9300000071525574.[0m


Validation accuracy: 0.9300000071525574
Trial No.=0, HP_Set={'hidden_layers': 4, 'layer1': 16, 'activation1': 'elu', 'layer2': 1024, 'dropout2': 0.253108118573581, 'activation2': 'elu', 'layer3': 512, 'dropout3': 0.27827391653624706, 'activation3': 'relu', 'layer4': 32, 'dropout4': 0.13852709093383445, 'activation4': 'relu', 'optimizer': 'sgd'},           Score=0.9300000071525574
Best Value =0.9300000071525574


[32m[I 2023-03-13 03:40:24,906][0m Trial 1 finished with value: 0.9352499842643738 and parameters: {'hidden_layers': 2, 'layer1': 16, 'activation1': 'elu', 'layer2': 512, 'dropout2': 0.28721228301155755, 'activation2': 'elu', 'optimizer': 'rmsprop'}. Best is trial 1 with value: 0.9352499842643738.[0m


Validation accuracy: 0.9352499842643738
Trial No.=1, HP_Set={'hidden_layers': 2, 'layer1': 16, 'activation1': 'elu', 'layer2': 512, 'dropout2': 0.28721228301155755, 'activation2': 'elu', 'optimizer': 'rmsprop'},           Score=0.9352499842643738
Best Value =0.9352499842643738


[32m[I 2023-03-13 03:40:36,337][0m Trial 2 finished with value: 0.8840000033378601 and parameters: {'hidden_layers': 3, 'layer1': 8, 'activation1': 'elu', 'layer2': 32, 'dropout2': 0.7715722557251717, 'activation2': 'elu', 'layer3': 512, 'dropout3': 0.07201000245934851, 'activation3': 'relu', 'optimizer': 'sgd'}. Best is trial 1 with value: 0.9352499842643738.[0m


Validation accuracy: 0.8840000033378601
Trial No.=2, HP_Set={'hidden_layers': 3, 'layer1': 8, 'activation1': 'elu', 'layer2': 32, 'dropout2': 0.7715722557251717, 'activation2': 'elu', 'layer3': 512, 'dropout3': 0.07201000245934851, 'activation3': 'relu', 'optimizer': 'sgd'},           Score=0.8840000033378601
Best Value =0.9352499842643738


[32m[I 2023-03-13 03:40:46,182][0m Trial 3 finished with value: 0.909250020980835 and parameters: {'hidden_layers': 1, 'layer1': 8, 'activation1': 'elu', 'optimizer': 'sgd'}. Best is trial 1 with value: 0.9352499842643738.[0m


Validation accuracy: 0.909250020980835
Trial No.=3, HP_Set={'hidden_layers': 1, 'layer1': 8, 'activation1': 'elu', 'optimizer': 'sgd'},           Score=0.909250020980835
Best Value =0.9352499842643738


[32m[I 2023-03-13 03:40:57,662][0m Trial 4 finished with value: 0.9154999852180481 and parameters: {'hidden_layers': 2, 'layer1': 8, 'activation1': 'relu', 'layer2': 64, 'dropout2': 0.21112289544859097, 'activation2': 'relu', 'optimizer': 'adam'}. Best is trial 1 with value: 0.9352499842643738.[0m


Validation accuracy: 0.9154999852180481
Trial No.=4, HP_Set={'hidden_layers': 2, 'layer1': 8, 'activation1': 'relu', 'layer2': 64, 'dropout2': 0.21112289544859097, 'activation2': 'relu', 'optimizer': 'adam'},           Score=0.9154999852180481
Best Value =0.9352499842643738
Best trial  accuracy: 0.9352499842643738
parameters for best trail are :
hidden_layers: 2
layer1: 16
activation1: elu
layer2: 512
dropout2: 0.28721228301155755
activation2: elu
optimizer: rmsprop


# TPE is one of the best Bayesian Hyperparameter Optimization Algorithm out there, now choose your poison Optuna Or HyperOpt?