## Section 5.1

#### Hyperparameter Optimization on Scikit-Learn Algorithms

In [2]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

import optuna
from optuna.samplers import TPESampler

# Load the digits dataset and hold out 30% of the samples for scoring.
# A fixed random_state makes train_test_split produce the same partition on
# every run, so scores from different runs are comparable.
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.3,
    random_state=42,
)

def log(study, trial):
    """Optuna callback: report the finished trial and the study's best value.

    Args:
        study: object exposing ``best_value``.
        trial: object exposing ``number``, ``params`` and ``value``.
    """
    trial_line = f"Trial No.={trial.number}, HP_Set={trial.params}, Score={trial.value}"
    print(trial_line)
    # best_value is read only after the trial line has been printed
    best_line = f"Best Value ={study.best_value}"
    print(best_line)

def objective_func(trial):
    """Optuna objective: sample one classifier configuration and score it.

    The trial first chooses the model family, then samples only the
    hyperparameters belonging to that family. The classifier is fitted on the
    module-level ``X_train``/``y_train`` and scored on ``X_test``/``y_test``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The value of ``clf.score(X_test, y_test)`` for the sampled classifier.
    """
    # The non-SVC branch builds a KNeighborsClassifier, so the choice is
    # labelled accordingly (it was previously recorded as "RandomForest").
    classifier_name = trial.suggest_categorical("classifier", ["SVC", "KNeighbors"])
    if classifier_name == "SVC":
        c = trial.suggest_loguniform("svc_c", 1e-2, 1e+11)
        gamma = trial.suggest_loguniform("svc_gamma", 1e-9, 1e+3)
        # Each kernel is listed once; the original list repeated 'rbf'.
        kernel = trial.suggest_categorical("svc_kernel", ['rbf', 'poly', 'sigmoid'])
        # Integer-valued parameter: sample 1..14 inclusive (same values as range(1, 15)).
        degree = trial.suggest_int("svc_degree", 1, 14)
        clf = SVC(C=c, gamma=gamma, kernel=kernel, degree=degree)
    else:
        algorithm = trial.suggest_categorical("algorithm", ['ball_tree', "kd_tree"])
        # Same values as range(1, 50), sampled as an integer parameter.
        leaf_size = trial.suggest_int("leaf_size", 1, 49)
        metric = trial.suggest_categorical(
            "metric", ["euclidean", "manhattan", "chebyshev", "minkowski"]
        )
        clf = KNeighborsClassifier(algorithm=algorithm, leaf_size=leaf_size, metric=metric)

    clf.fit(X_train, y_train)
    # NOTE(review): the tuning score is computed on the test split, so the
    # reported best accuracy is not an unbiased estimate. cross_val_score is
    # imported at the top of this cell but unused -- confirm whether
    # cross-validation on X_train was intended.
    val_acc = clf.score(X_test, y_test)

    return val_acc

# A fixed seed makes the sampler's own random draws repeatable between runs.
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study.optimize(objective_func, n_trials=3, callbacks=[log])
# Despite its name, best_trial holds the best trial's objective value.
best_trial = study.best_trial.value

print(f"Best trial  accuracy: {best_trial}")
print("parameters for best trial are :")
for key, value in study.best_trial.params.items():
    print(f"{key}: {value}")

[I 2020-09-10 02:57:15,805] Trial 0 finished with value: 0.9666666666666667 and parameters: {'classifier': 'SVC', 'svc_c': 1252195959.321192, 'svc_gamma': 7.979672376892811e-09, 'svc_kernel': 'rbf', 'svc_degree': 6}. Best is trial 0 with value: 0.9666666666666667.


Trial No.=0, HP_Set={'classifier': 'SVC', 'svc_c': 1252195959.321192, 'svc_gamma': 7.979672376892811e-09, 'svc_kernel': 'rbf', 'svc_degree': 6}, Score=0.9666666666666667
Best Value =0.9666666666666667


[I 2020-09-10 02:57:16,749] Trial 1 finished with value: 0.6111111111111112 and parameters: {'classifier': 'SVC', 'svc_c': 476.57129824526254, 'svc_gamma': 8.796518473446763e-09, 'svc_kernel': 'rbf', 'svc_degree': 6}. Best is trial 0 with value: 0.9666666666666667.


Trial No.=1, HP_Set={'classifier': 'SVC', 'svc_c': 476.57129824526254, 'svc_gamma': 8.796518473446763e-09, 'svc_kernel': 'rbf', 'svc_degree': 6}, Score=0.6111111111111112
Best Value =0.9666666666666667


[I 2020-09-10 02:57:17,693] Trial 2 finished with value: 0.08518518518518518 and parameters: {'classifier': 'SVC', 'svc_c': 22.847037995560534, 'svc_gamma': 37.76383003319741, 'svc_kernel': 'rbf', 'svc_degree': 12}. Best is trial 0 with value: 0.9666666666666667.


Trial No.=2, HP_Set={'classifier': 'SVC', 'svc_c': 22.847037995560534, 'svc_gamma': 37.76383003319741, 'svc_kernel': 'rbf', 'svc_degree': 12}, Score=0.08518518518518518
Best Value =0.9666666666666667
Best trial  accuracy: 0.9666666666666667
parameters for best trail are :
classifier: SVC
svc_c: 1252195959.321192
svc_gamma: 7.979672376892811e-09
svc_kernel: rbf
svc_degree: 6


#### Optimizing a neural network with Optuna

In [1]:
from keras.datasets import mnist
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.utils import np_utils
import numpy as np

import optuna
from optuna.samplers import TPESampler

# Number of target classes and length of each flattened input vector.
classes = 10
input_shape = 784

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# reshape(-1, input_shape) lets NumPy infer the number of samples instead of
# hard-coding 60000 / 10000; the values are then cast to float32 and divided
# by 255.
x_train = x_train.reshape(-1, input_shape).astype('float32') / 255
x_test = x_test.reshape(-1, input_shape).astype('float32') / 255

# One-hot encode the labels to match the categorical_crossentropy loss.
y_train = np_utils.to_categorical(y_train, classes)
y_test = np_utils.to_categorical(y_test, classes)

def log(study, trial):
    """Optuna callback: report the finished trial and the study's best value.

    Args:
        study: object exposing ``best_value``.
        trial: object exposing ``number``, ``params`` and ``value``.
    """
    # Adjacent f-strings are concatenated by the parser. A backslash
    # continuation inside a single literal would instead embed the next
    # line's leading indentation in the printed message.
    print(f"Trial No.={trial.number}, HP_Set={trial.params}, "
          f"Score={trial.value}")
    print(f"Best Value ={study.best_value}")

def objective_func(trial):
    """Optuna objective: build, train and score one dense network.

    The trial samples the number of hidden layers (1-6), the width and
    activation of each layer, a dropout rate for every layer after the first,
    and the optimizer. The model is trained for one epoch on the module-level
    ``x_train``/``y_train`` with ``validation_split=0.2``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The maximum ``val_accuracy`` recorded in the training history.
    """
    # Function-scope import keeps this block self-contained. Clearing the
    # session drops Keras state left behind by models built in earlier trials,
    # so memory does not keep growing over the course of the study.
    from keras.backend import clear_session
    clear_session()

    model = Sequential()

    hidden_layer_unit_choice = [32, 64, 256, 512, 1024]

    hidden_layers = trial.suggest_int('hidden_layers', 1, 6)

    # First hidden layer: small width, fixed name, takes the flattened input.
    model.add(Dense(units=trial.suggest_categorical('layer1', [8, 16]),
                    input_shape=(input_shape, ),
                    name='dense1'))
    model.add(Activation(activation=trial.suggest_categorical('activation1',
                                                               ['relu', 'elu'])))

    # Remaining hidden layers are numbered 2..hidden_layers, so the total
    # number of hidden layers equals the sampled value.
    for i in range(1, hidden_layers):

        model.add(Dense(units=trial.suggest_categorical(f'layer{i+1}',
                                                        hidden_layer_unit_choice)))
        model.add(Dropout(trial.suggest_uniform(f'dropout{i+1}', 0, 0.8)))
        model.add(Activation(activation=trial.suggest_categorical(f'activation{i+1}',
                                                                  ['relu', 'elu'])))

    # Output layer: one unit per class with softmax probabilities.
    model.add(Dense(classes))
    model.add(Activation(activation='softmax'))

    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer=trial.suggest_categorical('optimizer', ['rmsprop', 'adam', 'sgd']))

    result = model.fit(x_train, y_train,
                      batch_size=4,
                      epochs=1,
                      verbose=3,
                      validation_split=0.2)

    validation_acc = np.amax(result.history['val_accuracy'])
    print('Validation accuracy:', validation_acc)

    return validation_acc


# A fixed seed makes the sampler's own random draws repeatable between runs.
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
# n_trials is kept small here; increase the number of trials for a real search
study.optimize(objective_func, n_trials=5, callbacks=[log])
# Despite its name, best_trial holds the best trial's objective value.
best_trial = study.best_trial.value

print(f"Best trial  accuracy: {best_trial}")
print("parameters for best trial are :")
for key, value in study.best_trial.params.items():
    print(f"{key}: {value}")

Validation accuracy: 0.9381666779518127


[I 2020-09-10 02:51:57,161] Trial 0 finished with value: 0.9381666779518127 and parameters: {'hidden_layers': 3, 'layer1': 16, 'activation1': 'relu', 'layer2': 256, 'dropout2': 0.25141053044999245, 'activation2': 'elu', 'layer3': 512, 'dropout3': 0.11016390109757027, 'activation3': 'relu', 'optimizer': 'sgd'}. Best is trial 0 with value: 0.9381666779518127.


Trial No.=0, HP_Set={'hidden_layers': 3, 'layer1': 16, 'activation1': 'relu', 'layer2': 256, 'dropout2': 0.25141053044999245, 'activation2': 'elu', 'layer3': 512, 'dropout3': 0.11016390109757027, 'activation3': 'relu', 'optimizer': 'sgd'},           Score=0.9381666779518127
Best Value =0.9381666779518127
Validation accuracy: 0.9210833311080933


[I 2020-09-10 02:54:42,359] Trial 1 finished with value: 0.9210833311080933 and parameters: {'hidden_layers': 6, 'layer1': 16, 'activation1': 'elu', 'layer2': 1024, 'dropout2': 0.0770311428707621, 'activation2': 'relu', 'layer3': 256, 'dropout3': 0.23867690400017977, 'activation3': 'elu', 'layer4': 512, 'dropout4': 0.4670137464296996, 'activation4': 'relu', 'layer5': 1024, 'dropout5': 0.6589426174719933, 'activation5': 'elu', 'layer6': 64, 'dropout6': 0.1908511103364826, 'activation6': 'elu', 'optimizer': 'adam'}. Best is trial 0 with value: 0.9381666779518127.


Trial No.=1, HP_Set={'hidden_layers': 6, 'layer1': 16, 'activation1': 'elu', 'layer2': 1024, 'dropout2': 0.0770311428707621, 'activation2': 'relu', 'layer3': 256, 'dropout3': 0.23867690400017977, 'activation3': 'elu', 'layer4': 512, 'dropout4': 0.4670137464296996, 'activation4': 'relu', 'layer5': 1024, 'dropout5': 0.6589426174719933, 'activation5': 'elu', 'layer6': 64, 'dropout6': 0.1908511103364826, 'activation6': 'elu', 'optimizer': 'adam'},           Score=0.9210833311080933
Best Value =0.9381666779518127
Validation accuracy: 0.8883333206176758


[I 2020-09-10 02:55:29,369] Trial 2 finished with value: 0.8883333206176758 and parameters: {'hidden_layers': 4, 'layer1': 8, 'activation1': 'elu', 'layer2': 512, 'dropout2': 0.7633752407047723, 'activation2': 'elu', 'layer3': 64, 'dropout3': 0.50947871168164, 'activation3': 'relu', 'layer4': 32, 'dropout4': 0.20842638075616451, 'activation4': 'elu', 'optimizer': 'rmsprop'}. Best is trial 0 with value: 0.9381666779518127.


Trial No.=2, HP_Set={'hidden_layers': 4, 'layer1': 8, 'activation1': 'elu', 'layer2': 512, 'dropout2': 0.7633752407047723, 'activation2': 'elu', 'layer3': 64, 'dropout3': 0.50947871168164, 'activation3': 'relu', 'layer4': 32, 'dropout4': 0.20842638075616451, 'activation4': 'elu', 'optimizer': 'rmsprop'},           Score=0.8883333206176758
Best Value =0.9381666779518127
Validation accuracy: 0.39908334612846375


[I 2020-09-10 02:56:38,688] Trial 3 finished with value: 0.39908334612846375 and parameters: {'hidden_layers': 5, 'layer1': 16, 'activation1': 'relu', 'layer2': 64, 'dropout2': 0.7005369634674325, 'activation2': 'elu', 'layer3': 32, 'dropout3': 0.6809845398478749, 'activation3': 'elu', 'layer4': 256, 'dropout4': 0.6103458818599555, 'activation4': 'relu', 'layer5': 512, 'dropout5': 0.6887479204074419, 'activation5': 'relu', 'optimizer': 'adam'}. Best is trial 0 with value: 0.9381666779518127.


Trial No.=3, HP_Set={'hidden_layers': 5, 'layer1': 16, 'activation1': 'relu', 'layer2': 64, 'dropout2': 0.7005369634674325, 'activation2': 'elu', 'layer3': 32, 'dropout3': 0.6809845398478749, 'activation3': 'elu', 'layer4': 256, 'dropout4': 0.6103458818599555, 'activation4': 'relu', 'layer5': 512, 'dropout5': 0.6887479204074419, 'activation5': 'relu', 'optimizer': 'adam'},           Score=0.39908334612846375
Best Value =0.9381666779518127
Validation accuracy: 0.8981666564941406


[I 2020-09-10 02:57:14,683] Trial 4 finished with value: 0.8981666564941406 and parameters: {'hidden_layers': 1, 'layer1': 8, 'activation1': 'elu', 'optimizer': 'rmsprop'}. Best is trial 0 with value: 0.9381666779518127.


Trial No.=4, HP_Set={'hidden_layers': 1, 'layer1': 8, 'activation1': 'elu', 'optimizer': 'rmsprop'},           Score=0.8981666564941406
Best Value =0.9381666779518127
Best trial  accuracy: 0.9381666779518127
parameters for best trail are :
hidden_layers: 3
layer1: 16
activation1: relu
layer2: 256
dropout2: 0.25141053044999245
activation2: elu
layer3: 512
dropout3: 0.11016390109757027
activation3: relu
optimizer: sgd
