# Optuna 

Run Optuna locally on a single machine.

In [1]:
import optuna

## Simple run of Optuna

Objective function

In [2]:
def my_function(x, y):
    """Return the test surface ((x+1)(y-2))^2 - 2(x-2) - (y+2) evaluated at (x, y).

    Written as a ``def`` instead of a lambda bound to a name, so the function
    reports a real ``__name__`` in tracebacks and can carry this docstring.
    """
    return ((x + 1) * (y - 2)) ** 2 - 2 * (x - 2) - (y + 2)

def objective(trial: optuna.trial.Trial):
    """Sample a point (x, y) with both coordinates in [-20, 20] and score it with ``my_function``."""
    # The generator is consumed left to right, so "x" is requested before "y".
    x, y = (trial.suggest_float(name, -20, 20) for name in ("x", "y"))
    return my_function(x, y)

Study definition

In [3]:
# No storage is given, so the study lives in memory; lower objective values are better.
# To silence the per-trial INFO log lines, uncomment:
# optuna.logging.set_verbosity(optuna.logging.WARN)
study = optuna.create_study(
    direction="minimize",
    study_name="MyStudy",
)

[32m[I 2021-08-18 13:27:58,366][0m A new study created in memory with name: MyStudy[0m


Run the study

In [4]:
# The search stops after 100 trials or 10 seconds, whichever limit is reached first.
study.optimize(func=objective, timeout=10, n_trials=100)
best_params = study.best_params
print(best_params)

[32m[I 2021-08-18 13:27:59,530][0m Trial 0 finished with value: 1010.4007928988541 and parameters: {'x': 0.7566670175949355, 'y': -15.947192514271716}. Best is trial 0 with value: 1010.4007928988541.[0m
[32m[I 2021-08-18 13:27:59,531][0m Trial 1 finished with value: 57.909965245435984 and parameters: {'x': 0.5954876876944084, 'y': -2.625992819028159}. Best is trial 1 with value: 57.909965245435984.[0m
[32m[I 2021-08-18 13:27:59,532][0m Trial 2 finished with value: 13101.58710561401 and parameters: {'x': 10.177375829804362, 'y': -8.244472586971657}. Best is trial 1 with value: 57.909965245435984.[0m
[32m[I 2021-08-18 13:27:59,534][0m Trial 3 finished with value: 27.034923318910586 and parameters: {'x': -1.1771572075764958, 'y': -14.320760179531984}. Best is trial 3 with value: 27.034923318910586.[0m
[32m[I 2021-08-18 13:27:59,537][0m Trial 4 finished with value: 2395.78590145289 and parameters: {'x': 6.725251301237655, 'y': -4.345333107335048}. Best is trial 3 with value: 

{'x': 2.764407281869908, 'y': 2.478744526835}


## Store and restart

In [5]:
%%bash
# Create the data/ directory used for the SQLite database (-p: no error if it already exists).
mkdir -p data

In [6]:
# Storage URL of the SQLite file (relative path data/optuna.db) passed to Optuna as `storage`.
db_location = "sqlite:///data/optuna.db"
# Name under which the study is created in, and later loaded from, that storage.
study_name = "RestartStudy"

In [7]:
%%bash 
# Delete any database left over from a previous run (-f: no error if the file is missing),
# so the study in the next cell is created from scratch.
rm -f ./data/optuna.db

In [8]:
# Same objective as before, but the trials are persisted to the database at db_location.
study = optuna.create_study(
    storage=db_location,
    study_name=study_name,
    direction="minimize",
)
study.optimize(func=objective, timeout=10, n_trials=10)
best_params = study.best_params
print(best_params)

[32m[I 2021-08-18 13:28:07,446][0m A new study created in RDB with name: RestartStudy[0m
[32m[I 2021-08-18 13:28:07,541][0m Trial 0 finished with value: 22236.460129719155 and parameters: {'x': 9.651043979330783, 'y': 16.0108898319764}. Best is trial 0 with value: 22236.460129719155.[0m
[32m[I 2021-08-18 13:28:07,610][0m Trial 1 finished with value: 2493.2598265719935 and parameters: {'x': 18.843214215497753, 'y': -0.5340309787791}. Best is trial 1 with value: 2493.2598265719935.[0m
[32m[I 2021-08-18 13:28:07,681][0m Trial 2 finished with value: 4513.189805347608 and parameters: {'x': -8.049812248759505, 'y': 11.522425040152942}. Best is trial 1 with value: 2493.2598265719935.[0m
[32m[I 2021-08-18 13:28:07,752][0m Trial 3 finished with value: 56794.57472721095 and parameters: {'x': 16.037613428517325, 'y': -11.989873965300625}. Best is trial 1 with value: 2493.2598265719935.[0m
[32m[I 2021-08-18 13:28:07,837][0m Trial 4 finished with value: 99.77900494369216 and parame

{'x': -1.580739784997924, 'y': -13.507953009190587}


In [9]:
# Reload the persisted study from storage and continue optimising *that* object,
# which is what a restart would do. (Previously `reloaded_study` was loaded but
# the cell kept calling the earlier `study` object, so the reload was never exercised.)
reloaded_study = optuna.load_study(study_name=study_name, storage=db_location)
# No n_trials here: the run is bounded only by the 10-second timeout.
reloaded_study.optimize(objective, timeout=10)
best_params = reloaded_study.best_params
print(best_params)

[32m[I 2021-08-18 13:28:12,181][0m Trial 10 finished with value: 167598.2079005521 and parameters: {'x': -19.847772016734886, 'y': -19.716762529514206}. Best is trial 4 with value: 99.77900494369216.[0m
[32m[I 2021-08-18 13:28:12,264][0m Trial 11 finished with value: 413.77258834470763 and parameters: {'x': 1.4966298368990225, 'y': -6.097137926605075}. Best is trial 4 with value: 99.77900494369216.[0m
[32m[I 2021-08-18 13:28:12,340][0m Trial 12 finished with value: 2379.564361129555 and parameters: {'x': 7.015178999409873, 'y': -4.096187707101802}. Best is trial 4 with value: 99.77900494369216.[0m
[32m[I 2021-08-18 13:28:12,412][0m Trial 13 finished with value: 948.6694443909681 and parameters: {'x': 2.6495472438459817, 'y': -6.425615460056611}. Best is trial 4 with value: 99.77900494369216.[0m
[32m[I 2021-08-18 13:28:12,483][0m Trial 14 finished with value: 24.851225438586923 and parameters: {'x': -0.9394278957858504, 'y': -19.306737041262853}. Best is trial 14 with valu

{'x': 19.278139037661507, 'y': 1.9821475334838006}


## Visualisations

In [10]:
# Optimization history of the study reloaded from the database.
optuna.visualization.plot_optimization_history(study=reloaded_study)

In [11]:
# Contour plot of the objective over the two tuned parameters, "x" and "y".
optuna.visualization.plot_contour(reloaded_study, params=["x", "y"])

## Multiobjective with scikit-learn

In [12]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score
# Load the breast-cancer dataset bundled with scikit-learn.
data = load_breast_cancer()

# Hold out 20% of the samples as a validation set. The split is seeded so that
# Restart & Run All always produces the same partition.
X, X_validate, y, y_validate = train_test_split(
    data.data, data.target, test_size=0.2, random_state=0
)

In [15]:
def dct_optimize_multi(trial: optuna.trial.Trial):
    """Train a decision tree with trial-suggested hyperparameters and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``criterion``, ``max_depth``, ``min_samples_split``
        and ``class_weight``.

    Returns
    -------
    tuple
        ``(accuracy, f1)`` computed on a 20% hold-out of the module-level ``X``/``y``.
    """
    # Fixed seed: every trial is scored on the same train/test partition, so
    # differences between trials come from the hyperparameters, not from the split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy'])
    max_depth = trial.suggest_int('max_depth', 1, 5)
    # `step` is passed by keyword; newer Optuna releases deprecate passing it positionally.
    min_samples_split = trial.suggest_int('min_samples_split', 2, 10, step=2)
    class_weight = trial.suggest_categorical('class_weight', ['balanced', None])

    dtc = DecisionTreeClassifier(
        criterion=criterion,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        class_weight=class_weight,
    )
    dtc.fit(X_train, y_train)
    y_pred = dtc.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    return accuracy, f1

# Two objectives (accuracy and F1, in the order the objective returns them), both maximised.
breast_study_multi = optuna.create_study(
    directions=['maximize', 'maximize'],
    study_name='test-multi',
)
breast_study_multi.optimize(func=dct_optimize_multi, n_trials=40)

# With several objectives there is no single best trial; print the list of best trials.
print(breast_study_multi.best_trials)

[32m[I 2021-08-18 13:29:13,496][0m A new study created in memory with name: test-multi[0m
[32m[I 2021-08-18 13:29:13,506][0m Trial 0 finished with values: [0.9120879120879121, 0.9215686274509804] and parameters: {'criterion': 'gini', 'max_depth': 3, 'min_samples_split': 2, 'class_weight': None}. [0m
[32m[I 2021-08-18 13:29:13,516][0m Trial 1 finished with values: [0.8901098901098901, 0.9152542372881356] and parameters: {'criterion': 'entropy', 'max_depth': 4, 'min_samples_split': 10, 'class_weight': 'balanced'}. [0m
[32m[I 2021-08-18 13:29:13,522][0m Trial 2 finished with values: [0.9010989010989011, 0.9243697478991596] and parameters: {'criterion': 'gini', 'max_depth': 4, 'min_samples_split': 2, 'class_weight': None}. [0m
[32m[I 2021-08-18 13:29:13,528][0m Trial 3 finished with values: [0.9120879120879121, 0.9272727272727272] and parameters: {'criterion': 'gini', 'max_depth': 1, 'min_samples_split': 6, 'class_weight': 'balanced'}. [0m
[32m[I 2021-08-18 13:29:13,537][0

[FrozenTrial(number=15, values=[0.967032967032967, 0.9743589743589743], datetime_start=datetime.datetime(2021, 8, 18, 13, 29, 13, 612653), datetime_complete=datetime.datetime(2021, 8, 18, 13, 29, 13, 619654), params={'criterion': 'entropy', 'max_depth': 3, 'min_samples_split': 4, 'class_weight': None}, distributions={'criterion': CategoricalDistribution(choices=('gini', 'entropy')), 'max_depth': IntUniformDistribution(high=5, low=1, step=1), 'min_samples_split': IntUniformDistribution(high=10, low=2, step=2), 'class_weight': CategoricalDistribution(choices=('balanced', None))}, user_attrs={}, system_attrs={'nsga2:generation': 0}, intermediate_values={}, trial_id=15, state=TrialState.COMPLETE, value=None)]
