In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

# manage ray's relative imports
import ray
runtime_env = {"working_dir": ".." }
ray.init(runtime_env=runtime_env, dashboard_port=13065, include_dashboard=True)

from ray import tune
from ray.tune.suggest.optuna import OptunaSearch
from ray.tune import JupyterNotebookReporter

# manage beams's relative imports
import sys
sys.path.append('..')

from src.beam import beam_arguments, Experiment, Study
from src.beam import UniversalDataset, UniversalBatchSampler
from src.beam import Algorithm
from src.beam import LinearNet

from src.beam import DataTensor
from src.beam.utils import is_notebook
from cifar10_example import cifar10_algorithm_generator, Cifar10Network

from ray.tune.suggest.hebo import HEBOSearch

2022-06-14 12:51:13,043	INFO services.py:1456 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:13065[39m[22m
2022-06-14 12:51:14,796	INFO packaging.py:388 -- Creating a file package for local directory '..'.
2022-06-14 12:51:15,366	INFO packaging.py:241 -- Pushing file package 'gcs://_ray_pkg_b281c113605b1358.zip' (0.71MiB) to Ray cluster...
2022-06-14 12:51:15,389	INFO packaging.py:243 -- Successfully pushed file package 'gcs://_ray_pkg_b281c113605b1358.zip'.


In [2]:
path_to_data = '/localdata/elads/data/datasets/cifar10'
root_dir = '/localdata/elads/data/cifar10'

# path_to_data = '/home/shared/data/dataset/cifar10'
# root_dir = '/home/shared/data/results/cifar10'

## Training with a single worker

In [7]:
args = beam_arguments(f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --half --lr-d=0.01 --batch-size=1024",
                      "--n-epochs=6 --epoch-length-train=1000000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
                      "--weight-decay=1e-5 --beta1=0.9 --beta2=0.9", path_to_data=path_to_data)

In [3]:
# here you put all actions which are performed only once before initializing the workers
# for example, setting running arguments and experiment:

args = beam_arguments(f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --half --lr-d=0.01 --batch-size=1024",
                      "--n-epochs=6 --epoch-length-train=1000000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
                      "--weight-decay=1e-5 --beta1=0.9 --beta2=0.9", path_to_data=path_to_data)

experiment = Experiment(args)

[32m2022-06-14 06:19:35.973[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m99[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 06:19:35.975[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m100[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 06:19:35.976[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m104[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 06:19:35.976[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m104[0m - [1midentifier: debug[0m
[32m2022-06-14 06:19:35.977[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m104[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 06:19:35.978[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m104[0m - [1mmp_port: None[0m
[32m2022-06-14 06:19:35.979[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m104[0m - [1mroot_d

## Train with single or multiple workers

In [4]:
alg = experiment(cifar10_algorithm_generator)

[32m2022-06-14 06:19:36.782[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m454[0m - [1mSingle worker mode[0m
[32m2022-06-14 06:19:36.783[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m44[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/975 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 06:20:51.928[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m308[0m - [1mFinished epoch 1/6:[0m
[32m2022-06-14 06:20:51.933[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtrain:[0m
[32m2022-06-14 06:20:51.934[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 528.213 	|[0m
[32m2022-06-14 06:20:51.935[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1macc 0.81942 	|[0m
[32m2022-06-14 06:20:51.936[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mlr 0.01 	|[0m
[32m2022-06-14 06:20:51.937[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtest:[0m
[32m2022-06-14 06:20:51.937[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 530.667 	|[0m
[32m2022-06-14 06:20:51.938[

train:   0%|          | 0/975 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 06:21:58.702[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m308[0m - [1mFinished epoch 2/6:[0m
[32m2022-06-14 06:21:58.706[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtrain:[0m
[32m2022-06-14 06:21:58.707[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 175.837 	|[0m
[32m2022-06-14 06:21:58.708[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1macc 0.940334 	|[0m
[32m2022-06-14 06:21:58.709[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mlr 0.00316228 	|[0m
[32m2022-06-14 06:21:58.709[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtest:[0m
[32m2022-06-14 06:21:58.710[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 439.417 	|[0m
[32m2022-06-14 06:21:5

train:   0%|          | 0/975 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 06:23:07.093[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m308[0m - [1mFinished epoch 3/6:[0m
[32m2022-06-14 06:23:07.100[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtrain:[0m
[32m2022-06-14 06:23:07.101[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 106.344 	|[0m
[32m2022-06-14 06:23:07.103[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1macc 0.964535 	|[0m
[32m2022-06-14 06:23:07.104[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mlr 0.001 	|[0m
[32m2022-06-14 06:23:07.105[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtest:[0m
[32m2022-06-14 06:23:07.106[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 439.167 	|[0m
[32m2022-06-14 06:23:07.107

train:   0%|          | 0/975 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 06:24:17.013[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m308[0m - [1mFinished epoch 4/6:[0m
[32m2022-06-14 06:24:17.017[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtrain:[0m
[32m2022-06-14 06:24:17.018[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 94.3002 	|[0m
[32m2022-06-14 06:24:17.020[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1macc 0.968589 	|[0m
[32m2022-06-14 06:24:17.021[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mlr 0.000316228 	|[0m
[32m2022-06-14 06:24:17.021[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtest:[0m
[32m2022-06-14 06:24:17.022[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 453.583 	|[0m
[32m2022-06-14 06:24:

train:   0%|          | 0/975 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 06:25:26.106[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m308[0m - [1mFinished epoch 5/6:[0m
[32m2022-06-14 06:25:26.111[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtrain:[0m
[32m2022-06-14 06:25:26.111[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 87.093 	|[0m
[32m2022-06-14 06:25:26.112[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1macc 0.971361 	|[0m
[32m2022-06-14 06:25:26.113[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mlr 0.0001 	|[0m
[32m2022-06-14 06:25:26.114[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtest:[0m
[32m2022-06-14 06:25:26.114[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 435.583 	|[0m
[32m2022-06-14 06:25:26.115

train:   0%|          | 0/975 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 06:26:35.101[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m308[0m - [1mFinished epoch 6/6:[0m
[32m2022-06-14 06:26:35.106[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtrain:[0m
[32m2022-06-14 06:26:35.106[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 80.7526 	|[0m
[32m2022-06-14 06:26:35.107[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1macc 0.973349 	|[0m
[32m2022-06-14 06:26:35.108[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mlr 3.16228e-05 	|[0m
[32m2022-06-14 06:26:35.109[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m346[0m - [1mtest:[0m
[32m2022-06-14 06:26:35.109[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m350[0m - [1mloss 446.306 	|[0m
[32m2022-06-14 06:26:

In [5]:
np.mean(alg.evaluate('test')['scalar']['acc'])

test:   0%|          | 0/10 [00:00<?, ?it/s]

0.8862802922725678

In [6]:
# alg.optimizers['net'].scaler.get_scale()

## Hyperparameter search with native optuna

In [3]:
args = beam_arguments(f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --half --lr-d=0.01 --batch-size=1024",
                      "--n-epochs=4 --epoch-length-train=200000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
                      "--weight-decay=1e-5 --device=1 --beta1=0.9 --beta2=0.9", path_to_data=path_to_data)

study = Study(cifar10_algorithm_generator, args)

[32m2022-06-14 12:22:13.838[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m85[0m - [1mHyperparameter Optimization[0m
[32m2022-06-14 12:22:13.839[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m86[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 12:22:13.840[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 12:22:13.841[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 12:22:13.842[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1midentifier: debug_hp_optimization_20220614_122213[0m
[32m2022-06-14 12:22:13.843[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 12:22:13.844[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[

In [58]:
def suggest(trial):
    lr = trial.suggest_loguniform("lr", 1e-3, 2e-2)
    print('My suggestion')
    print(lr)
    return {'lr_dense': lr}
    

In [59]:
study.optuna(suggest, direction='maximize', n_jobs=1, n_trials=10)

[32m[I 2022-06-14 09:37:27,339][0m A new study created in memory with name: cifar10/CIFAR10Algorithm/debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m


My suggestion
0.004812922221223897
[32m2022-06-14 09:37:27.342[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:37:27.343[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:37:27.345[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:37:27.345[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:37:27.346[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:37:27.347[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:38:20,651][0m Trial 0 finished with value: 0.8161892361111112 and parameters: {'lr': 0.004812922221223897}. Best is trial 0 with value: 0.8161892361111112.[0m


My suggestion
0.004845938308034776
[32m2022-06-14 09:38:20.653[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:38:20.654[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:38:20.655[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:38:20.656[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:38:20.657[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:38:20.657[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:39:14,827][0m Trial 1 finished with value: 0.8365885416666666 and parameters: {'lr': 0.004845938308034776}. Best is trial 1 with value: 0.8365885416666666.[0m


My suggestion
0.01711712997670714
[32m2022-06-14 09:39:14.829[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:39:14.830[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:39:14.831[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:39:14.832[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:39:14.833[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:39:14.833[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:40:10,465][0m Trial 2 finished with value: 0.8347439236111112 and parameters: {'lr': 0.01711712997670714}. Best is trial 1 with value: 0.8365885416666666.[0m


My suggestion
0.0027556186008022045
[32m2022-06-14 09:40:10.467[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:40:10.468[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:40:10.469[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:40:10.470[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:40:10.471[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:40:10.471[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: No

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:41:07,001][0m Trial 3 finished with value: 0.8373480902777778 and parameters: {'lr': 0.0027556186008022045}. Best is trial 3 with value: 0.8373480902777778.[0m


My suggestion
0.0011192231046584593
[32m2022-06-14 09:41:07.004[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:41:07.005[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:41:07.006[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:41:07.007[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:41:07.008[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:41:07.008[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: No

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:42:03,956][0m Trial 4 finished with value: 0.7991536458333334 and parameters: {'lr': 0.0011192231046584593}. Best is trial 3 with value: 0.8373480902777778.[0m


My suggestion
0.00388837124079873
[32m2022-06-14 09:42:03.958[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:42:03.959[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:42:03.960[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:42:03.961[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:42:03.961[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:42:03.962[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:43:00,569][0m Trial 5 finished with value: 0.8327907986111112 and parameters: {'lr': 0.00388837124079873}. Best is trial 3 with value: 0.8373480902777778.[0m


My suggestion
0.00320599649799676
[32m2022-06-14 09:43:00.572[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:43:00.573[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:43:00.573[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:43:00.574[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:43:00.575[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:43:00.576[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:43:57,500][0m Trial 6 finished with value: 0.8381076388888888 and parameters: {'lr': 0.00320599649799676}. Best is trial 6 with value: 0.8381076388888888.[0m


My suggestion
0.004899569978633089
[32m2022-06-14 09:43:57.502[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:43:57.503[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:43:57.504[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:43:57.505[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:43:57.506[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:43:57.506[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:44:54,074][0m Trial 7 finished with value: 0.8291015625 and parameters: {'lr': 0.004899569978633089}. Best is trial 6 with value: 0.8381076388888888.[0m


My suggestion
0.01612558040441622
[32m2022-06-14 09:44:54.077[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:44:54.078[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:44:54.079[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:44:54.079[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:44:54.080[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:44:54.081[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:45:50,822][0m Trial 8 finished with value: 0.8337673611111112 and parameters: {'lr': 0.01612558040441622}. Best is trial 6 with value: 0.8381076388888888.[0m


My suggestion
0.002505511913619016
[32m2022-06-14 09:45:50.824[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:45:50.825[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:45:50.826[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:45:50.827[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:45:50.828[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:45:50.829[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:46:47,584][0m Trial 9 finished with value: 0.8180338541666666 and parameters: {'lr': 0.002505511913619016}. Best is trial 6 with value: 0.8381076388888888.[0m


<optuna.study.study.Study at 0x7fda3fc58280>

## Hyperparameter search with ray-tune and optuna

In [3]:
args = beam_arguments(f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --half --lr-d=0.01 --batch-size=1024",
                      "--n-epochs=5 --epoch-length-train=200000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
                      "--weight-decay=1e-5 --device=0 --beta1=0.9 --beta2=0.9", path_to_data=path_to_data)

study = Study(cifar10_algorithm_generator, args)

[32m2022-06-14 12:51:22.738[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m85[0m - [1mHyperparameter Optimization[0m
[32m2022-06-14 12:51:22.740[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m86[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 12:51:22.741[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 12:51:22.742[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 12:51:22.743[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1midentifier: debug_hp_optimization_20220614_125122[0m
[32m2022-06-14 12:51:22.745[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 12:51:22.746[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[

In [None]:
# hebo = HEBOSearch(metric="mean_accuracy", mode="max")

# analysis = study.tune(config={"lr_dense": tune.loguniform(1e-3, 2e-2),
#                               "weight_decay": tune.loguniform(1e-6, 1e-4),
#                               "gamma": tune.loguniform(.1, .9),
#                               "dropout": tune.uniform(0, .75),
#                               "scale_down": tune.uniform(0.4, .7),
#                               "scale_up": tune.uniform(0.9, 1.2),
#                               "ratio_down": tune.uniform(0.7, .95),
#                               "ratio_up": tune.uniform(1.05, 1.4),
#                               "channels": tune.choice([128, 256, 512]),
#                               "batch_size": tune.choice([512, 1024, 2048]),},
#                        metric="mean_accuracy",
#                        max_concurrent_trials=4,
#                        resources_per_trial={"gpu": 1},
#                        mode="max",
#                        search_alg=hebo,
#                       progress_reporter=JupyterNotebookReporter(overwrite=True),
#                        num_samples=400)


analysis = study.tune(config={"lr_dense": tune.loguniform(1e-3, 2e-2),
                              "weight_decay": tune.loguniform(1e-6, 1e-4),
                              "gamma": tune.loguniform(.1, .9),
                              "dropout": tune.uniform(0, .75),
                              "scale_down": tune.uniform(0.4, .7),
                              "scale_up": tune.uniform(0.9, 1.2),
                              "ratio_down": tune.uniform(0.7, .95),
                              "ratio_up": tune.uniform(1.05, 1.4),
                              "channels": tune.choice([128, 256, 512]),
                              "batch_size": tune.choice([512, 1024, 2048]),},
                       metric="mean_accuracy",
                       max_concurrent_trials=4,
                       resources_per_trial={"gpu": 1},
                       mode="max",
                       search_alg=OptunaSearch(),
                      progress_reporter=JupyterNotebookReporter(overwrite=True),
                       num_samples=400)


Trial name,status,loc,batch_size,channels,dropout,gamma,lr_dense,ratio_down,ratio_up,scale_down,scale_up,weight_decay,acc,iter,total time (s)
runner_tune_0d7f60d4,RUNNING,172.17.0.2:107145,512,512,0.544335,0.812667,0.00248187,0.723178,1.05638,0.680232,1.18267,4.84139e-05,0.853002,2,80.0163
runner_tune_e0652ce6,RUNNING,172.17.0.2:104311,512,512,0.552046,0.69653,0.00253653,0.819709,1.06885,0.674288,1.17596,1.77434e-05,0.880243,4,134.16
runner_tune_e6348540,RUNNING,172.17.0.2:104708,512,512,0.5467,0.688655,0.00261118,0.723102,1.08356,0.679148,1.13627,1.81614e-05,0.872533,4,135.975
runner_tune_f4929be0,RUNNING,172.17.0.2:105603,512,512,0.551451,0.709076,0.00246216,0.717842,1.07593,0.684372,1.19865,4.19013e-05,0.869757,3,104.331
runner_tune_0422009a,TERMINATED,172.17.0.2:43408,512,512,0.593296,0.320698,0.00755197,0.706525,1.23019,0.547238,1.12604,2.69908e-05,0.854955,5,166.396
runner_tune_0cf21424,TERMINATED,172.17.0.2:75507,512,128,0.524347,0.747728,0.00218162,0.731857,1.33853,0.596853,1.1485,1.0148e-06,0.810958,5,50.0915
runner_tune_12524db0,TERMINATED,172.17.0.2:28453,512,256,0.715172,0.103074,0.00139939,0.743094,1.15144,0.686847,0.923411,2.12862e-06,0.796669,5,76.6309
runner_tune_14d2f0e0,TERMINATED,172.17.0.2:91787,2048,512,0.478845,0.887274,0.00173694,0.77351,1.15507,0.698744,1.16926,1.39174e-05,0.870605,5,218.339
runner_tune_155c79be,TERMINATED,172.17.0.2:91789,2048,512,0.485775,0.899836,0.00166,0.741396,1.1582,0.695957,1.09889,1.32813e-05,0.865234,5,167.194
runner_tune_17e62cac,TERMINATED,172.17.0.2:92044,512,512,0.495628,0.422892,0.00270622,0.774116,1.26432,0.625411,1.11411,1.55294e-05,0.869449,5,168.611


Result for runner_tune_e0652ce6:
  date: 2022-06-14_13-31-18
  done: false
  experiment_id: 57948764d879419ba476a3fb1f71ebc9
  hostname: 8924131b90b2
  iterations_since_restore: 5
  mean_accuracy: 0.8845600328947368
  node_ip: 172.17.0.2
  pid: 104311
  time_since_restore: 166.15800619125366
  time_this_iter_s: 31.998493909835815
  time_total_s: 166.15800619125366
  timestamp: 1655213478
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: e0652ce6
  warmup_time: 0.0047833919525146484
  
Result for runner_tune_e0652ce6:
  date: 2022-06-14_13-31-18
  done: true
  experiment_id: 57948764d879419ba476a3fb1f71ebc9
  experiment_tag: 61_batch_size=512,channels=512,dropout=0.55205,gamma=0.69653,lr_dense=0.0025365,ratio_down=0.81971,ratio_up=1.0689,scale_down=0.67429,scale_up=1.176,weight_decay=1.7743e-05
  hostname: 8924131b90b2
  iterations_since_restore: 5
  mean_accuracy: 0.8845600328947368
  node_ip: 172.17.0.2
  pid: 104311
  time_since_restore: 166.15800619125366
  time_this_