In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

# manage ray's relative imports
# import ray
# runtime_env = {"working_dir": ".." }
# ray.init(runtime_env=runtime_env, dashboard_port=13065, include_dashboard=True)

from ray import tune
from ray.tune.suggest.optuna import OptunaSearch
from ray.tune import JupyterNotebookReporter

# manage beam's relative imports
import sys
sys.path.append('..')

from src.beam import beam_arguments, Experiment, Study
from src.beam import UniversalDataset, UniversalBatchSampler
from src.beam import Algorithm
from src.beam import LinearNet
from torchvision import transforms
import matplotlib.pyplot as plt

from src.beam import DataTensor
from src.beam.utils import is_notebook
from cifar10_example import cifar10_algorithm_generator, Cifar10Network

from ray.tune.suggest.hebo import HEBOSearch

In [2]:
import os

# Locations used by every experiment in this notebook:
#   path_to_data - forwarded to beam_arguments as the `path_to_data` keyword
#   root_dir     - interpolated into the `--root-dir` flag
# Each can be overridden with an environment variable so the notebook can run on
# another machine without editing this cell; when the variable is unset the
# original hard-coded defaults are used.
path_to_data = os.environ.get('BEAM_CIFAR10_DATA', '/localdata/elads/data/datasets/cifar10')
root_dir = os.environ.get('BEAM_CIFAR10_ROOT', '/localdata/elads/data/cifar10')

# alternative locations previously toggled by hand:
# path_to_data = '/home/shared/data/dataset/cifar10'
# root_dir = '/home/shared/data/results/cifar10'

## Training with a single worker

In [13]:
# Everything in this cell is done exactly once, before any worker is initialized:
# build the run arguments and create the Experiment from them.

# Flag strings, passed to beam_arguments as separate positional arguments.
cli_flags = (
    f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --no-amp --lr-d=1e-4 --batch-size=512",
    "--n-epochs=40 --epoch-length-train=50000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=.00256 --beta1=0.9 --beta2=0.9",
)

# Keyword arguments passed to beam_arguments alongside the flag strings.
keyword_args = dict(
    path_to_data=path_to_data,
    gamma=1.,
    dropout=.0,
    activation='celu',
    channels=512,
    scale_down=.7,
    scale_up=1.4,
    ratio_down=.7,
    ratio_up=1.4,
)

args = beam_arguments(*cli_flags, **keyword_args)
experiment = Experiment(args)

[32m2022-06-19 22:57:15.890[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m186[0m - [1mbeam project: cifar10[0m
[32m2022-06-19 22:57:15.891[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m187[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-19 22:57:15.892[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1mproject_name: cifar10[0m
[32m2022-06-19 22:57:15.893[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1midentifier: debug[0m
[32m2022-06-19 22:57:15.894[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-19 22:57:15.895[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1mmp_port: None[0m
[32m2022-06-19 22:57:15.896[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1mroot_

## Train with single or multiple workers

In [None]:
# Call the experiment with the algorithm generator and keep the returned object
# as `alg`; a later cell calls alg.evaluate('test') on it.
# NOTE(review): the captured log below reports "Single worker mode" for this run;
# how the worker count is chosen is decided inside Experiment — not visible here.
alg = experiment(cifar10_algorithm_generator)

[32m2022-06-19 22:57:16.855[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m554[0m - [1mSingle worker mode[0m
[32m2022-06-19 22:57:16.857[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m48[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/96 [00:00<?, ?it/s]

0.08


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-19 22:57:39.229[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 1/40:[0m
[32m2022-06-19 22:57:39.231[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-19 22:57:39.232[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 1314.15 	|[0m
[32m2022-06-19 22:57:39.233[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.438619 	|[0m
[32m2022-06-19 22:57:39.233[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 0.0001 	|[0m
[32m2022-06-19 22:57:39.234[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-19 22:57:39.235[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 1071.39 	|[0m
[32m2022-06-19 22:57:39.2

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.16


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-19 22:58:03.079[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 2/40:[0m
[32m2022-06-19 22:58:03.082[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-19 22:58:03.083[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 1042.07 	|[0m
[32m2022-06-19 22:58:03.084[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.624003 	|[0m
[32m2022-06-19 22:58:03.084[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 8e-06 	|[0m
[32m2022-06-19 22:58:03.085[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-19 22:58:03.086[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 1034.49 	|[0m
[32m2022-06-19 22:58:03.08

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.24


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-19 22:58:27.012[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 3/40:[0m
[32m2022-06-19 22:58:27.014[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-19 22:58:27.015[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 1007.28 	|[0m
[32m2022-06-19 22:58:27.015[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.653097 	|[0m
[32m2022-06-19 22:58:27.016[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.6e-05 	|[0m
[32m2022-06-19 22:58:27.017[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-19 22:58:27.018[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 993.682 	|[0m
[32m2022-06-19 22:58:27.

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.32


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-19 22:58:51.024[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 4/40:[0m
[32m2022-06-19 22:58:51.026[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-19 22:58:51.027[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 963.055 	|[0m
[32m2022-06-19 22:58:51.028[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.689738 	|[0m
[32m2022-06-19 22:58:51.029[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.4e-05 	|[0m
[32m2022-06-19 22:58:51.029[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-19 22:58:51.030[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 960.996 	|[0m
[32m2022-06-19 22:58:51.

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.4


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-19 22:59:15.032[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 5/40:[0m
[32m2022-06-19 22:59:15.034[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-19 22:59:15.035[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 916.717 	|[0m
[32m2022-06-19 22:59:15.036[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.729309 	|[0m
[32m2022-06-19 22:59:15.036[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.2e-05 	|[0m
[32m2022-06-19 22:59:15.037[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-19 22:59:15.038[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 927.133 	|[0m
[32m2022-06-19 22:59:15.

In [8]:
# Evaluate on the 'test' subset, then display the mean of the values stored
# under ['scalar']['acc'] in the returned results.
test_results = alg.evaluate('test')
np.mean(test_results['scalar']['acc'])

test:   0%|          | 0/20 [00:00<?, ?it/s]

0.8583065271377563

In [6]:
# alg.optimizers['net'].scaler.get_scale()

## Hyperparameter search with native optuna

In [3]:
# Base arguments for the native-Optuna hyperparameter study.
# Flag strings are passed to beam_arguments as separate positional arguments.
cli_flags = (
    f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --half --lr-d=0.01 --batch-size=1024",
    "--n-epochs=4 --epoch-length-train=200000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=1e-5 --device=1 --beta1=0.9 --beta2=0.9",
)
args = beam_arguments(*cli_flags, path_to_data=path_to_data)

# The Study is built from the algorithm generator plus the base arguments.
study = Study(cifar10_algorithm_generator, args)

[32m2022-06-14 12:22:13.838[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m85[0m - [1mHyperparameter Optimization[0m
[32m2022-06-14 12:22:13.839[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m86[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 12:22:13.840[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 12:22:13.841[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 12:22:13.842[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1midentifier: debug_hp_optimization_20220614_122213[0m
[32m2022-06-14 12:22:13.843[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 12:22:13.844[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[

In [58]:
def suggest(trial):
    """Sample a learning rate for one Optuna trial.

    The value is drawn log-uniformly from [1e-3, 2e-2] under the Optuna
    parameter name "lr", echoed to stdout, and returned keyed as 'lr_dense'.

    Args:
        trial: object exposing ``suggest_loguniform(name, low, high)``
            (an Optuna trial when called through ``study.optuna``).

    Returns:
        dict: ``{'lr_dense': <sampled value>}``.
    """
    sampled_lr = trial.suggest_loguniform("lr", 1e-3, 2e-2)
    # Echo the suggestion so it shows up next to the trial logs.
    for line in ('My suggestion', sampled_lr):
        print(line)
    return dict(lr_dense=sampled_lr)
    

In [59]:
# Run the Optuna search through the Study wrapper: `suggest` supplies the sampled
# hyperparameters for each trial; 10 trials, a single job, direction 'maximize'.
# NOTE(review): the objective being maximized is defined inside Study.optuna — not
# visible here. The call is the cell's last expression, so its return value is displayed.
study.optuna(suggest, direction='maximize', n_jobs=1, n_trials=10)

[32m[I 2022-06-14 09:37:27,339][0m A new study created in memory with name: cifar10/CIFAR10Algorithm/debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m


My suggestion
0.004812922221223897
[32m2022-06-14 09:37:27.342[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:37:27.343[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:37:27.345[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:37:27.345[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:37:27.346[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:37:27.347[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:38:20,651][0m Trial 0 finished with value: 0.8161892361111112 and parameters: {'lr': 0.004812922221223897}. Best is trial 0 with value: 0.8161892361111112.[0m


My suggestion
0.004845938308034776
[32m2022-06-14 09:38:20.653[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:38:20.654[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:38:20.655[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:38:20.656[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:38:20.657[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:38:20.657[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:39:14,827][0m Trial 1 finished with value: 0.8365885416666666 and parameters: {'lr': 0.004845938308034776}. Best is trial 1 with value: 0.8365885416666666.[0m


My suggestion
0.01711712997670714
[32m2022-06-14 09:39:14.829[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:39:14.830[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:39:14.831[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:39:14.832[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:39:14.833[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:39:14.833[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:40:10,465][0m Trial 2 finished with value: 0.8347439236111112 and parameters: {'lr': 0.01711712997670714}. Best is trial 1 with value: 0.8365885416666666.[0m


My suggestion
0.0027556186008022045
[32m2022-06-14 09:40:10.467[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:40:10.468[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:40:10.469[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:40:10.470[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:40:10.471[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:40:10.471[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: No

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:41:07,001][0m Trial 3 finished with value: 0.8373480902777778 and parameters: {'lr': 0.0027556186008022045}. Best is trial 3 with value: 0.8373480902777778.[0m


My suggestion
0.0011192231046584593
[32m2022-06-14 09:41:07.004[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:41:07.005[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:41:07.006[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:41:07.007[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:41:07.008[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:41:07.008[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: No

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:42:03,956][0m Trial 4 finished with value: 0.7991536458333334 and parameters: {'lr': 0.0011192231046584593}. Best is trial 3 with value: 0.8373480902777778.[0m


My suggestion
0.00388837124079873
[32m2022-06-14 09:42:03.958[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:42:03.959[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:42:03.960[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:42:03.961[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:42:03.961[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:42:03.962[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:43:00,569][0m Trial 5 finished with value: 0.8327907986111112 and parameters: {'lr': 0.00388837124079873}. Best is trial 3 with value: 0.8373480902777778.[0m


My suggestion
0.00320599649799676
[32m2022-06-14 09:43:00.572[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:43:00.573[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:43:00.573[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:43:00.574[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:43:00.575[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:43:00.576[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:43:57,500][0m Trial 6 finished with value: 0.8381076388888888 and parameters: {'lr': 0.00320599649799676}. Best is trial 6 with value: 0.8381076388888888.[0m


My suggestion
0.004899569978633089
[32m2022-06-14 09:43:57.502[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:43:57.503[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:43:57.504[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:43:57.505[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:43:57.506[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:43:57.506[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:44:54,074][0m Trial 7 finished with value: 0.8291015625 and parameters: {'lr': 0.004899569978633089}. Best is trial 6 with value: 0.8381076388888888.[0m


My suggestion
0.01612558040441622
[32m2022-06-14 09:44:54.077[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:44:54.078[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:44:54.079[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:44:54.079[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:44:54.080[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:44:54.081[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: None

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:45:50,822][0m Trial 8 finished with value: 0.8337673611111112 and parameters: {'lr': 0.01612558040441622}. Best is trial 6 with value: 0.8381076388888888.[0m


My suggestion
0.002505511913619016
[32m2022-06-14 09:45:50.824[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m169[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 09:45:50.825[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m170[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 09:45:50.826[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 09:45:50.827[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1midentifier: debug_hp_optimization_20220614_093207_hp_optimization_20220614_093407_hp_optimization_20220614_093726[0m
[32m2022-06-14 09:45:50.828[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 09:45:50.829[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m174[0m - [1mmp_port: Non

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

train:   0%|          | 0/194 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]

[32m[I 2022-06-14 09:46:47,584][0m Trial 9 finished with value: 0.8180338541666666 and parameters: {'lr': 0.002505511913619016}. Best is trial 6 with value: 0.8381076388888888.[0m


<optuna.study.study.Study at 0x7fda3fc58280>

## Hyperparameter search with ray-tune and optuna

In [3]:
# Base arguments for the ray-tune hyperparameter study.
# Flag strings are passed to beam_arguments as separate positional arguments.
cli_flags = (
    f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --half --lr-d=0.01 --batch-size=1024",
    "--n-epochs=5 --epoch-length-train=200000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=1e-5 --device=0 --beta1=0.9 --beta2=0.9",
)
args = beam_arguments(*cli_flags, path_to_data=path_to_data)

# The Study is built from the algorithm generator plus the base arguments.
study = Study(cifar10_algorithm_generator, args)

[32m2022-06-15 05:13:36.288[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m85[0m - [1mHyperparameter Optimization[0m
[32m2022-06-15 05:13:36.289[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m86[0m - [1mbeam project: cifar10[0m
[32m2022-06-15 05:13:36.290[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-15 05:13:36.292[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mproject_name: cifar10[0m
[32m2022-06-15 05:13:36.293[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1midentifier: debug_hp_optimization_20220615_051336[0m
[32m2022-06-15 05:13:36.295[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-15 05:13:36.296[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[

In [None]:
# hebo = HEBOSearch(metric="mean_accuracy", mode="max")

# analysis = study.tune(config={"lr_dense": tune.loguniform(1e-3, 2e-2),
#                               "weight_decay": tune.loguniform(1e-6, 1e-4),
#                               "gamma": tune.loguniform(.1, .9),
#                               "dropout": tune.uniform(0, .75),
#                               "scale_down": tune.uniform(0.4, .7),
#                               "scale_up": tune.uniform(0.9, 1.2),
#                               "ratio_down": tune.uniform(0.7, .95),
#                               "ratio_up": tune.uniform(1.05, 1.4),
#                               "channels": tune.choice([128, 256, 512]),
#                               "batch_size": tune.choice([512, 1024, 2048]),},
#                        metric="mean_accuracy",
#                        max_concurrent_trials=4,
#                        resources_per_trial={"gpu": 1},
#                        mode="max",
#                        search_alg=hebo,
#                       progress_reporter=JupyterNotebookReporter(overwrite=True),
#                        num_samples=400)


# Search space for the ray-tune run: one sampler per hyperparameter name.
search_space = dict(
    lr_dense=tune.loguniform(1e-3, 2e-2),
    weight_decay=tune.loguniform(1e-6, 1e-4),
    gamma=tune.loguniform(.1, .9),
    dropout=tune.uniform(0, .75),
    scale_down=tune.uniform(0.4, .7),
    scale_up=tune.uniform(0.9, 1.5),
    ratio_down=tune.uniform(0.5, .95),
    ratio_up=tune.uniform(1.05, 1.5),
    channels=tune.choice([128, 256, 512]),
    batch_size=tune.choice([512, 1024, 2048]),
    activation=tune.choice(['relu', 'celu', 'gelu']),
)

# Search algorithm and in-notebook progress reporter (created in the same order
# as in the original single-call form).
optuna_searcher = OptunaSearch()
notebook_reporter = JupyterNotebookReporter(overwrite=True)

# Launch the search: 400 samples, at most 4 concurrent trials, one GPU per trial,
# maximizing the "mean_accuracy" metric.
analysis = study.tune(
    config=search_space,
    metric="mean_accuracy",
    max_concurrent_trials=4,
    resources_per_trial={"gpu": 1},
    mode="max",
    search_alg=optuna_searcher,
    progress_reporter=notebook_reporter,
    num_samples=400,
)


Trial name,status,loc,activation,batch_size,channels,dropout,gamma,lr_dense,ratio_down,ratio_up,scale_down,scale_up,weight_decay,acc,iter,total time (s)
runner_tune_1fc65780,RUNNING,172.17.0.2:14444,gelu,512,512,0.431007,0.649843,0.00222929,0.942077,1.14428,0.699359,1.12334,1.00237e-06,0.895354,3,185.357
runner_tune_6db3aa7e,RUNNING,172.17.0.2:19125,gelu,512,512,0.443993,0.643705,0.00169571,0.944341,1.1474,0.672401,0.996135,1.16104e-06,0.831003,1,68.9715
runner_tune_7f152a7c,RUNNING,172.17.0.2:20189,celu,512,512,0.432513,0.569495,0.00212933,0.941401,1.13315,0.678664,1.33784,1.0003e-06,0.829359,1,68.1938
runner_tune_ec2b1c3a,RUNNING,172.17.0.2:11332,gelu,512,512,0.429461,0.648061,0.00167785,0.941644,1.13603,0.674929,0.995023,1.00011e-06,0.899877,4,282.289
runner_tune_008da1d2,TERMINATED,172.17.0.2:72955,gelu,512,512,0.446906,0.522958,0.00262392,0.892363,1.11359,0.681297,1.10811,3.4133e-06,0.900082,5,309.081
runner_tune_00ac0764,TERMINATED,172.17.0.2:38036,celu,1024,512,0.118822,0.187401,0.00270857,0.502787,1.49771,0.694636,1.48353,3.49456e-05,0.887261,5,303.059
runner_tune_0122aff2,TERMINATED,172.17.0.2:109731,gelu,512,512,0.359209,0.775842,0.00237862,0.932516,1.11985,0.6882,1.08345,1.03562e-06,0.904708,5,349.518
runner_tune_04615906,TERMINATED,172.17.0.2:51954,gelu,512,512,0.535899,0.765737,0.00172625,0.943377,1.06392,0.674356,1.18738,1.9834e-06,0.900185,5,350.316
runner_tune_0471e192,TERMINATED,172.17.0.2:61544,gelu,512,512,0.550578,0.62975,0.00245628,0.92835,1.12296,0.675259,1.14215,1.21565e-06,0.902755,5,307.457
runner_tune_05328bea,TERMINATED,172.17.0.2:94449,gelu,512,512,0.426806,0.844516,0.00353867,0.895771,1.08425,0.627165,1.04606,1.94761e-06,0.898643,5,302.734


Result for runner_tune_1fc65780:
  date: 2022-06-15_11-31-30
  done: false
  experiment_id: b20fbd3406c647fd98b1cf5c45f26155
  hostname: 8924131b90b2
  iterations_since_restore: 4
  mean_accuracy: 0.8846628289473685
  node_ip: 172.17.0.2
  pid: 14444
  time_since_restore: 244.4695918560028
  time_this_iter_s: 59.11254024505615
  time_total_s: 244.4695918560028
  timestamp: 1655292690
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 1fc65780
  warmup_time: 0.0043680667877197266
  
