In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

# Start the local Ray runtime before the ray.tune imports.
# working_dir=".." packages the parent directory and pushes it to the Ray
# cluster, so project-relative imports can be resolved by the workers.
import ray
runtime_env = {"working_dir": ".."}
# ignore_reinit_error=True keeps this cell re-runnable: without it, executing
# the cell a second time in the same kernel raises RuntimeError because Ray
# has already been initialised.
ray.init(runtime_env=runtime_env, dashboard_port=13065, include_dashboard=True,
         ignore_reinit_error=True)

from ray import tune
from ray.tune.suggest.optuna import OptunaSearch
from ray.tune import JupyterNotebookReporter

# Make the parent directory importable so the `src.beam` package imports resolve.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate '..' entry to sys.path each time.
import sys
if '..' not in sys.path:
    sys.path.append('..')

from src.beam import beam_arguments, Experiment, Study
from src.beam import UniversalDataset, UniversalBatchSampler
from src.beam import Algorithm
from src.beam import LinearNet
from torchvision import transforms
import matplotlib.pyplot as plt

from src.beam import DataTensor
from src.beam.utils import is_notebook
from cifar10_example import cifar10_algorithm_generator, Cifar10Network

from ray.tune.suggest.hebo import HEBOSearch

2022-06-20 09:11:49,276	INFO services.py:1456 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:13065[39m[22m
2022-06-20 09:11:49,781	INFO packaging.py:388 -- Creating a file package for local directory '..'.
2022-06-20 09:11:49,805	INFO packaging.py:241 -- Pushing file package 'gcs://_ray_pkg_bd2a01d656db6bf1.zip' (0.87MiB) to Ray cluster...
2022-06-20 09:11:49,819	INFO packaging.py:243 -- Successfully pushed file package 'gcs://_ray_pkg_bd2a01d656db6bf1.zip'.


In [2]:
# Dataset and results locations.
# The defaults are the shared-server paths used for this run. To run on another
# machine, set the environment variables instead of editing this cell, e.g.
#   BEAM_CIFAR10_DATA=/localdata/elads/data/datasets/cifar10
#   BEAM_CIFAR10_ROOT=/localdata/elads/data/cifar10
import os

path_to_data = os.environ.get('BEAM_CIFAR10_DATA', '/home/shared/data/dataset/cifar10')
root_dir = os.environ.get('BEAM_CIFAR10_ROOT', '/home/shared/data/results/cifar10')

## Training with a single worker

In [7]:
# One-time setup that must happen before any worker is initialised:
# for example, setting the running arguments and creating the experiment.

# Command-line style flags; each string is passed to beam_arguments as its own
# positional argument.
cli_flags = (
    f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --half --lr-d=1e-4 --batch-size=512",
    "--n-epochs=40 --epoch-length-train=50000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=.00256 --beta1=0.9 --beta2=0.9",
)

# Additional settings supplied as keyword arguments rather than as flags.
extra_hparams = dict(
    path_to_data=path_to_data,
    gamma=1.,
    dropout=.0,
    activation='celu',
    channels=512,
    scale_down=.7,
    scale_up=1.4,
    ratio_down=.7,
    ratio_up=1.4,
)

args = beam_arguments(*cli_flags, **extra_hparams)

experiment = Experiment(args)

[32m2022-06-20 08:42:51.854[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m186[0m - [1mbeam project: cifar10[0m
[32m2022-06-20 08:42:51.856[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m187[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-20 08:42:51.857[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1mproject_name: cifar10[0m
[32m2022-06-20 08:42:51.860[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1midentifier: debug[0m
[32m2022-06-20 08:42:51.861[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-20 08:42:51.862[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1mmp_port: None[0m
[32m2022-06-20 08:42:51.863[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m192[0m - [1mroot_

## Train with single or multiple workers

In [8]:
# Run the experiment with the CIFAR-10 algorithm generator. With --parallel=1 the
# log below reports "Single worker mode" and then one train/test pass per epoch.
# The return value is kept in `alg` (presumably the trained algorithm object —
# TODO confirm against src.beam.Experiment).
alg = experiment(cifar10_algorithm_generator)

[32m2022-06-20 08:42:53.793[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m554[0m - [1mSingle worker mode[0m
[32m2022-06-20 08:42:53.795[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m48[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/96 [00:00<?, ?it/s]

0.08


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:42:57.555[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 1/40:[0m
[32m2022-06-20 08:42:57.557[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:42:57.558[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 854.714 	|[0m
[32m2022-06-20 08:42:57.559[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.560872 	|[0m
[32m2022-06-20 08:42:57.559[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 0.0001 	|[0m
[32m2022-06-20 08:42:57.560[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:42:57.561[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 742.316 	|[0m
[32m2022-06-20 08:42:57.5

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.16


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:01.557[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 2/40:[0m
[32m2022-06-20 08:43:01.560[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:01.561[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 702.219 	|[0m
[32m2022-06-20 08:43:01.562[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.750366 	|[0m
[32m2022-06-20 08:43:01.563[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 8e-06 	|[0m
[32m2022-06-20 08:43:01.564[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:01.565[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 702.105 	|[0m
[32m2022-06-20 08:43:01.56

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.24


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:05.530[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 3/40:[0m
[32m2022-06-20 08:43:05.534[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:05.535[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 682.635 	|[0m
[32m2022-06-20 08:43:05.536[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.776042 	|[0m
[32m2022-06-20 08:43:05.537[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.6e-05 	|[0m
[32m2022-06-20 08:43:05.538[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:05.538[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 684.684 	|[0m
[32m2022-06-20 08:43:05.

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.32


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:09.646[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 4/40:[0m
[32m2022-06-20 08:43:09.650[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:09.651[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 659.583 	|[0m
[32m2022-06-20 08:43:09.652[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.804281 	|[0m
[32m2022-06-20 08:43:09.653[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.4e-05 	|[0m
[32m2022-06-20 08:43:09.654[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:09.655[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 664.974 	|[0m
[32m2022-06-20 08:43:09.

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.4


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:13.788[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 5/40:[0m
[32m2022-06-20 08:43:13.791[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:13.792[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 636.411 	|[0m
[32m2022-06-20 08:43:13.793[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.832255 	|[0m
[32m2022-06-20 08:43:13.794[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.2e-05 	|[0m
[32m2022-06-20 08:43:13.795[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:13.796[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 649.053 	|[0m
[32m2022-06-20 08:43:13.

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.38857142857142857


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:17.858[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 6/40:[0m
[32m2022-06-20 08:43:17.861[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:17.862[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 616.219 	|[0m
[32m2022-06-20 08:43:17.863[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.856771 	|[0m
[32m2022-06-20 08:43:17.864[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 4e-05 	|[0m
[32m2022-06-20 08:43:17.865[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:17.866[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 635.763 	|[0m
[32m2022-06-20 08:43:17.87

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.37714285714285717


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:22.049[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 7/40:[0m
[32m2022-06-20 08:43:22.052[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:22.053[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 594.812 	|[0m
[32m2022-06-20 08:43:22.054[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.883545 	|[0m
[32m2022-06-20 08:43:22.055[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.88571e-05 	|[0m
[32m2022-06-20 08:43:22.056[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:22.057[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 620.5 	|[0m
[32m2022-06-20 08:43:2

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.3657142857142857


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:26.166[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 8/40:[0m
[32m2022-06-20 08:43:26.169[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:26.170[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 575.745 	|[0m
[32m2022-06-20 08:43:26.171[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.907227 	|[0m
[32m2022-06-20 08:43:26.172[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.77143e-05 	|[0m
[32m2022-06-20 08:43:26.173[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:26.173[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 620.342 	|[0m
[32m2022-06-20 08:43

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.3542857142857143


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:30.216[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 9/40:[0m
[32m2022-06-20 08:43:30.219[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:30.220[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 559.536 	|[0m
[32m2022-06-20 08:43:30.221[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.92627 	|[0m
[32m2022-06-20 08:43:30.222[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.65714e-05 	|[0m
[32m2022-06-20 08:43:30.223[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:30.224[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 617.474 	|[0m
[32m2022-06-20 08:43:

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.34285714285714286


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:34.358[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 10/40:[0m
[32m2022-06-20 08:43:34.360[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:34.361[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 544.521 	|[0m
[32m2022-06-20 08:43:34.362[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.944234 	|[0m
[32m2022-06-20 08:43:34.363[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.54286e-05 	|[0m
[32m2022-06-20 08:43:34.364[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:34.365[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 603.579 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.33142857142857146


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:38.513[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 11/40:[0m
[32m2022-06-20 08:43:38.517[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:38.517[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 532.135 	|[0m
[32m2022-06-20 08:43:38.518[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.958577 	|[0m
[32m2022-06-20 08:43:38.519[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.42857e-05 	|[0m
[32m2022-06-20 08:43:38.521[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:38.521[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 607.658 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.32


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:42.599[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 12/40:[0m
[32m2022-06-20 08:43:42.601[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:42.602[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 520.779 	|[0m
[32m2022-06-20 08:43:42.603[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.970947 	|[0m
[32m2022-06-20 08:43:42.604[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.31429e-05 	|[0m
[32m2022-06-20 08:43:42.605[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:42.606[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 605.105 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.3085714285714286


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:46.640[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 13/40:[0m
[32m2022-06-20 08:43:46.642[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:46.643[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 509.763 	|[0m
[32m2022-06-20 08:43:46.644[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.979899 	|[0m
[32m2022-06-20 08:43:46.645[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.2e-05 	|[0m
[32m2022-06-20 08:43:46.646[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:46.647[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 606.553 	|[0m
[32m2022-06-20 08:43:46

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.29714285714285715


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:50.708[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 14/40:[0m
[32m2022-06-20 08:43:50.710[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:50.711[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 501.417 	|[0m
[32m2022-06-20 08:43:50.712[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.986287 	|[0m
[32m2022-06-20 08:43:50.713[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 3.08571e-05 	|[0m
[32m2022-06-20 08:43:50.714[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:50.715[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 599.053 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.28571428571428575


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:54.822[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 15/40:[0m
[32m2022-06-20 08:43:54.823[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:54.824[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 493.25 	|[0m
[32m2022-06-20 08:43:54.825[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.991089 	|[0m
[32m2022-06-20 08:43:54.826[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.97143e-05 	|[0m
[32m2022-06-20 08:43:54.827[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:54.828[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.579 	|[0m
[32m2022-06-20 08:43

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.2742857142857143


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:43:58.922[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 16/40:[0m
[32m2022-06-20 08:43:58.924[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:43:58.925[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 486.273 	|[0m
[32m2022-06-20 08:43:58.926[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.994792 	|[0m
[32m2022-06-20 08:43:58.927[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.85714e-05 	|[0m
[32m2022-06-20 08:43:58.928[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:43:58.929[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 604.763 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.2628571428571429


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:03.033[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 17/40:[0m
[32m2022-06-20 08:44:03.035[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:03.036[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 481.047 	|[0m
[32m2022-06-20 08:44:03.037[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.996908 	|[0m
[32m2022-06-20 08:44:03.038[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.74286e-05 	|[0m
[32m2022-06-20 08:44:03.039[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:03.040[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 599.711 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.25142857142857145


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:07.167[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 18/40:[0m
[32m2022-06-20 08:44:07.168[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:07.168[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 476.062 	|[0m
[32m2022-06-20 08:44:07.169[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.998108 	|[0m
[32m2022-06-20 08:44:07.169[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.62857e-05 	|[0m
[32m2022-06-20 08:44:07.170[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:07.170[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 596.211 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.24000000000000002


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:11.211[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 19/40:[0m
[32m2022-06-20 08:44:11.211[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:11.212[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 472.151 	|[0m
[32m2022-06-20 08:44:11.212[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.99882 	|[0m
[32m2022-06-20 08:44:11.212[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.51429e-05 	|[0m
[32m2022-06-20 08:44:11.213[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:11.214[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.211 	|[0m
[32m2022-06-20 08:44

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.2285714285714286


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:15.265[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 20/40:[0m
[32m2022-06-20 08:44:15.267[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:15.268[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 468.792 	|[0m
[32m2022-06-20 08:44:15.268[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.999491 	|[0m
[32m2022-06-20 08:44:15.269[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.4e-05 	|[0m
[32m2022-06-20 08:44:15.269[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:15.270[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598 	|[0m
[32m2022-06-20 08:44:15.270

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.21714285714285717


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:19.319[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 21/40:[0m
[32m2022-06-20 08:44:19.322[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:19.324[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 466.044 	|[0m
[32m2022-06-20 08:44:19.325[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.999797 	|[0m
[32m2022-06-20 08:44:19.325[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.28571e-05 	|[0m
[32m2022-06-20 08:44:19.326[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:19.327[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 596.026 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.20571428571428574


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:23.368[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 22/40:[0m
[32m2022-06-20 08:44:23.370[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:23.371[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 464.122 	|[0m
[32m2022-06-20 08:44:23.372[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.999776 	|[0m
[32m2022-06-20 08:44:23.373[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.17143e-05 	|[0m
[32m2022-06-20 08:44:23.374[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:23.375[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 596.289 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.1942857142857143


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:27.454[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 23/40:[0m
[32m2022-06-20 08:44:27.455[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:27.455[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 462.211 	|[0m
[32m2022-06-20 08:44:27.456[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.999736 	|[0m
[32m2022-06-20 08:44:27.456[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 2.05714e-05 	|[0m
[32m2022-06-20 08:44:27.457[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:27.457[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 596.974 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.18285714285714288


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:31.520[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 24/40:[0m
[32m2022-06-20 08:44:31.521[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:31.522[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 460.844 	|[0m
[32m2022-06-20 08:44:31.523[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.999919 	|[0m
[32m2022-06-20 08:44:31.524[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.94286e-05 	|[0m
[32m2022-06-20 08:44:31.525[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:31.526[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.684 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.17142857142857146


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:35.592[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 25/40:[0m
[32m2022-06-20 08:44:35.593[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:35.594[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 459.461 	|[0m
[32m2022-06-20 08:44:35.596[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.999959 	|[0m
[32m2022-06-20 08:44:35.597[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.82857e-05 	|[0m
[32m2022-06-20 08:44:35.598[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:35.599[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 597.711 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.16000000000000003


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:39.693[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 26/40:[0m
[32m2022-06-20 08:44:39.695[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:39.695[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 458.573 	|[0m
[32m2022-06-20 08:44:39.696[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.999959 	|[0m
[32m2022-06-20 08:44:39.697[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.71429e-05 	|[0m
[32m2022-06-20 08:44:39.697[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:39.698[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.447 	|[0m
[32m2022-06-20 08:4

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.14857142857142858


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:43.778[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 27/40:[0m
[32m2022-06-20 08:44:43.779[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:43.780[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 457.562 	|[0m
[32m2022-06-20 08:44:43.781[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.99998 	|[0m
[32m2022-06-20 08:44:43.782[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.6e-05 	|[0m
[32m2022-06-20 08:44:43.783[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:43.783[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.868 	|[0m
[32m2022-06-20 08:44:43.

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.13714285714285718


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:47.863[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 28/40:[0m
[32m2022-06-20 08:44:47.865[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:47.866[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 456.914 	|[0m
[32m2022-06-20 08:44:47.867[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.99998 	|[0m
[32m2022-06-20 08:44:47.868[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.48571e-05 	|[0m
[32m2022-06-20 08:44:47.869[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:47.870[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.658 	|[0m
[32m2022-06-20 08:44

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.12571428571428572


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:52.025[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 29/40:[0m
[32m2022-06-20 08:44:52.026[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:52.028[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 456.232 	|[0m
[32m2022-06-20 08:44:52.028[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 1 	|[0m
[32m2022-06-20 08:44:52.029[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.37143e-05 	|[0m
[32m2022-06-20 08:44:52.030[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:52.031[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.289 	|[0m
[32m2022-06-20 08:44:52.03

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.11428571428571432


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:44:56.074[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 30/40:[0m
[32m2022-06-20 08:44:56.076[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:44:56.077[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 455.763 	|[0m
[32m2022-06-20 08:44:56.078[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 1 	|[0m
[32m2022-06-20 08:44:56.079[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.25714e-05 	|[0m
[32m2022-06-20 08:44:56.080[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:44:56.081[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.342 	|[0m
[32m2022-06-20 08:44:56.08

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.10285714285714287


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:45:00.165[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 31/40:[0m
[32m2022-06-20 08:45:00.169[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:45:00.170[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 455.32 	|[0m
[32m2022-06-20 08:45:00.171[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 1 	|[0m
[32m2022-06-20 08:45:00.172[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.14286e-05 	|[0m
[32m2022-06-20 08:45:00.175[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:45:00.176[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.316 	|[0m
[32m2022-06-20 08:45:00.177

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.09142857142857147


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:45:04.225[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 32/40:[0m
[32m2022-06-20 08:45:04.226[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:45:04.227[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 454.951 	|[0m
[32m2022-06-20 08:45:04.228[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 0.99998 	|[0m
[32m2022-06-20 08:45:04.229[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 1.02857e-05 	|[0m
[32m2022-06-20 08:45:04.230[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:45:04.231[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.974 	|[0m
[32m2022-06-20 08:45

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.08000000000000002


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:45:08.301[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 33/40:[0m
[32m2022-06-20 08:45:08.303[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:45:08.304[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 454.63 	|[0m
[32m2022-06-20 08:45:08.305[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 1 	|[0m
[32m2022-06-20 08:45:08.306[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 9.14286e-06 	|[0m
[32m2022-06-20 08:45:08.307[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:45:08.308[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.632 	|[0m
[32m2022-06-20 08:45:08.309

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.06857142857142862


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:45:12.295[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 34/40:[0m
[32m2022-06-20 08:45:12.297[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:45:12.298[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 454.38 	|[0m
[32m2022-06-20 08:45:12.299[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 1 	|[0m
[32m2022-06-20 08:45:12.300[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 8e-06 	|[0m
[32m2022-06-20 08:45:12.301[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:45:12.301[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.342 	|[0m
[32m2022-06-20 08:45:12.302[0m |

train:   0%|          | 0/96 [00:00<?, ?it/s]

0.05714285714285716


test:   0%|          | 0/19 [00:00<?, ?it/s]


[32m2022-06-20 08:45:16.361[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m400[0m - [1mFinished epoch 35/40:[0m
[32m2022-06-20 08:45:16.363[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtrain:[0m
[32m2022-06-20 08:45:16.364[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 454.206 	|[0m
[32m2022-06-20 08:45:16.365[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1macc 1 	|[0m
[32m2022-06-20 08:45:16.366[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mlr 6.85714e-06 	|[0m
[32m2022-06-20 08:45:16.367[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m446[0m - [1mtest:[0m
[32m2022-06-20 08:45:16.368[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m450[0m - [1mloss 598.342 	|[0m
[32m2022-06-20 08:45:16.36

KeyboardInterrupt: 

In [9]:
# Display the mean of the 'acc' scalar collected while evaluating on the 'test' subset.
# NOTE(review): `alg` is not created in this cell; it must already exist in the kernel
# (presumably produced by the training cell above — TODO confirm). When that cell was
# interrupted or the kernel was restarted, this line raises NameError.
np.mean(alg.evaluate('test')['scalar']['acc'])

NameError: name 'alg' is not defined

In [6]:
# alg.optimizers['net'].scaler.get_scale()

## Hyperparameter search with native optuna

In [None]:
# Baseline arguments for the native-Optuna study: CLI-style option strings first,
# then keyword overrides (data path, network and augmentation settings).
args = beam_arguments(
    f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --half --lr-d=1e-4 --batch-size=512",
    "--n-epochs=40 --epoch-length-train=50000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=.00256 --beta1=0.9 --beta2=0.9",
    path_to_data=path_to_data,
    gamma=1.0,
    dropout=0.0,
    activation='celu',
    channels=512,
    scale_down=0.7,
    scale_up=1.4,
    ratio_down=0.7,
    ratio_up=1.4,
)

# The study is given the algorithm generator together with the baseline arguments.
study = Study(cifar10_algorithm_generator, args)

In [14]:
def suggest(trial):
    """Sample the hyperparameters for a single Optuna trial.

    Args:
        trial: Optuna trial object; only its ``suggest_float`` method is used.

    Returns:
        dict: ``{'lr_dense': lr}`` where ``lr`` is drawn log-uniformly
        from the interval [1e-3, 2e-2].
    """
    # suggest_float(..., log=True) samples the same log-uniform distribution as
    # the deprecated suggest_loguniform. The value is registered with Optuna
    # under the name "lr" but handed back under the key 'lr_dense'.
    lr = trial.suggest_float("lr", 1e-3, 2e-2, log=True)
    print('My suggestion')
    print(lr)
    return {'lr_dense': lr}
    

In [15]:
# Run the native-Optuna search: 10 trials in a single job, maximizing the objective.
study.optuna(
    suggest,
    direction='maximize',
    n_jobs=1,
    n_trials=10,
)

[32m[I 2022-06-20 08:46:21,221][0m A new study created in memory with name: cifar10/CIFAR10Algorithm/debug_hp_optimization_20220620_084618[0m


My suggestion
0.009781018192999246
[32m2022-06-20 08:46:21.224[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrunner_optuna[0m:[36m114[0m - [1mNext Hyperparameter suggestion:[0m
[32m2022-06-20 08:46:21.225[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrunner_optuna[0m:[36m116[0m - [1mlr_dense: 0.009781018192999246[0m
[32m2022-06-20 08:46:21.229[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m262[0m - [1mCreating new experiment[0m
[32m2022-06-20 08:46:21.229[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m277[0m - [1mExperiment directory is: /home/shared/data/results/cifar10/cifar10/CIFAR10Algorithm/debug_hp_optimization_20220620_084618/0000_20220620_084621[0m
[32m2022-06-20 08:46:21.239[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m554[0m - [1mSingle worker mode[0m
[32m2022-06-20 08:46:21.240[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun

KeyboardInterrupt: 

## Hyperparameter search with ray-tune and optuna

In [3]:
# Baseline arguments for the Ray Tune study: CLI-style option strings first,
# then keyword overrides (data path, network and augmentation settings).
# This cell repeats the setup so the Ray Tune section can be run on its own.
args = beam_arguments(
    f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --half --lr-d=1e-4 --batch-size=512",
    "--n-epochs=40 --epoch-length-train=50000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=.00256 --beta1=0.9 --beta2=0.9",
    path_to_data=path_to_data,
    gamma=1.0,
    dropout=0.0,
    activation='celu',
    channels=512,
    scale_down=0.7,
    scale_up=1.4,
    ratio_down=0.7,
    ratio_up=1.4,
)

# The study is given the algorithm generator together with the baseline arguments.
study = Study(cifar10_algorithm_generator, args)

[32m2022-06-20 09:13:31.344[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m85[0m - [1mHyperparameter Optimization[0m
[32m2022-06-20 09:13:31.345[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m86[0m - [1mbeam project: cifar10[0m
[32m2022-06-20 09:13:31.346[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-20 09:13:31.348[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mproject_name: cifar10[0m
[32m2022-06-20 09:13:31.350[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1midentifier: debug_hp_optimization_20220620_091331[0m
[32m2022-06-20 09:13:31.351[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-20 09:13:31.352[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[

In [None]:
# hebo = HEBOSearch(metric="mean_accuracy", mode="max")

# analysis = study.tune(config={"lr_dense": tune.loguniform(1e-3, 2e-2),
#                               "weight_decay": tune.loguniform(1e-6, 1e-4),
#                               "gamma": tune.loguniform(.1, .9),
#                               "dropout": tune.uniform(0, .75),
#                               "scale_down": tune.uniform(0.4, .7),
#                               "scale_up": tune.uniform(0.9, 1.2),
#                               "ratio_down": tune.uniform(0.7, .95),
#                               "ratio_up": tune.uniform(1.05, 1.4),
#                               "channels": tune.choice([128, 256, 512]),
#                               "batch_size": tune.choice([512, 1024, 2048]),},
#                        metric="mean_accuracy",
#                        max_concurrent_trials=4,
#                        resources_per_trial={"gpu": 1},
#                        mode="max",
#                        search_alg=hebo,
#                       progress_reporter=JupyterNotebookReporter(overwrite=True),
#                        num_samples=400)


# Launch the Ray Tune search with Optuna as the search algorithm.
# 400 configurations are sampled, at most 4 trials run concurrently, and each
# trial reserves 2 GPUs; the objective is to maximize "mean_accuracy".
analysis = study.tune(
    config={
        # optimisation
        "lr_dense": tune.loguniform(1e-3, 2e-2),
        "weight_decay": tune.loguniform(1e-6, 1e-4),
        "gamma": tune.loguniform(0.1, 0.9),
        "dropout": tune.uniform(0, 0.75),
        # scale / ratio ranges
        "scale_down": tune.uniform(0.4, 0.7),
        "scale_up": tune.uniform(0.9, 1.5),
        "ratio_down": tune.uniform(0.5, 0.95),
        "ratio_up": tune.uniform(1.05, 1.5),
        # categorical choices
        "channels": tune.choice([128, 256, 512]),
        "batch_size": tune.choice([512, 1024, 2048]),
        "activation": tune.choice(['relu', 'celu', 'gelu']),
    },
    metric="mean_accuracy",
    mode="max",
    max_concurrent_trials=4,
    resources_per_trial={"gpu": 2},
    search_alg=OptunaSearch(),
    progress_reporter=JupyterNotebookReporter(overwrite=True),
    num_samples=400,
)


Trial name,status,loc,activation,batch_size,channels,dropout,gamma,lr_dense,ratio_down,ratio_up,scale_down,scale_up,weight_decay,acc,iter,total time (s)
runner_tune_0cd938f6,RUNNING,172.17.0.7:37347,gelu,2048,512,0.309856,0.244916,0.00461226,0.667457,1.19168,0.451295,1.39603,8.98847e-05,0.100586,40.0,113.829
runner_tune_0f93c30e,PENDING,,celu,2048,512,0.667971,0.355449,0.0129124,0.752898,1.37893,0.600333,0.95085,1.97476e-05,,,


Result for runner_tune_0cd938f6:
  date: 2022-06-20_09-21-11
  done: true
  experiment_id: 4ea3eae44790458a9de46bb1d7007127
  experiment_tag: 1_activation=gelu,batch_size=2048,channels=512,dropout=0.30986,gamma=0.24492,lr_dense=0.0046123,ratio_down=0.66746,ratio_up=1.1917,scale_down=0.4513,scale_up=1.396,weight_decay=8.9885e-05
  hostname: 56f38f239d73
  iterations_since_restore: 40
  mean_accuracy: 0.1005859375
  node_ip: 172.17.0.7
  pid: 37347
  time_since_restore: 113.82920527458191
  time_this_iter_s: 2.748023509979248
  time_total_s: 113.82920527458191
  timestamp: 1655716871
  timesteps_since_restore: 0
  training_iteration: 40
  trial_id: 0cd938f6
  warmup_time: 0.004626750946044922
  


2022-06-20 09:21:12,318	INFO trial_runner.py:803 -- starting runner_tune_0f93c30e


[2m[36m(runner_tune pid=37457)[0m [32m2022-06-20 09:21:15.715[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m262[0m - [1mCreating new experiment[0m
[2m[36m(runner_tune pid=37457)[0m [32m2022-06-20 09:21:15.716[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m277[0m - [1mExperiment directory is: /home/shared/data/results/cifar10/cifar10/CIFAR10Algorithm/debug_hp_optimization_20220620_091331/0001_20220620_092115[0m
[2m[36m(runner_tune pid=37457)[0m [32m2022-06-20 09:21:15.721[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m554[0m - [1mSingle worker mode[0m
[2m[36m(runner_tune pid=37457)[0m [32m2022-06-20 09:21:15.722[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m48[0m - [1mWorker: 1/1 is running...[0m
