In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

# Add the parent of the current working directory to the module search path so
# that the `src.beam` imports below can be resolved (assumes the notebook is
# launched from a direct subdirectory of the repository root — TODO confirm).
import sys
sys.path.append('..')

from src.beam import beam_arguments, Experiment
from src.beam import UniversalDataset, UniversalBatchSampler
from src.beam import Algorithm
from src.beam import LinearNet

from src.beam import DataTensor
from src.beam.utils import is_notebook

from cifar10_example import run_cifar10, cifar10_algorithm_generator, Cifar10Network

In [2]:
# Filesystem locations used by the experiment setup cell:
#   path_to_data - passed to `beam_arguments` as the `path_to_data` keyword
#   root_dir     - interpolated into the `--root-dir` command-line flag
#
# Both can be overridden without editing the notebook by exporting
# CIFAR10_DATA_PATH / CIFAR10_ROOT_DIR before the kernel starts. When a
# variable is unset (or empty) the original hard-coded default is used, so
# existing setups behave exactly as before.
import os

path_to_data = os.environ.get('CIFAR10_DATA_PATH') or '/localdata/elads/data/datasets/cifar10'
root_dir = os.environ.get('CIFAR10_ROOT_DIR') or '/localdata/elads/data/cifar10'

# Alternative locations (kept for reference):
# path_to_data = '/home/shared/data/dataset/cifar10'
# root_dir = '/home/shared/data/results/cifar10'

## Training with a single worker

In [3]:
# One-time setup that has to happen before any worker is initialized:
# parse the run arguments and build the experiment object from them.

# The flags are grouped into three strings purely for readability; they are
# handed to `beam_arguments` as separate positional arguments.
general_flags = f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --half --lr-d=0.01 --batch-size=1024"
schedule_flags = "--n-epochs=6 --epoch-length-train=500000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark"
optimizer_flags = "--weight-decay=1e-5 --beta1=0.9 --beta2=0.9"

args = beam_arguments(general_flags, schedule_flags, optimizer_flags, path_to_data=path_to_data)

experiment = Experiment(args)

[32m2022-06-14 04:35:25.566[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m100[0m - [1mbeam project: cifar10[0m
[32m2022-06-14 04:35:25.568[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m101[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-14 04:35:25.568[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1mproject_name: cifar10[0m
[32m2022-06-14 04:35:25.569[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1midentifier: debug[0m
[32m2022-06-14 04:35:25.570[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-14 04:35:25.571[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1mmp_port: None[0m
[32m2022-06-14 04:35:25.571[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1mroot_

## Training with one or more workers

In [None]:
# Call the experiment with the CIFAR-10 algorithm generator and keep the
# returned object as `alg`; the evaluation cell further down calls
# `alg.evaluate` on it. Judging by the log output, this call runs the
# training epochs — NOTE(review): confirm against src.beam.experiment.
alg = experiment(cifar10_algorithm_generator)

[32m2022-06-14 04:35:26.597[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m433[0m - [1mSingle worker mode[0m
[32m2022-06-14 04:35:26.598[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m45[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 04:36:08.645[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 1/6:[0m
[32m2022-06-14 04:36:08.648[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-14 04:36:08.649[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 776.154 	|[0m
[32m2022-06-14 04:36:08.650[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.731784 	|[0m
[32m2022-06-14 04:36:08.650[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.01 	|[0m
[32m2022-06-14 04:36:08.651[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-14 04:36:08.652[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 582.278 	|[0m
[32m2022-06-14 04:36:08.653

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 04:36:41.848[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 2/6:[0m
[32m2022-06-14 04:36:41.851[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-14 04:36:41.852[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 363.094 	|[0m
[32m2022-06-14 04:36:41.853[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.876993 	|[0m
[32m2022-06-14 04:36:41.853[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.00316228 	|[0m
[32m2022-06-14 04:36:41.854[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-14 04:36:41.855[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 425.639 	|[0m
[32m2022-06-14 04:36:4

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 04:37:15.401[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 3/6:[0m
[32m2022-06-14 04:37:15.404[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-14 04:37:15.404[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 278.275 	|[0m
[32m2022-06-14 04:37:15.405[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.905406 	|[0m
[32m2022-06-14 04:37:15.406[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.001 	|[0m
[32m2022-06-14 04:37:15.407[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-14 04:37:15.407[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 379.361 	|[0m
[32m2022-06-14 04:37:15.408

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/9 [00:00<?, ?it/s]


[32m2022-06-14 04:37:49.157[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 4/6:[0m
[32m2022-06-14 04:37:49.160[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-14 04:37:49.161[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 253.185 	|[0m
[32m2022-06-14 04:37:49.162[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.913912 	|[0m
[32m2022-06-14 04:37:49.162[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.000316228 	|[0m
[32m2022-06-14 04:37:49.163[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-14 04:37:49.164[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 400.528 	|[0m
[32m2022-06-14 04:37:

train:   0%|          | 0/487 [00:00<?, ?it/s]

In [None]:
# Evaluate on the 'test' subset, then display the mean of the 'acc' entry
# found under the 'scalar' key of the returned results.
test_results = alg.evaluate('test')
np.mean(test_results['scalar']['acc'])

In [None]:
# Debugging aid, left disabled: uncomment to inspect the scaler attached to the
# 'net' optimizer (presumably the mixed-precision loss scale, given the
# `--half` flag used above — TODO confirm).
# alg.optimizers['net'].scaler.get_scale()