In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

import sys
# Put the parent directory on the module search path before the `src.beam`
# imports below. Presumably the notebook lives one level under the repository
# root and `src` is not installed as a package — TODO confirm.
sys.path.append('..')

from src.beam import beam_arguments, Experiment
from src.beam import UniversalDataset, UniversalBatchSampler
from src.beam import Algorithm
from src.beam import LinearNet

from src.beam import DataTensor
from src.beam.utils import is_notebook

from cifar10_example import run_cifar10, cifar10_algorithm_generator, Cifar10Network

In [2]:
import os

# Dataset location and experiment output root.
#   path_to_data -> forwarded to `beam_arguments(..., path_to_data=...)`
#   root_dir     -> interpolated into the `--root-dir=` flag
# Both are absolute, machine-specific paths, so each one can be overridden with
# an environment variable instead of editing this cell. When the variable is
# unset or empty, the original hard-coded value is used.
path_to_data = os.environ.get('BEAM_CIFAR10_DATA') or '/localdata/elads/data/datasets/cifar10'
root_dir = os.environ.get('BEAM_CIFAR10_ROOT') or '/localdata/elads/data/cifar10'

# Alternative locations used on another setup:
# path_to_data = '/home/shared/data/dataset/cifar10'
# root_dir = '/home/shared/data/results/cifar10'

## Training with a single worker

In [3]:
# One-time setup that must happen before any worker is initialized:
# build the run arguments, then create the experiment from them.

# The command-line style flags are kept as three separate strings because that
# is how they are handed, positionally, to `beam_arguments`.
cli_flags = (
    f"--project-name=cifar10 --root-dir={root_dir} --algorithm=CIFAR10Algorithm --device=1 --half --lr-d=0.01 --batch-size=2048",
    "--n-epochs=6 --epoch-length-train=1000000 --epoch-length-eval=10000 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=1e-5 --beta1=0.9 --beta2=0.999",
)

# The dataset location is passed as a keyword argument rather than as a flag.
args = beam_arguments(*cli_flags, path_to_data=path_to_data)

experiment = Experiment(args)

[32m2022-06-13 22:35:01.066[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m100[0m - [1mbeam project: cifar10[0m
[32m2022-06-13 22:35:01.067[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m101[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-13 22:35:01.068[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1mproject_name: cifar10[0m
[32m2022-06-13 22:35:01.069[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1midentifier: debug[0m
[32m2022-06-13 22:35:01.070[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1malgorithm: CIFAR10Algorithm[0m
[32m2022-06-13 22:35:01.070[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1mmp_port: None[0m
[32m2022-06-13 22:35:01.071[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m105[0m - [1mroot_

## Train with single or multiple workers

In [4]:
# Call the experiment with the CIFAR-10 algorithm generator to launch the run.
# The result is kept as `alg` because the evaluation cell further down calls
# `alg.evaluate(...)` on it.
alg = experiment(cifar10_algorithm_generator)

[32m2022-06-13 22:35:01.535[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m433[0m - [1mSingle worker mode[0m
[32m2022-06-13 22:35:01.536[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m45[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/4 [00:00<?, ?it/s]


[32m2022-06-13 22:35:40.603[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 1/6:[0m
[32m2022-06-13 22:35:40.607[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-13 22:35:40.608[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 1520.83 	|[0m
[32m2022-06-13 22:35:40.609[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.737819 	|[0m
[32m2022-06-13 22:35:40.609[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.01 	|[0m
[32m2022-06-13 22:35:40.610[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-13 22:35:40.611[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 1148 	|[0m
[32m2022-06-13 22:35:40.612[0m

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/4 [00:00<?, ?it/s]


[32m2022-06-13 22:36:13.420[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 2/6:[0m
[32m2022-06-13 22:36:13.425[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-13 22:36:13.426[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 827.916 	|[0m
[32m2022-06-13 22:36:13.427[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.859264 	|[0m
[32m2022-06-13 22:36:13.427[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.00316228 	|[0m
[32m2022-06-13 22:36:13.428[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-13 22:36:13.429[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 910.625 	|[0m
[32m2022-06-13 22:36:1

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/4 [00:00<?, ?it/s]


[32m2022-06-13 22:36:46.662[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 3/6:[0m
[32m2022-06-13 22:36:46.665[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-13 22:36:46.666[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 696.989 	|[0m
[32m2022-06-13 22:36:46.666[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.881098 	|[0m
[32m2022-06-13 22:36:46.667[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.001 	|[0m
[32m2022-06-13 22:36:46.668[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-13 22:36:46.669[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 872.5 	|[0m
[32m2022-06-13 22:36:46.669[

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/4 [00:00<?, ?it/s]


[32m2022-06-13 22:37:19.978[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 4/6:[0m
[32m2022-06-13 22:37:19.981[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-13 22:37:19.982[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 644.859 	|[0m
[32m2022-06-13 22:37:19.983[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.889963 	|[0m
[32m2022-06-13 22:37:19.983[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.000316228 	|[0m
[32m2022-06-13 22:37:19.984[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-13 22:37:19.985[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 772.375 	|[0m
[32m2022-06-13 22:37:

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/4 [00:00<?, ?it/s]


[32m2022-06-13 22:37:53.253[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 5/6:[0m
[32m2022-06-13 22:37:53.256[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-13 22:37:53.256[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 630.768 	|[0m
[32m2022-06-13 22:37:53.257[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.892696 	|[0m
[32m2022-06-13 22:37:53.258[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 0.0001 	|[0m
[32m2022-06-13 22:37:53.259[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-13 22:37:53.259[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 782 	|[0m
[32m2022-06-13 22:37:53.260[0

train:   0%|          | 0/487 [00:00<?, ?it/s]

test:   0%|          | 0/4 [00:00<?, ?it/s]


[32m2022-06-13 22:38:26.475[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m309[0m - [1mFinished epoch 6/6:[0m
[32m2022-06-13 22:38:26.478[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtrain:[0m
[32m2022-06-13 22:38:26.479[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 621.452 	|[0m
[32m2022-06-13 22:38:26.479[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1macc 0.894037 	|[0m
[32m2022-06-13 22:38:26.480[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mlr 3.16228e-05 	|[0m
[32m2022-06-13 22:38:26.481[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m347[0m - [1mtest:[0m
[32m2022-06-13 22:38:26.482[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m351[0m - [1mloss 791 	|[0m
[32m2022-06-13 22:38:26.4

In [5]:
# Evaluate on the 'test' subset and average the 'acc' entries found under the
# 'scalar' key of the returned results.
test_results = alg.evaluate('test')
test_acc_values = test_results['scalar']['acc']
np.mean(test_acc_values)

test:   0%|          | 0/5 [00:00<?, ?it/s]

0.8601277351379395

In [6]:
# alg.optimizers['net'].scaler.get_scale()