In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

from src.beam import beam_arguments, Experiment
from src.beam import UniversalDataset, UniversalBatchSampler
from src.beam import Algorithm
from src.beam import LinearNet

from src.beam import DataTensor
from src.beam.utils import is_notebook
from mnist_example import run_mnist, mnist_algorithm_generator

## Training with a single worker

In [2]:
# One-time setup, executed before any worker is initialized:
# set the running arguments and create the experiment from them.

args = beam_arguments(
    "--project-name=mnist --root-dir=/home/shared/data/results --algorithm=MNISTAlgorithm",
    "--epoch-length=100000 --n-epochs=2 --clip=1 --parallel=1",
    path_to_data='/home/elad/projects/mnist',
)

experiment = Experiment(args)

[32m2022-05-08 10:59:58.515[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m89[0m - [1mbeam project: mnist[0m
[32m2022-05-08 10:59:58.517[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mSimulation Hyperparameters[0m
[32m2022-05-08 10:59:58.518[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mproject_name: mnist[0m
[32m2022-05-08 10:59:58.519[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1midentifier: debug[0m
[32m2022-05-08 10:59:58.520[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1malgorithm: MNISTAlgorithm[0m
[32m2022-05-08 10:59:58.521[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mmp_port: None[0m
[32m2022-05-08 10:59:58.522[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mroot_dir: /home/sh

we can generate an untrained algorithm with

In [3]:
# Build the algorithm from the experiment without running any training.
alg = mnist_algorithm_generator(experiment)

or we can apply the default training routine with

In [4]:
# Calling the experiment with the generator runs the default training routine
# and returns the trained algorithm.
alg = experiment(mnist_algorithm_generator, experiment)

[32m2022-05-08 10:59:59.952[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m417[0m - [1mSingle worker mode[0m
[32m2022-05-08 10:59:59.954[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m40[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/389 [00:00<?, ?it/s]

validation:   0%|          | 0/389 [00:00<?, ?it/s]


[32m2022-05-08 11:00:04.249[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 1/2:[0m
[32m2022-05-08 11:00:04.261[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:04.262[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.337824 	|[0m
[32m2022-05-08 11:00:04.263[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.900697 	|[0m
[32m2022-05-08 11:00:04.263[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.00099 	|[0m
[32m2022-05-08 11:00:04.264[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:04.264[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.131608 	|[0m
[32m2022-05-08 11

train:   0%|          | 0/389 [00:00<?, ?it/s]

validation:   0%|          | 0/389 [00:00<?, ?it/s]


[32m2022-05-08 11:00:05.890[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 2/2:[0m
[32m2022-05-08 11:00:05.895[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:05.896[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0885447 	|[0m
[32m2022-05-08 11:00:05.897[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.973128 	|[0m
[32m2022-05-08 11:00:05.898[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.0009801 	|[0m
[32m2022-05-08 11:00:05.899[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:05.900[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0954773 	|[0m
[32m2022-05-0

we can now access the trained algorithm

In [5]:
# Look up the network stored under the 'net' key; as the cell's last expression it is displayed.
alg.networks['net']

LinearNet(
  (lin): Sequential(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): ReLU()
    (2): Linear(in_features=256, out_features=256, bias=True)
    (3): ReLU()
    (4): Linear(in_features=256, out_features=256, bias=True)
    (5): ReLU()
    (6): Linear(in_features=256, out_features=10, bias=True)
  )
)

## Inference

In [6]:
# Run inference by calling the algorithm with 'test'.
inference = alg('test')

# Print every entry of the 'metrics' mapping: its name, then its value on the next line.
print('Test inference results:')
for metric_name, metric_value in inference['metrics'].items():
    print(f'{metric_name}:', metric_value, sep='\n')

test:   0%|          | 0/39 [00:00<?, ?it/s]

Test inference results:
precision:
[0.97085427 0.98677249 0.97747307 0.96278159 0.97040816 0.95414847
 0.98187633 0.9707887  0.96114519 0.97474747]
recall:
[0.98571429 0.98590308 0.96705426 0.97326733 0.96843177 0.97982063
 0.96137787 0.96984436 0.9650924  0.95639247]
fscore:
[0.97822785 0.98633759 0.97223575 0.96799606 0.96941896 0.96681416
 0.97151899 0.9703163  0.96311475 0.96548274]
support:
[ 980 1135 1032 1010  982  892  958 1028  974 1009]


we can also run our own experiment routine and obtain the results

In [7]:
# Run the custom routine `run_mnist` through the experiment; here the returned value
# is unpacked into the algorithm and its results.
alg, results = experiment.run(run_mnist)

[32m2022-05-08 11:00:06.316[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m417[0m - [1mSingle worker mode[0m
[32m2022-05-08 11:00:06.318[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m40[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/389 [00:00<?, ?it/s]

validation:   0%|          | 0/389 [00:00<?, ?it/s]


[32m2022-05-08 11:00:08.134[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 3/2:[0m
[32m2022-05-08 11:00:08.137[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:08.138[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.338252 	|[0m
[32m2022-05-08 11:00:08.139[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.905668 	|[0m
[32m2022-05-08 11:00:08.139[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.00099 	|[0m
[32m2022-05-08 11:00:08.140[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:08.140[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.147575 	|[0m
[32m2022-05-08 11

train:   0%|          | 0/389 [00:00<?, ?it/s]

validation:   0%|          | 0/389 [00:00<?, ?it/s]


[32m2022-05-08 11:00:10.019[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 4/2:[0m
[32m2022-05-08 11:00:10.026[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:10.027[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0864915 	|[0m
[32m2022-05-08 11:00:10.027[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.973962 	|[0m
[32m2022-05-08 11:00:10.028[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.0009801 	|[0m
[32m2022-05-08 11:00:10.028[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:10.029[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.101176 	|[0m
[32m2022-05-08

In [8]:
# Show the validation loss values stored under results['validation']['scalar'].
# NOTE(review): this displays the whole list; consider slicing (e.g. [:10]) to keep the output short.
results['validation']['scalar']['loss']

[0.15844836831092834,
 0.0894060730934143,
 0.07808587700128555,
 0.08715943992137909,
 0.19193102419376373,
 0.1076071634888649,
 0.07562798261642456,
 0.09282419830560684,
 0.12268974632024765,
 0.12891240417957306,
 0.1535983830690384,
 0.10704080760478973,
 0.07671689242124557,
 0.10214642435312271,
 0.2137703150510788,
 0.0781518742442131,
 0.10518572479486465,
 0.13672630488872528,
 0.09374455362558365,
 0.10820126533508301,
 0.08609598875045776,
 0.05154235661029816,
 0.08499065041542053,
 0.07336609065532684,
 0.054493051022291183,
 0.05346296727657318,
 0.0949094370007515,
 0.10131993889808655,
 0.1779078245162964,
 0.1125800609588623,
 0.10923656821250916,
 0.061509281396865845,
 0.10921038687229156,
 0.12424713373184204,
 0.0808035358786583,
 0.12425591051578522,
 0.10601877421140671,
 0.06873971968889236,
 0.08330176025629044,
 0.08505051583051682,
 0.07699783146381378,
 0.10617578029632568,
 0.06872368603944778,
 0.11869192123413086,
 0.03573107346892357,
 0.15712310373783

## Inference

In [9]:
# Run inference by calling the algorithm with 'test'.
inference = alg('test')

# Print every entry of the 'metrics' mapping: its name, then its value on the next line.
print('Test inference results:')
for metric_name, metric_value in inference['metrics'].items():
    print(f'{metric_name}:', metric_value, sep='\n')

test:   0%|          | 0/39 [00:00<?, ?it/s]

Test inference results:
precision:
[0.98466258 0.9885159  0.96003806 0.93175355 0.96385542 0.97362385
 0.97288843 0.9581749  0.97746781 0.98252826]
recall:
[0.98265306 0.98590308 0.97771318 0.97326733 0.97759674 0.95179372
 0.97390397 0.98054475 0.93531828 0.94747275]
fscore:
[0.98365679 0.98720776 0.96879501 0.95205811 0.97067745 0.96258503
 0.97339593 0.96923077 0.95592865 0.96468214]
support:
[ 980 1135 1032 1010  982  892  958 1028  974 1009]


## Reload algorithm and resume training

In [10]:
# One-time setup, executed before any worker is initialized:
# set the running arguments and create the experiment from them.
# Same configuration as the single-worker run, plus the --reload flag.

args = beam_arguments(
    "--project-name=mnist --root-dir=/home/shared/data/results --algorithm=MNISTAlgorithm",
    "--epoch-length=100000 --n-epochs=2 --clip=1 --parallel=1 --reload",
    path_to_data='/home/elad/projects/mnist',
)

experiment = Experiment(args)

[32m2022-05-08 11:00:10.463[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m89[0m - [1mbeam project: mnist[0m
[32m2022-05-08 11:00:10.464[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mSimulation Hyperparameters[0m
[32m2022-05-08 11:00:10.464[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mproject_name: mnist[0m
[32m2022-05-08 11:00:10.465[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1midentifier: debug[0m
[32m2022-05-08 11:00:10.466[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1malgorithm: MNISTAlgorithm[0m
[32m2022-05-08 11:00:10.466[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mmp_port: None[0m
[32m2022-05-08 11:00:10.467[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mroot_dir: /home/sh

In [11]:
# Same call as for a fresh run; with --reload set, the log below reports that the
# experiment is reloaded from a stored checkpoint before training continues.
alg = experiment(mnist_algorithm_generator, experiment)

[32m2022-05-08 11:00:10.525[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m417[0m - [1mSingle worker mode[0m
[32m2022-05-08 11:00:10.525[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m40[0m - [1mWorker: 1/1 is running...[0m
[32m2022-05-08 11:00:10.609[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mreload_checkpoint[0m:[36m233[0m - [1mReload experiment from checkpoint: /home/shared/data/results/mnist/MNISTAlgorithm_debug_exp_0000_20220508_095246/checkpoints/checkpoint_000004[0m


train:   0%|          | 0/389 [00:00<?, ?it/s]

validation:   0%|          | 0/389 [00:00<?, ?it/s]


[32m2022-05-08 11:00:12.325[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 1/2:[0m
[32m2022-05-08 11:00:12.329[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:12.329[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0465994 	|[0m
[32m2022-05-08 11:00:12.330[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.98566 	|[0m
[32m2022-05-08 11:00:12.330[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.000970299 	|[0m
[32m2022-05-08 11:00:12.331[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:12.331[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0985175 	|[0m
[32m2022-05-

train:   0%|          | 0/389 [00:00<?, ?it/s]

validation:   0%|          | 0/389 [00:00<?, ?it/s]


[32m2022-05-08 11:00:14.107[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 2/2:[0m
[32m2022-05-08 11:00:14.111[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:14.112[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0284482 	|[0m
[32m2022-05-08 11:00:14.112[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.991123 	|[0m
[32m2022-05-08 11:00:14.113[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.000960596 	|[0m
[32m2022-05-08 11:00:14.113[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:14.114[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0876903 	|[0m
[32m2022-05

## Training with two workers

In [12]:
# One-time setup, executed before any worker is initialized:
# set the running arguments and create the experiment from them.
# --parallel=2 requests two workers. The flag is passed exactly once: the earlier
# version also carried a stale --parallel=1 in the same string, which contradicted it.

args = beam_arguments(
    "--project-name=mnist --root-dir=/home/shared/data/results --algorithm=MNISTAlgorithm",
    "--epoch-length=100000 --n-epochs=2 --clip=1 --parallel=2",
    path_to_data='/home/elad/projects/mnist',
)

experiment = Experiment(args)

[32m2022-05-08 11:00:14.322[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m89[0m - [1mbeam project: mnist[0m
[32m2022-05-08 11:00:14.323[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m90[0m - [1mSimulation Hyperparameters[0m
[32m2022-05-08 11:00:14.324[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mproject_name: mnist[0m
[32m2022-05-08 11:00:14.324[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1midentifier: debug[0m
[32m2022-05-08 11:00:14.325[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1malgorithm: MNISTAlgorithm[0m
[32m2022-05-08 11:00:14.325[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mmp_port: None[0m
[32m2022-05-08 11:00:14.325[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m94[0m - [1mroot_dir: /home/sh

In [13]:
# Same call as in the single-worker case; with this configuration the log below
# reports that two parallel workers are initialized.
alg = experiment(mnist_algorithm_generator, experiment)

[32m2022-05-08 11:00:14.389[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m408[0m - [1mInitializing 2 parallel workers[0m
[32m2022-05-08 11:00:14.390[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m413[0m - [1mMultiprocessing port is: 46417[0m
[32m2022-05-08 11:00:15.939[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m40[0m - [1mWorker: 1/2 is running...[0m
[32m2022-05-08 11:00:15.948[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m40[0m - [1mWorker: 2/2 is running...[0m


train: 100%|██████████| 389/389 [00:05<00:00, 73.84it/s] 
validation:   0%|          | 0/389 [00:00<?, ?it/s]
validation: 100%|██████████| 389/389 [00:00<00:00, 682.83it/s]


[32m2022-05-08 11:00:23.102[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 1/2:[0m
[32m2022-05-08 11:00:23.109[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:23.110[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.301992 	|[0m
[32m2022-05-08 11:00:23.110[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.912958 	|[0m
[32m2022-05-08 11:00:23.110[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.00099 	|[0m
[32m2022-05-08 11:00:23.110[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:23.111[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.125256 	|[0m
[32m2022-05-08 11


train:   0%|          | 0/389 [00:00<?, ?it/s]
train: 100%|██████████| 389/389 [00:02<00:00, 138.74it/s]

validation: 100%|██████████| 389/389 [00:00<00:00, 829.48it/s]


[32m2022-05-08 11:00:26.717[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 2/2:[0m
[32m2022-05-08 11:00:26.721[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:26.721[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0610209 	|[0m
[32m2022-05-08 11:00:26.722[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.982196 	|[0m
[32m2022-05-08 11:00:26.722[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.0009801 	|[0m
[32m2022-05-08 11:00:26.722[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:26.722[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0923414 	|[0m
[32m2022-05-0

Inference works the same way if we use the default runner: the runner reloads the stored algorithm in single-GPU mode

In [14]:
# Run inference by calling the algorithm with 'test'.
inference = alg('test')

# Print every entry of the 'metrics' mapping: its name, then its value on the next line.
print('Test inference results:')
for metric_name, metric_value in inference['metrics'].items():
    print(f'{metric_name}:', metric_value, sep='\n')

test:   0%|          | 0/39 [00:00<?, ?it/s]

Test inference results:
precision:
[0.97289157 0.98769772 0.95331466 0.98205384 0.97846154 0.96885428
 0.98312236 0.97170732 0.97894737 0.97487437]
recall:
[0.98877551 0.99030837 0.98934109 0.97524752 0.97148676 0.9764574
 0.97286013 0.9688716  0.95482546 0.96134787]
fscore:
[0.98076923 0.98900132 0.97099382 0.97863885 0.97496168 0.97264098
 0.97796432 0.97028738 0.96673597 0.96806387]
support:
[ 980 1135 1032 1010  982  892  958 1028  974 1009]


we can also define our own experiment routine and return the results

In [15]:
# Initialize the workers (single or multiple, depending on the configuration) and run `run_mnist`.
# NOTE(review): unlike the single-worker call, the return value is not unpacked here; the next
# cell indexes it as results[1]['results'] — presumably one entry per worker, confirm against Experiment.run.
results = experiment.run(run_mnist)

[32m2022-05-08 11:00:27.239[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m408[0m - [1mInitializing 2 parallel workers[0m
[32m2022-05-08 11:00:27.241[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m413[0m - [1mMultiprocessing port is: 41631[0m
[32m2022-05-08 11:00:28.733[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m40[0m - [1mWorker: 1/2 is running...[0m
[32m2022-05-08 11:00:28.762[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m40[0m - [1mWorker: 2/2 is running...[0m


train: 100%|██████████| 389/389 [00:02<00:00, 162.10it/s]
train: 100%|██████████| 389/389 [00:04<00:00, 79.47it/s] 
validation: 100%|██████████| 389/389 [00:00<00:00, 847.84it/s]


[32m2022-05-08 11:00:35.669[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 1/2:[0m
[32m2022-05-08 11:00:35.679[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:35.679[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.303819 	|[0m
[32m2022-05-08 11:00:35.679[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.911994 	|[0m
[32m2022-05-08 11:00:35.680[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.00099 	|[0m
[32m2022-05-08 11:00:35.680[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:35.680[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.128011 	|[0m
[32m2022-05-08 11


train:   0%|          | 0/389 [00:00<?, ?it/s]
train: 100%|██████████| 389/389 [00:02<00:00, 150.18it/s]
validation:   0%|          | 0/389 [00:00<?, ?it/s]
validation: 100%|██████████| 389/389 [00:00<00:00, 838.92it/s]


[32m2022-05-08 11:00:39.093[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m291[0m - [1mFinished epoch 2/2:[0m
[32m2022-05-08 11:00:39.097[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mtrain:[0m
[32m2022-05-08 11:00:39.098[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0614947 	|[0m
[32m2022-05-08 11:00:39.098[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1macc 0.981724 	|[0m
[32m2022-05-08 11:00:39.098[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mlr 0.0009801 	|[0m
[32m2022-05-08 11:00:39.098[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m330[0m - [1mvalidation:[0m
[32m2022-05-08 11:00:39.099[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m334[0m - [1mloss 0.0945423 	|[0m
[32m2022-05-0

In [16]:
# Show the validation loss values found under entry 1 of the returned results.
# NOTE(review): presumably index 1 selects one of the two workers — confirm.
# The whole list is displayed; consider slicing (e.g. [:10]) to keep the output short.
results[1]['results']['validation']['scalar']['loss']

[0.06506209820508957,
 0.06944388151168823,
 0.08207054436206818,
 0.10892399400472641,
 0.0869956761598587,
 0.10769040882587433,
 0.10257066786289215,
 0.08572819083929062,
 0.1246243342757225,
 0.026350203901529312,
 0.10090633481740952,
 0.11319126188755035,
 0.06160338595509529,
 0.062499530613422394,
 0.08562101423740387,
 0.04624390974640846,
 0.14300015568733215,
 0.13295878469944,
 0.05520991235971451,
 0.057520970702171326,
 0.03143289312720299,
 0.09052925556898117,
 0.10364935547113419,
 0.09490916877985,
 0.07175968587398529,
 0.07635423541069031,
 0.08468688279390335,
 0.06221984326839447,
 0.14224795997142792,
 0.1283537596464157,
 0.1678728461265564,
 0.08852816373109818,
 0.022741621360182762,
 0.056438833475112915,
 0.12966875731945038,
 0.17939400672912598,
 0.10628193616867065,
 0.1538679301738739,
 0.13159161806106567,
 0.1012459322810173,
 0.10252518206834793,
 0.03237597271800041,
 0.08204969763755798,
 0.10337439179420471,
 0.05119701474905014,
 0.12647500634193