In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the project sources are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import torch
import torchvision
import torch.nn.functional as F
from torch import nn
from sklearn.metrics import precision_recall_fscore_support
import numpy as np

# Ship the parent directory to the Ray cluster as the working directory so
# that Ray workers can resolve the project's imports.
import ray
runtime_env = {"working_dir": ".."}
# ignore_reinit_error=True keeps this cell re-runnable: without it, executing
# the cell a second time in the same kernel raises because Ray is already
# initialised.
ray.init(runtime_env=runtime_env, dashboard_port=13065, include_dashboard=True,
         ignore_reinit_error=True)

from ray import tune
from ray.tune.suggest.optuna import OptunaSearch
from ray.tune import JupyterNotebookReporter

# Make the parent directory importable so that `src.beam` resolves from this notebook.
import sys
# Guarded so that re-running the cell does not keep appending duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

from src.beam import beam_arguments, Experiment, Study
from src.beam import UniversalDataset, UniversalBatchSampler, PackedFolds
from src.beam import Algorithm
from src.beam import LinearNet, check_type, slice_to_index
from torchvision import transforms
import matplotlib.pyplot as plt

from src.beam import DataTensor
from src.beam.utils import is_notebook

from sklearn.datasets import fetch_covtype
import pandas as pd

from covtype_example import CovtypeDataset, CovtypeAlgorithm, RuleNet, covtype_algorithm_generator, get_covtype_parser 

2022-06-23 16:26:06,975	INFO services.py:1456 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:13065[39m[22m
2022-06-23 16:26:08,490	INFO packaging.py:388 -- Creating a file package for local directory '..'.
2022-06-23 16:26:08,509	INFO packaging.py:241 -- Pushing file package 'gcs://_ray_pkg_2adadfa5978c26c6.zip' (0.84MiB) to Ray cluster...
2022-06-23 16:26:08,519	INFO packaging.py:243 -- Successfully pushed file package 'gcs://_ray_pkg_2adadfa5978c26c6.zip'.


In [2]:
# Storage locations used throughout the notebook:
#   path_to_data - passed to beam_arguments as the dataset location
#   root_dir     - passed to beam_arguments as --root-dir
path_to_data, root_dir = (
    '/home/shared/data/dataset/covtype',
    '/home/shared/data/results/covtype',
)

In [3]:
# Short 2-epoch run on device 1.
# Options given as command-line style strings.
cli_flags = (
    f"--project-name=covtype --root-dir={root_dir} --algorithm=CovtypeAlgorithm --device=1 --no-half --lr-d=1e-3 --batch-size=256",
    "--n-epochs=2 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=1e-5 --beta1=0.9 --beta2=0.999",
)
# Options given as keyword overrides.
overrides = dict(
    label_smoothing=.05,
    weight_factor=.0,
    path_to_data=path_to_data,
    dropout=.0,
    activation='gelu',
    channels=128,
    n_rules=64,
    n_layers=2,
)

args = beam_arguments(get_covtype_parser(), *cli_flags, **overrides)
experiment = Experiment(args)

[32m2022-06-23 15:21:57.648[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m183[0m - [1mbeam project: covtype[0m
[32m2022-06-23 15:21:57.651[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m184[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-23 15:21:57.653[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1mproject_name: covtype[0m
[32m2022-06-23 15:21:57.654[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1midentifier: debug[0m
[32m2022-06-23 15:21:57.654[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1malgorithm: CovtypeAlgorithm[0m
[32m2022-06-23 15:21:57.655[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1mmp_port: None[0m
[32m2022-06-23 15:21:57.657[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1mroot_

In [4]:
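# Alternative kept for reference: call the generator directly instead of going through `experiment(...)`.
# alg = covtype_algorithm_generator(experiment)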

In [7]:
# Run the experiment with the covtype algorithm generator; the returned
# object is the one evaluated on the test subset further down.
alg = experiment(covtype_algorithm_generator)

[32m2022-06-23 15:35:07.080[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m553[0m - [1mSingle worker mode[0m
[32m2022-06-23 15:35:07.082[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m45[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/1360 [00:00<?, ?it/s]

validation:   0%|          | 0/452 [00:00<?, ?it/s]

[32m2022-06-23 15:36:06.880[0m | [1mINFO    [0m | [36msrc.beam.model[0m:[36mstep[0m:[36m584[0m - [1mbr was changed to 1[0m
[32m2022-06-23 15:36:06.881[0m | [1mINFO    [0m | [36msrc.beam.model[0m:[36mstep[0m:[36m585[0m - [1mlambda_llr was changed to 0[0m

[32m2022-06-23 15:36:06.884[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m397[0m - [1mFinished epoch 1/2:[0m
[32m2022-06-23 15:36:06.889[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mtrain:[0m
[32m2022-06-23 15:36:06.891[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss 207.968 	|[0m
[32m2022-06-23 15:36:06.892[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1macc 0.734467 	|[0m
[32m2022-06-23 15:36:06.893[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mlr 0.001 	|[0m
[32m2022-06-23 15:3

train:   0%|          | 0/1360 [00:00<?, ?it/s]

validation:   0%|          | 0/452 [00:00<?, ?it/s]

[32m2022-06-23 15:36:58.890[0m | [1mINFO    [0m | [36msrc.beam.model[0m:[36mstep[0m:[36m584[0m - [1mbr was changed to 1[0m
[32m2022-06-23 15:36:58.891[0m | [1mINFO    [0m | [36msrc.beam.model[0m:[36mstep[0m:[36m585[0m - [1mlambda_llr was changed to 0[0m

[32m2022-06-23 15:36:58.895[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m397[0m - [1mFinished epoch 2/2:[0m
[32m2022-06-23 15:36:58.901[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mtrain:[0m
[32m2022-06-23 15:36:58.902[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss 165.038 	|[0m
[32m2022-06-23 15:36:58.902[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1macc 0.819548 	|[0m
[32m2022-06-23 15:36:58.903[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mlr 0.001 	|[0m
[32m2022-06-23 15:3

In [13]:
# Evaluate on the 'test' subset; the 'acc' values are read from
# results['scalar'] in the next cell.
results = alg.evaluate('test')

test:   0%|          | 0/454 [00:00<?, ?it/s]

In [15]:
# Mean of the 'acc' scalar values collected by the test evaluation.
np.mean(results['scalar']['acc'])

0.8330432722747063

In [14]:
# Register the %tensorboard magic used in the next cell.
%load_ext tensorboard

In [15]:
%tensorboard --logdir /home/shared/data/results/covtype/covtype/CovtypeAlgorithm/debug/0001_20220622_160022 --port=17067 --bind_all

In [6]:
# Longer run (100 epochs, 512 channels) stored under the identifier "half_precision".
# NOTE(review): unlike the other beam_arguments calls in this notebook, this one passes
# no parser as the first argument and uses --no-amp where the others use --no-half —
# confirm this still matches the current beam_arguments API before re-running.
args = beam_arguments(
        f"--project-name=covtype --root-dir={root_dir} --algorithm=CovtypeAlgorithm --device=1 --no-amp --lr-d=1e-3 --batch-size=256",
        "--n-epochs=100 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark --identifier=half_precision",
        "--weight-decay=1e-5 --beta1=0.9 --beta2=0.999", label_smoothing=.05, weight_factor=.5,
        path_to_data=path_to_data, gamma=1., dropout=.0, activation='gelu', channels=512)

experiment = Experiment(args)

[32m2022-06-23 11:44:34.677[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m183[0m - [1mbeam project: covtype[0m
[32m2022-06-23 11:44:34.679[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m184[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-23 11:44:34.683[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1mproject_name: covtype[0m
[32m2022-06-23 11:44:34.685[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1midentifier: half_precision[0m
[32m2022-06-23 11:44:34.686[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1malgorithm: CovtypeAlgorithm[0m
[32m2022-06-23 11:44:34.687[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - [1mmp_port: None[0m
[32m2022-06-23 11:44:34.688[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m189[0m - 

In [7]:
# Train with the "half_precision" configuration built in the previous cell.
# NOTE(review): in the saved output the validation loss is reported as nan from
# epoch 2 and the train loss from epoch 4, and the run ends in a KeyboardInterrupt —
# treat results from this configuration as unreliable until that is investigated.
alg = experiment(covtype_algorithm_generator)

[32m2022-06-23 11:44:37.677[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun[0m:[36m553[0m - [1mSingle worker mode[0m
[32m2022-06-23 11:44:37.680[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mrun_worker[0m:[36m45[0m - [1mWorker: 1/1 is running...[0m


train:   0%|          | 0/1360 [00:00<?, ?it/s]

validation:   0%|          | 0/452 [00:00<?, ?it/s]


[32m2022-06-23 11:50:32.263[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m397[0m - [1mFinished epoch 1/100:[0m
[32m2022-06-23 11:50:32.272[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mtrain:[0m
[32m2022-06-23 11:50:32.274[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss 228.893 	|[0m
[32m2022-06-23 11:50:32.274[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1macc 0.695109 	|[0m
[32m2022-06-23 11:50:32.275[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mlr 0.001 	|[0m
[32m2022-06-23 11:50:32.277[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mvalidation:[0m
[32m2022-06-23 11:50:32.283[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss 195.103 	|[0m
[32m2022-06-23 11:5

train:   0%|          | 0/1360 [00:00<?, ?it/s]

validation:   0%|          | 0/452 [00:00<?, ?it/s]


[32m2022-06-23 11:56:29.838[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m397[0m - [1mFinished epoch 2/100:[0m
[32m2022-06-23 11:56:29.844[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mtrain:[0m
[32m2022-06-23 11:56:29.845[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss 167.189 	|[0m
[32m2022-06-23 11:56:29.846[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1macc 0.816791 	|[0m
[32m2022-06-23 11:56:29.846[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mlr 0.001 	|[0m
[32m2022-06-23 11:56:29.847[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mvalidation:[0m
[32m2022-06-23 11:56:29.847[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss nan 	|[0m
[32m2022-06-23 11:56:29

train:   0%|          | 0/1360 [00:00<?, ?it/s]

validation:   0%|          | 0/452 [00:00<?, ?it/s]


[32m2022-06-23 12:03:08.504[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m397[0m - [1mFinished epoch 3/100:[0m
[32m2022-06-23 12:03:08.515[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mtrain:[0m
[32m2022-06-23 12:03:08.517[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss 139.599 	|[0m
[32m2022-06-23 12:03:08.525[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1macc 0.870749 	|[0m
[32m2022-06-23 12:03:08.527[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mlr 0.001 	|[0m
[32m2022-06-23 12:03:08.529[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mvalidation:[0m
[32m2022-06-23 12:03:08.531[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss nan 	|[0m
[32m2022-06-23 12:03:08

train:   0%|          | 0/1360 [00:00<?, ?it/s]

validation:   0%|          | 0/452 [00:00<?, ?it/s]


[32m2022-06-23 12:08:59.802[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m397[0m - [1mFinished epoch 4/100:[0m
[32m2022-06-23 12:08:59.808[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mtrain:[0m
[32m2022-06-23 12:08:59.810[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss nan 	|[0m
[32m2022-06-23 12:08:59.811[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1macc 0.895735 	|[0m
[32m2022-06-23 12:08:59.813[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mlr 0.001 	|[0m
[32m2022-06-23 12:08:59.815[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mvalidation:[0m
[32m2022-06-23 12:08:59.817[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss nan 	|[0m
[32m2022-06-23 12:08:59.820

train:   0%|          | 0/1360 [00:00<?, ?it/s]

validation:   0%|          | 0/452 [00:00<?, ?it/s]


[32m2022-06-23 12:14:43.038[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36msave_model_results[0m:[36m397[0m - [1mFinished epoch 5/100:[0m
[32m2022-06-23 12:14:43.057[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mtrain:[0m
[32m2022-06-23 12:14:43.058[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss nan 	|[0m
[32m2022-06-23 12:14:43.060[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1macc 0.903142 	|[0m
[32m2022-06-23 12:14:43.060[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mlr 0.001 	|[0m
[32m2022-06-23 12:14:43.062[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m443[0m - [1mvalidation:[0m
[32m2022-06-23 12:14:43.063[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36mlog_data[0m:[36m447[0m - [1mloss nan 	|[0m
[32m2022-06-23 12:14:43.064

train:   0%|          | 0/1360 [00:00<?, ?it/s]

KeyboardInterrupt: 

## Hyperparameter optimization with Ray Tune and Optuna

In [3]:
# Base configuration for the hyperparameter study: 40 epochs on device 0.
# Options given as command-line style strings.
cli_flags = (
    f"--project-name=covtype --root-dir={root_dir} --algorithm=CovtypeAlgorithm --device=0 --no-half --lr-d=1e-3 --batch-size=256",
    "--n-epochs=40 --clip=0 --parallel=1 --accumulate=1 --cudnn-benchmark",
    "--weight-decay=1e-5 --beta1=0.9 --beta2=0.999",
)
# Options given as keyword overrides.
overrides = dict(
    label_smoothing=.05,
    weight_factor=.0,
    path_to_data=path_to_data,
    dropout=.0,
    activation='gelu',
    channels=128,
    n_rules=64,
    n_layers=2,
)

args = beam_arguments(get_covtype_parser(), *cli_flags, **overrides)
study = Study(covtype_algorithm_generator, args)

[32m2022-06-23 16:26:10.599[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m82[0m - [1mHyperparameter Optimization[0m
[32m2022-06-23 16:26:10.600[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m83[0m - [1mbeam project: covtype[0m
[32m2022-06-23 16:26:10.601[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m84[0m - [1mExperiment Hyperparameters[0m
[32m2022-06-23 16:26:10.601[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1mproject_name: covtype[0m
[32m2022-06-23 16:26:10.602[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1midentifier: debug_hp_optimization_20220623_162610[0m
[32m2022-06-23 16:26:10.602[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[36m__init__[0m:[36m87[0m - [1malgorithm: CovtypeAlgorithm[0m
[32m2022-06-23 16:26:10.603[0m | [1mINFO    [0m | [36msrc.beam.experiment[0m:[

In [None]:
# Search space: one Ray Tune sampler per hyperparameter explored by the study.
# Key order is kept exactly as originally written.
search_space = {
    "lr_dense": tune.loguniform(1e-4, 1e-2),
    "lr_sparse": tune.loguniform(1e-3, 1e-1),
    "dropout": tune.uniform(.0, .25),
    "scheduler_factor": tune.uniform(.1, .5),
    "scheduler_patience": tune.choice([12, 16, 20]),
    "weight_decay": tune.loguniform(1e-6, 1e-3),
    "beta1": tune.choice([.9, .95]),
    "beta2": tune.choice([.99, .999]),
    "n_tables": tune.choice([1, 5, 15]),
    "n_quantiles": tune.choice([4, 10, 20]),
    "weight_factor": tune.choice([.0, .25, .5]),
    "label_smoothing": tune.choice([.0, .1, .2]),
    "channels": tune.choice([64, 128, 256]),
    "n_rules": tune.choice([64, 128, 256]),
    "n_layers": tune.choice([1, 2]),
    "activation": tune.choice(['celu', 'gelu']),
    "k_p": tune.loguniform(.01, .1),
    "k_i": tune.loguniform(.001, .01),
    "k_d": tune.loguniform(.001, .01),
    "initial_mask": tune.uniform(.9, 1.),
}

# Launch the search: maximise "mean_accuracy" with Optuna-suggested trials,
# at most two trials at a time and one GPU per trial, for 1000 samples.
analysis = study.tune(
    config=search_space,
    metric="mean_accuracy",
    mode="max",
    max_concurrent_trials=2,
    resources_per_trial={"gpu": 1},
    search_alg=OptunaSearch(),
    progress_reporter=JupyterNotebookReporter(overwrite=True),
    num_samples=1000,
)

Trial name,status,loc,activation,beta1,beta2,channels,dropout,initial_mask,k_d,k_i,k_p,label_smoothing,lr_dense,lr_sparse,n_layers,n_quantiles,n_rules,n_tables,scheduler_factor,scheduler_patience,weight_decay,weight_factor,acc,iter,total time (s)
runner_tune_3135cad6,RUNNING,172.17.0.7:69962,celu,0.95,0.999,256,0.0775723,0.954624,0.00257663,0.00392462,0.0312595,0.0,0.00216574,0.0380389,2,4,64,5,0.495696,16,2.05277e-05,0.25,0.906846,24,1913.7
runner_tune_34a16dd8,RUNNING,172.17.0.7:70000,gelu,0.9,0.999,64,0.063125,0.986053,0.00579412,0.00207835,0.0142935,0.2,0.00122931,0.0509657,2,20,256,1,0.155606,16,1.67556e-06,0.5,0.912775,7,1811.54
