## Initialization  

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))
%load_ext autoreload
%autoreload 2

In [2]:
import os 
import sys
sys.path.insert(0, '../src')
# sys.path.insert(0, '/home/kbardool/kusanagi/AdaSparseChem/src') ; print(sys.path)
import time
import argparse
import yaml
import types, copy, pprint
from time import sleep
from datetime import datetime
import pandas as pd
import numpy  as np
from utils import (initialize, init_dataloaders, init_environment, init_wandb, training_initializations, model_initializations, 
                   check_for_resume_training, disp_dataloader_info, disp_info_1, warmup_phase, weight_policy_training, 
                   display_gpu_info, init_dataloaders_by_fold_id, print_separator, print_heading, print_underline,
                   timestring, print_loss, print_metrics_cr, get_command_line_args, load_from_pickle) 

pp = pprint.PrettyPrinter(indent=4)
np.set_printoptions(edgeitems=3, infstr='inf', linewidth=150, nanstr='nan')
pd.options.display.width = 132
os.environ["WANDB_NOTEBOOK_NAME"] = "Adashare_Train.ipynb"

## Set visible GPU device 
##----------------------------------------------
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
# os.environ["CUDA_VISIBLE_DEVICES"] = '2'
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Initialization and  Environment Setup

### Parse Input Args  - Read YAML config file - wandb initialization

In [3]:
# synthetic_config_file  = "../yamls/chembl_synt_train.yaml"
# config_file      = "../yamls/chembl_mini_train.yaml"
config_file      = "../yamls/chembl_cb29_train_1task.yaml"
config_file      = "../yamls/chembl_cb29_train_10task.yaml"
batch_size=4098
# batch_size=2048
# RESUME_MODEL_CKPT = 'model_train_ep_25_seed_0088'

####   For Resume 

In [4]:
restart_input_args = f" --config  {config_file} " \
             f" --batch_size       {batch_size} "  \
             " --exp_desc            10-task warmup with policy training " \
             " --hidden_size             4000 4000 4000 4000 4000 4000 "  \
             " --warmup_epochs             50 "  \
             " --tail_hidden_size        4000 "  \
             " --first_dropout           0.80 "  \
             " --middle_dropout          0.80 "  \
             " --last_dropout            0.80 "  \
             " --seed_idx                   0 "  \
             " --task_lr                0.001 "  \
             " --backbone_lr            0.001 "  \
             " --decay_lr_rate            0.5 "  \
             " --decay_lr_freq             40 "  \
             " --decay_lr_cooldown         10 "  \
             " --policy_lr               0.01 "  \
             " --policy_decay_lr_rate     0.5 "  \
             " --policy_decay_lr_freq      40 "  \
             " --policy_decay_lr_cooldown  10 "  \
             " --lambda_tasks             1.0 "  \
             " --lambda_sparsity        0.001 "  \
             " --lambda_sharing          0.05 "  \
             " --pytorch_threads            7 "  \
             " --cuda_devices               2"   \
             " --gpu_ids                    0 "  \
             " --resume"                       \
             " --resume_path        ../../experiments/AdaSparseChem-cb29-10task/4000x6_0822_1755_lr0.001_do0.8" \
             " --resume_ckpt        model_warmup_last_ep_10" \
             " --resume_metrics     metrics_warmup_last_ep_10.pickle" \
             " --exp_id             1x50t0va" \
             " --exp_name           0822_1755 " \
             " --folder_sfx         RESUME_2 "

#              " --resume_ckpt        model_best_model" \
#              " --resume_metrics     metrics_best.pickle" \


####  For Initiating 

In [5]:
input_args = f" --config          {config_file} " \
             f" --batch_size       {batch_size} "  \
             " --exp_desc            10-task warmup with policy training " \
             " --exp_desc            10-task with policy training " \
             " --hidden_size             4000 4000 4000 4000 4000 4000 "  \
             " --tail_hidden_size        4000 "  \
             " --warmup_epochs             20 "  \
             " --first_dropout           0.70 "  \
             " --middle_dropout          0.70 "  \
             " --last_dropout            0.70 "  \
             " --seed_idx                   0 "  \
             " --task_lr                0.001 "  \
             " --backbone_lr            0.001 "  \
             " --decay_lr_rate            0.5 "  \
             " --decay_lr_freq             30 "  \
             " --decay_lr_cooldown         10 "  \
             " --policy_lr               0.01 "  \
             " --policy_decay_lr_rate     0.5 "  \
             " --policy_decay_lr_freq      30 "  \
             " --policy_decay_lr_cooldown  10 "  \
             " --lambda_tasks             1.0 "  \
             " --lambda_sparsity        0.001 "  \
             " --lambda_sharing          0.05 "  \
             " --pytorch_threads            7 "  \
             " --cuda_devices               2"   \
             " --gpu_ids                    0 "  \

#              " --decay_lr_rate       0.3 "  \
#              " --decay_lr_freq        10 "  \
#              " --policy_lr         0.001 "  \
#              " --lambda_sparsity    0.02 "  \
#              " --lambda_sharing     0.01 "  \

### Read yaml Configuration File

In [6]:
ns = types.SimpleNamespace()
input_args = input_args.split() if input_args is not None else input_args
# input_args = restart_input_args.split() 
ns.args = get_command_line_args(input_args, display = True)
os.environ["CUDA_VISIBLE_DEVICES"]=ns.args.cuda_devices


 command line parms : 
------------------------
 config...................  ../yamls/chembl_cb29_train_10task.yaml
 project_name.............  None
 exp_id...................  31vs18ta
 exp_name.................  None
 folder_sfx...............  None
 exp_desc.................  10-task with policy training
 hidden_sizes.............  [4000, 4000, 4000, 4000, 4000, 4000]
 tail_hidden_size.........  [4000]
 warmup_epochs............  20
 training_epochs..........  None
 seed_idx.................  0
 batch_size...............  4098
 first_dropout............  0.7
 middle_dropout...........  0.7
 last_dropout.............  0.7
 backbone_lr..............  0.001
 task_lr..................  0.001
 policy_lr................  0.01
 decay_lr_rate............  0.5
 decay_lr_freq............  30
 decay_lr_cooldown........  10
 policy_decay_lr_rate.....  0.5
 policy_decay_lr_freq.....  30
 policy_decay_lr_cooldown.  10
 lambda_tasks.............  1.0
 lambda_sparsity..........  0.001
 lambda_shari

In [7]:
# display_gpu_info()

In [8]:
opt = initialize(ns, build_folders = True)

##################################################
################### READ YAML ####################
##################################################
 Pytorch thread count: 20
 Set Pytorch thread count to : 7
 Pytorch thread count set to : 7


[34m[1mwandb[0m: Currently logged in as: [33mkbardool[0m. Use [1m`wandb login --relogin`[0m to force relogin


 WandB Initialization -----------------------------------------------------------
 PROJECT NAME: AdaSparseChem-cb29-10Task
 RUN ID      : 31vs18ta 
 RUN NAME    : 0826_2144
 --------------------------------------------------------------------------------


 log_dir              create folder:  ../../experiments/AdaSparseChem-cb29-10task/4000x6_0826_2144_lr0.001_do0.7
 result_dir           folder exists:  ../../experiments/AdaSparseChem-cb29-10task/4000x6_0826_2144_lr0.001_do0.7
 checkpoint_dir       folder exists:  ../../experiments/AdaSparseChem-cb29-10task/4000x6_0826_2144_lr0.001_do0.7

------------------------------------------------------------------------------------------------------------------------
 experiment name       : 0826_2144 
 experiment id         : 31vs18ta 
 folder_name           : 4000x6_0826_2144_lr0.001_do0.7 
 experiment description: 10-task with policy training
 Random seeds          : [88, 45, 50, 100, 44, 48, 2048, 2222, 9999]
 Random  seed used     : 88 
 l

In [9]:
# ns.wandb_run.finish()
# ns.wandb_run.finish()

### Setup Dataloaders

In [49]:
# dldrs = init_dataloaders(opt, verbose = False)
dldrs = init_dataloaders_by_fold_id(opt, verbose = False)
disp_dataloader_info(dldrs)

 Warmup folds    : [2, 3, 4]
 Weights folds   : [2, 3]
 Policy folds    : [4]
 Validation folds: [1]
---------------------------------------------------------------------------
Load label/Y file for task 1 - task group chembl_29_Y_tg_0_cols_472.npy
--------------------------------------------------------------------------- 

 Number of non-zero features in ecfp[0]:79

 Task 1 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      81937 
    Total   -1  Labels :     188511 
    Total < -1  Labels :          0 
    Total != 0  Labels :     270448

 Task 1 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per Sample  : 472  Y rows with populated labels: 32866  non zero cols: 81937

 Task 1 files post-filtering : 
---------------------------------
X file : # Samples :  254529     # Features per Sample: 32000 
Y file : # Samples :  254529     # Labels per


 Task 9 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per Sample  : 344  Y rows with populated labels: 35996  non zero cols: 110249

 Task 9 files post-filtering : 
---------------------------------
X file : # Samples :  254529     # Features per Sample: 32000 
Y file : # Samples :  254529     # Labels per Sample  : 344  Y rows with populated labels: 20684  non zero cols: 63517

Using 226 of 344 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
------------------------------------------------------------------------------
Load label/Y file for task 10 - task group chembl_29_Y_tg_1031_cols_72.npy
------------------------------------------------------------------------------ 

 Number of non-zero features in ecfp[0]:79

 Task 10 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      18631 
    Total   -1  Labels :     


 Task 8 files post-filtering : 
---------------------------------
X file : # Samples :  168649     # Features per Sample: 32000 
Y file : # Samples :  168649     # Labels per Sample  : 148  Y rows with populated labels: 8900  non zero cols: 19704

Using 80 of 148 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
------------------------------------------------------------------------------
Load label/Y file for task 9 - task group chembl_29_Y_tg_1028_cols_344.npy
------------------------------------------------------------------------------ 

 Number of non-zero features in ecfp[0]:79

 Task 9 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :     110249 
    Total   -1  Labels :     213195 
    Total < -1  Labels :          0 
    Total != 0  Labels :     323444

 Task 9 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels 


 Task 7 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      38227 
    Total   -1  Labels :      91904 
    Total < -1  Labels :          0 
    Total != 0  Labels :     130131

 Task 7 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per Sample  : 224  Y rows with populated labels: 11789  non zero cols: 38227

 Task 7 files post-filtering : 
---------------------------------
X file : # Samples :  85880     # Features per Sample: 32000 
Y file : # Samples :  85880     # Labels per Sample  : 224  Y rows with populated labels: 2614  non zero cols: 7798

Using 109 of 224 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
------------------------------------------------------------------------------
Load label/Y file for task 8 - task group chembl_29_Y_tg_1005_cols_148.npy
-------------------------------------------------


 Task 5 files post-filtering : 
---------------------------------
X file : # Samples :  86274     # Features per Sample: 32000 
Y file : # Samples :  86274     # Labels per Sample  : 620  Y rows with populated labels: 10004  non zero cols: 27223

Using 389 of 620 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
-----------------------------------------------------------------------------
Load label/Y file for task 6 - task group chembl_29_Y_tg_643_cols_184.npy
----------------------------------------------------------------------------- 

 Number of non-zero features in ecfp[0]:79

 Task 6 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      41813 
    Total   -1  Labels :      69820 
    Total < -1  Labels :          0 
    Total != 0  Labels :     111633

 Task 6 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per

### Setup Model  

In [11]:

environ = init_environment(ns, opt, is_train = True, display_cfg = True)


##################################################
############# CREATE THE ENVIRONMENT #############
##################################################
 device is  cuda:0
--------------------------------------------------
 SparseChem_Backbone  Ver: 1.0 Init() Start 
-------------------------------------------------- 

 layer config        : [1, 1, 1, 1, 1, 1] 
 skip residual layers: False   skip hidden layers  : False
 SparseChem_BackBone() Input Layer  - Input: 32000  output: 4000  non-linearity:<class 'torch.nn.modules.activation.ReLU'>
 Hidden layer 0 - Input: 4000   output:4000
    _make_layer() using block: <class 'models.sparsechem_backbone.SparseChemBlock'>
           input_size: 4000 output_sz: 4000  non_linearity: ReLU() dropout: 0.7 bias: True
           SparseChemBlock.init(): input_size: 4000 output_sz: 4000   non_linearity: ReLU() dropout: 0.7 bias: True
 Hidden layer 1 - Input: 4000   output:4000
    _make_layer() using block: <class 'models.sparsechem_backbone.SparseChe

### Initiate / Resume Training 

In [12]:
check_for_resume_training(ns, opt, environ, epoch = 0 , iter = 0)

opt['train']['which_iter'] :  warmup
##################################################
######## Initiate Training from scratch  #########
##################################################


# Warmup Training

### Warmup Training Preparation

In [13]:
model_initializations(ns, opt, environ, phase = 'update_weights', policy_learning = False)
training_initializations(ns, opt, environ, dldrs, warmup_iterations = 1000, weight_iterations = 750, policy_iterations = 250, eval_iterations = 250, warmup = True)

# training_initializations(ns, opt, environ, dldrs, warmup_iterations = 2, eval_iterations = 2, warmup = True)
# training_initializations(ns, opt, environ, dldrs, warmup = True)

 Model optimizers defined . . . policy_learning: False
 Model schedulers defined . . . policy_learning: False
 Metrics CSV file header written . . . 
 Model initializations complete . . . 
 training preparation: - check for CUDA - cuda available as device id: [0]
sparsechem_env.cuda()
 training preparation: - set print_freq to                                 : 1989 
 training preparation: - set number of batches per warmup training epoch to: 1000
 training preparation: - set number of batches per weight training epoch to: 750
 training preparation: - set number of batches per policy training epoch to: 250
 training preparation: - set number of batches per validation to           : 250
 training preparation complete . . .


In [14]:
# print('-'*80)
disp_info_1(ns, opt, environ)
print('-'*80)
print(environ.disp_for_excel())


 Num_blocks                : 6                                

 batch size                : 128 
 # batches / Warmup epoch  : 750 
 # batches / Weight epoch  : 750 
 # batches / Policy epoch  : 250                                 

 Print Frequency           : -1 
 Config Val Frequency      : 500 
 Config Val Iterations     : 675 
 Val iterations            : 250 
 which_iter                : warmup 
 train_resume              : False                                 
 
 fix BN parms              : False 
 Task LR                   : 0.001 
 Backbone LR               : 0.001                                 

 Sharing  regularization   : 0.05 
 Sparsity regularization   : 0.001 
 Task     regularization   : 1.0                                 

 Current epoch             : 0  
 Warm-up epochs            : 20 
 Training epochs           : 250
--------------------------------------------------------------------------------

    folder: 4000x6_0826_2144_lr0.001_do0.7
    layers: 6 [4000, 

In [15]:
# environ.display_trained_logits(ns.current_epoch,out=sys.stdout) 
# environ.display_trained_policy(ns.current_epoch,out=sys.stdout)

In [16]:
# ns.eval_iters = 250
# ns.trn_iters_warmup = 750
# ns.eval_iters = 2
# ns.trn_iters_warmup = 2
print(ns.eval_iters )
print(ns.trn_iters_warmup)
print(ns.trn_iters_weights)
print(ns.trn_iters_policy)

# ns.check_for_improvment_wait = 0
# ns.current_epoch =0 
# ns.write_checkpoint = False
print_heading(f" Last Epoch: {ns.current_epoch}   # of warm-up epochs to do:  {ns.warmup_epochs} - Run epochs {ns.current_epoch+1} to {ns.current_epoch + ns.warmup_epochs}", verbose = True)

250
1000
750
250
------------------------------------------------------------------------
 Last Epoch: 0   # of warm-up epochs to do:  20 - Run epochs 1 to 20
------------------------------------------------------------------------ 



### Warmup Training 

In [19]:

warmup_phase(ns,opt, environ, dldrs, epochs = 20, verbose = False, disable_tqdm = False)


--------------------------------------------------------------------------
 Last Epoch: 40   # of warm-up epochs to do:  20 - Run epochs 41 to 60
-------------------------------------------------------------------------- 

 Ep  | Trunk LR  Heads LR  Polcy LR  Gmbl Tmp |  trn tsk    trn spar    trn shar   trn ttl |    logloss   bceloss  avg prec    aucroc     aucpr    f1_max |  val tsk    val spar    val shar     total |  time |   
  41 | 1.00e-03  1.00e-03  1.00e-02  2.50e+00 |   1.3877   5.415e-05   1.081e-04    1.3879 |  1.144e-05   0.48170   0.68711   0.75195   0.66377   0.72837 |   2.1953   6.780e-05   1.353e-04    2.1955 | 297.0 |
  42 | 1.00e-03  1.00e-03  1.00e-02  2.50e+00 |   1.7095   5.415e-05   1.081e-04    1.7097 |  1.134e-05   0.48116   0.69207   0.75059   0.67056   0.72807 |   2.1722   5.415e-05   1.081e-04    2.1724 | 292.7 |   
  43 | 1.00e-03  1.00e-03  1.00e-02  2.50e+00 |   2.1004   5.415e-05   1.081e-04    2.1006 |  1.132e-05   0.47564   0.68530   0.75445   0.66369 

In [20]:
pp.pprint(environ.val_metrics['total'])
print(environ.val_metrics['total']['task'] + environ.val_metrics['total']['policy'])

{   'policy': 0.00020307948580011725,
    'task': 2.140690733723222,
    'total': 2.140893813209022,
    'total_mean': nan}
2.140893813209022


In [21]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout, environ.log_file]) 
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")

# print()
# environ.display_trained_logits(ns.current_epoch)
# environ.display_trained_policy(ns.current_epoch)
# environ.display_current_policy(ns.current_epoch)

  60 | 1.00e-03  1.00e-03  1.00e-02  2.50e+00 |   1.1585   5.415e-05   1.081e-04    1.1586 |  1.123e-05   0.47535   0.69222   0.75139   0.66947   0.72744 |   2.1407   6.780e-05   1.353e-04    2.1409 |  -0.0 |
[e] Last ep:60  it:60000  -  Total Loss: 2.1409     
Task: 2.1407   Sparsity: 6.77984e-05    Sharing: 1.35281e-04 


### End WandB 

In [30]:
# ns.wandb_run.finish()
# ns.wandb_run.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
auc_pr,▂▂▃▁▆▆▅▅▅█▂▇▃▇▅▄▆▅▄▃▃▄▅▃▃▅▅▄▅▅▇▇▃▄▅▂▃▆▂▄
avg_prec_score,▂▂▃▁▆▆▅▅▅█▂▇▃▇▅▄▆▅▃▃▃▄▅▃▃▅▅▄▅▅▇▆▃▄▅▃▃▅▂▄
bceloss,█▆▆▄▃▃▃▃▂▃▂▂▂▂▃▃▃▂▂▂▂▃▂▃▁▂▂▂▂▂▂▁▃▃▃▃▂▂▁▂
best_accuracy,▁▁▃▆▆▆▅▃▅▆█
best_epoch,▁▁▂▃▃▄▄▅▅▆█
best_iter,▁▁▂▃▃▄▄▅▅▆█
best_roc_auc,▁▂▃▄▅▆▆▆▇██
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
f1_max,▁▂▂▃▅▅▄▆▄█▁▇▃▇▆▄▆▅▃▃▂▄▄▃▄▅▅▆▄▅▇▅▄▅▆▃▂▅▄▄
gumbel_temp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
auc_pr,0.57
avg_prec_score,0.59695
bceloss,0.54252
best_roc_auc,0.65833
epoch,60.0
f1_max,0.66896
gumbel_temp,2.5
kappa,0.09138
kappa_max,0.38178
lambda_sharing,0.05


###  Some data peeks  

In [29]:
pp.pprint(environ.val_metrics['sparsity']['total'])
pp.pprint(environ.val_metrics['sharing']['total'])
pp.pprint(environ.val_metrics['sharing']['total'] +environ.val_metrics['sparsity']['total'])
pp.pprint(environ.val_metrics['task'])
pp.pprint(environ.val_metrics['total'])
pp.pprint(environ.val_metrics['epoch'])

0.0011841206578537822
2.362737723160535e-05
0.0012077480350853875
{   'task1': 0.22806914488911734,
    'task10': 0.07729441115807154,
    'task2': 0.2615806116049444,
    'task3': 0.6873060537261522,
    'task4': 0.1177855923062318,
    'task5': 0.352459824837004,
    'task6': 0.08289676805336933,
    'task7': 0.1123096017483592,
    'task8': 0.17276770787165321,
    'task9': 0.2891627654132546,
    'total': 2.381632481608156}
{   'policy': 0.0012077480350853875,
    'task': 2.381632481608156,
    'total': 2.3828402296432416,
    'total_mean': nan}
5


In [26]:
from utils             import censored_mse_loss, censored_mae_loss, aggregate_results
task_key = 'task2'
print(environ.val_data[task_key]['yc_aggr_weights'].sum())
print(environ.val_data[task_key]['yc_aggr_weights'])
print(environ.val_metrics[task_key]['classification'])
# print(environ.val_metrics[task_key]['classification'].sum())
print(environ.val_metrics[task_key]['classification_agg'])
# print(environ.val_data[task_key]['yc_aggr_weights'])
# print((environ.batch_data[task_key]['yc_aggr_weights']==environ.val_data[task_key]['yc_aggr_weights']).all())


tmp = aggregate_results(environ.val_metrics[task_key]["classification"], 
                      environ.val_data[task_key]['yc_aggr_weights'],
                      verbose = True)
 

258.0
[0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 0. 0. 0. 1.
 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0.
 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0.
 1. 1. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 1. 0.
 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 0. 1. 

In [28]:
# del all_tgs, all_tgs2
del con,con2

NameError: name 'con' is not defined

In [32]:
# del con
ttl = 0

# con = np.ndarray()
appd_df = []
for i in range(1,11):
    task_key = f"task{i}"
    print(i, task_key, ' shape: ', environ.val_data[task_key]['yc_aggr_weights'].shape,  'classifiaction:', environ.val_metrics[task_key]['classification'].shape)
    tmp_df = environ.val_metrics[task_key]['classification'].where(pd.isnull,1)
    print(tmp_df.sum(axis=0))
    
    if i == 1:
        con = np.copy(environ.val_data[task_key]['yc_aggr_weights'])
        all_tgs = environ.val_metrics[task_key]['classification'].copy()
        print("initialize", con.shape, all_tgs.shape)
    else:
        con = np.hstack((con, environ.val_data[task_key]['yc_aggr_weights']))
        all_tgs = all_tgs.append(environ.val_metrics[task_key]['classification'])
        print("concatenate: ",task_key, "    ", con.shape, all_tgs.shape)
        
    ttl += environ.val_data[task_key]['yc_aggr_weights'].shape[0]
    
print('ttl : ', ttl,  'con.shape:', con.shape, 'all_tgs.shape', all_tgs.shape)

1 task1  shape:  (472,) classifiaction: (472, 9)
roc_auc_score     293.0
auc_pr            293.0
avg_prec_score    293.0
f1_max            293.0
p_f1_max          293.0
kappa             293.0
kappa_max         293.0
p_kappa_max       293.0
bceloss           293.0
dtype: float64
initialize (472,) (472, 9)
2 task2  shape:  (624,) classifiaction: (624, 9)
roc_auc_score     386.0
auc_pr            386.0
avg_prec_score    386.0
f1_max            386.0
p_f1_max          386.0
kappa             386.0
kappa_max         386.0
p_kappa_max       386.0
bceloss           386.0
dtype: float64
concatenate:  task2      (1096,) (1096, 9)
3 task3  shape:  (688,) classifiaction: (688, 9)
roc_auc_score     610.0
auc_pr            610.0
avg_prec_score    610.0
f1_max            610.0
p_f1_max          610.0
kappa             610.0
kappa_max         610.0
p_kappa_max       610.0
bceloss           610.0
dtype: float64
concatenate:  task3      (1784,) (1784, 9)
4 task4  shape:  (192,) classifiaction: (192, 9

In [None]:
all_tgs2 = pd.concat(environ.val_metrics[f"task{i}"]['classification'] for i in range(1,11))

all_tgs2.info()
all_tgs2.head(50)

In [185]:
con2 = np.hstack([ environ.val_data[f"task{i}"]['yc_aggr_weights'] for i in range(1,11)])
con2.shape

(3568,)

In [179]:
# all_tgs.index = range(all_tgs.shape[0])

In [180]:
# print(all_tgs2[-50:])

In [187]:
all_tgs2_mod = all_tgs2.where(pd.isnull, 1) * con2[:,None]
all_tgs2_mod.sum(axis = 0)

roc_auc_score     448.0
auc_pr            448.0
avg_prec_score    448.0
f1_max            448.0
p_f1_max          448.0
kappa             448.0
kappa_max         448.0
p_kappa_max       448.0
bceloss           448.0
dtype: float64

In [174]:
# con3 = pd.concat([environ.val_metrics['task1']['classification'],environ.val_metrics['task2']['classification'] ])
# print(con3)

In [188]:
tmp2 = aggregate_results(all_tgs2, con2, verbose = True)
 

 wsum: 1314.0   df.shape: (3568, 9)   df2: (3568, 9)  df2.sum(axis=0): 
 roc_auc_score     448.0
auc_pr            448.0
avg_prec_score    448.0
f1_max            448.0
p_f1_max          448.0
kappa             448.0
kappa_max         448.0
p_kappa_max       448.0
bceloss           448.0
dtype: float64

  DIVISOR 
-----------
roc_auc_score     0.002232
auc_pr            0.002232
avg_prec_score    0.002232
f1_max            0.002232
p_f1_max          0.002232
kappa             0.002232
kappa_max         0.002232
p_kappa_max       0.002232
bceloss           0.002232
dtype: float64

  DF 
------
      roc_auc_score    auc_pr  avg_prec_score    f1_max  p_f1_max  kappa  kappa_max  p_kappa_max   bceloss
task                                                                                                      
0               NaN       NaN             NaN       NaN       NaN    NaN        NaN          NaN       NaN
1          0.000000  0.250000        0.500000  0.666667  0.320208    0.0       

In [203]:
pp.pprint(environ.val_metrics['aggregated'])
print(environ.val_metrics['aggregated']['sc_loss'] )
print(environ.val_metrics['aggregated']["logloss"] ) 

{   'auc_pr': 0.6818710653441948,
    'avg_prec_score': 0.7442062851089334,
    'bceloss': 0.6360303774875189,
    'f1_max': 0.7990044449679128,
    'kappa': 0.08006046274015079,
    'kappa_max': 0.5820449674723311,
    'logloss': 0.00032448763622636064,
    'p_f1_max': 0.48441957962599447,
    'p_kappa_max': 0.532944236640885,
    'roc_auc_score': 0.6855984778924409,
    'sc_loss': 0.24956344102169398}
0.24956344102169398
0.00032448763622636064


In [201]:
pp.pprint(tmp2)
pp.pprint(tmp3)

roc_auc_score     0.685598
auc_pr            0.681871
avg_prec_score    0.744206
f1_max            0.799004
p_f1_max          0.484420
kappa             0.080060
kappa_max         0.582045
p_kappa_max       0.532944
bceloss           0.636030
dtype: float64


In [190]:
all_tasks_classification_metrics = []
all_tasks_aggregation_weights    = [] 

for i in range(1,11):
    task_key = f"task{i}"
    print(i, task_key, ' shape: ', environ.val_data[task_key]['yc_aggr_weights'].shape,  'classifiaction:', environ.val_metrics[task_key]['classification'].shape)
    tmp_df = environ.val_metrics[task_key]['classification'].where(pd.isnull,1)
    print(tmp_df.sum(axis=0))
    
    all_tasks_classification_metrics.append(environ.val_metrics[task_key]['classification'])
    all_tasks_aggregation_weights.append(environ.val_data[task_key]['yc_aggr_weights'])
            

1 task1  shape:  (472,) classifiaction: (472, 9)
roc_auc_score     60.0
auc_pr            60.0
avg_prec_score    60.0
f1_max            60.0
p_f1_max          60.0
kappa             60.0
kappa_max         60.0
p_kappa_max       60.0
bceloss           60.0
dtype: float64
2 task2  shape:  (624,) classifiaction: (624, 9)
roc_auc_score     46.0
auc_pr            46.0
avg_prec_score    46.0
f1_max            46.0
p_f1_max          46.0
kappa             46.0
kappa_max         46.0
p_kappa_max       46.0
bceloss           46.0
dtype: float64
3 task3  shape:  (688,) classifiaction: (688, 9)
roc_auc_score     126.0
auc_pr            126.0
avg_prec_score    126.0
f1_max            126.0
p_f1_max          126.0
kappa             126.0
kappa_max         126.0
p_kappa_max       126.0
bceloss           126.0
dtype: float64
4 task4  shape:  (192,) classifiaction: (192, 9)
roc_auc_score     29.0
auc_pr            29.0
avg_prec_score    29.0
f1_max            29.0
p_f1_max          29.0
kappa         

In [None]:
all_tgs3 = pd.concat(all_tasks_classification_metrics)
con3 = np.concatenate(all_tasks_aggregation_weights)

In [191]:
all_tgs3.info()
all_tgs3.head(20)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3568 entries, 0 to 71
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   roc_auc_score   525 non-null    float64
 1   auc_pr          525 non-null    float64
 2   avg_prec_score  525 non-null    float64
 3   f1_max          525 non-null    float64
 4   p_f1_max        525 non-null    float32
 5   kappa           525 non-null    float64
 6   kappa_max       525 non-null    float64
 7   p_kappa_max     525 non-null    float32
 8   bceloss         525 non-null    float64
dtypes: float32(2), float64(7)
memory usage: 250.9 KB


Unnamed: 0_level_0,roc_auc_score,auc_pr,avg_prec_score,f1_max,p_f1_max,kappa,kappa_max,p_kappa_max,bceloss
task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,,,,,,,,,
1,0.0,0.25,0.5,0.666667,0.320208,0.0,0.0,0.587646,0.780148
2,,,,,,,,,
3,,,,,,,,,
4,0.333333,0.166667,0.333333,0.5,0.328739,0.0,0.2,0.328739,0.575516
5,,,,,,,,,
6,,,,,,,,,
7,,,,,,,,,
8,,,,,,,,,
9,,,,,,,,,


In [200]:
tmp3 = aggregate_results( all_tgs3, con3, verbose = True)

 wsum: 1314.0   df.shape: (3568, 9)   df2: (3568, 9)  df2.sum(axis=0): 
 roc_auc_score     448.0
auc_pr            448.0
avg_prec_score    448.0
f1_max            448.0
p_f1_max          448.0
kappa             448.0
kappa_max         448.0
p_kappa_max       448.0
bceloss           448.0
dtype: float64

  DIVISOR 
-----------
roc_auc_score     0.002232
auc_pr            0.002232
avg_prec_score    0.002232
f1_max            0.002232
p_f1_max          0.002232
kappa             0.002232
kappa_max         0.002232
p_kappa_max       0.002232
bceloss           0.002232
dtype: float64

  DF 
------
      roc_auc_score    auc_pr  avg_prec_score    f1_max  p_f1_max  kappa  kappa_max  p_kappa_max   bceloss
task                                                                                                      
0               NaN       NaN             NaN       NaN       NaN    NaN        NaN          NaN       NaN
1          0.000000  0.250000        0.500000  0.666667  0.320208    0.0       

In [199]:
print(all_tgs2[0:1]['roc_auc_score'])
print(all_tgs3[0:1]['roc_auc_score'])
print((all_tgs2[0:1]['roc_auc_score'] == all_tgs3[0:1]['roc_auc_score']).all())
all_tgs2.compare(all_tgs3)

task
0   NaN
Name: roc_auc_score, dtype: float64
task
0   NaN
Name: roc_auc_score, dtype: float64
False


task


In [82]:
print(environ.val_data['task9']['yc_aggr_weights'].shape, con[3152:3496].shape)
print((environ.val_data['task9']['yc_aggr_weights'] == con[3152:3496]).all())a

(344,) (344,)
True


In [None]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()

In [50]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()

Best Epoch :       3
Best Iteration :   7977 
Best ROC AUC   :   0.73690
Best Precision :   0.64175




In [22]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()

Best Epoch :       1
Best Iteration :   2659 
Best ROC AUC   :   0.71991
Best Precision :   0.63031




In [24]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()
for key in environ.val_metrics['aggregated']:
    print(f"{key:20s}    {environ.val_metrics['aggregated'][key]:0.4f}")
# pp.pprint(environ.val_metrics['aggregated'])

Best Epoch :       2
Best Iteration :   6258 
Best ROC AUC   :   0.79359
Best Precision :   0.72131


roc_auc_score           0.7936
auc_pr                  0.6974
avg_prec_score          0.7213
f1_max                  0.7534
p_f1_max                0.4367
kappa                   0.3055
kappa_max               0.5732
p_kappa_max             0.4947
bceloss                 0.4300
sc_loss                 0.0106
logloss                 0.0000


# Weight & Policy Training

### Weight/Policy Training Preparation

In [58]:
print( f" Backbone (Group 0) Initial LR  : {environ.opt['train']['backbone_lr']:4f} \n"
       f" Tasks    (Group 1) Initial LR  : {environ.opt['train']['task_lr']:4f}    \n Params : {environ.optimizers['weights']} \n\n"
       f" Policy   Initial LR            : {environ.opt['train']['policy_lr']:4f}  \n Params : {environ.optimizers['alphas']}  \n\n")

print( f" Backbone Initial LR            : {environ.opt['train']['backbone_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[0]['lr']} \n"
       f" Tasks    Initial LR            : {environ.opt['train']['task_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[1]['lr']}    \n"
       f" Policy   Initial LR            : {environ.opt['train']['policy_lr']:4f}      Current LR : {environ.optimizers['alphas'].param_groups[0]['lr']}  \n")

 Backbone (Group 0) Initial LR  : 0.001000 
 Tasks    (Group 1) Initial LR  : 0.001000    
 Params : SGD (
Parameter Group 0
    dampening: 0
    lr: 0.0005
    momentum: 0.9
    nesterov: False
    weight_decay: 0.0001

Parameter Group 1
    dampening: 0
    lr: 0.0005
    momentum: 0.9
    nesterov: False
    weight_decay: 0.0001
) 

 Policy   Initial LR            : 0.010000  
 Params : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0.0005
)  


 Backbone Initial LR            : 0.001000      Current LR : 0.0005 
 Tasks    Initial LR            : 0.001000      Current LR : 0.0005    
 Policy   Initial LR            : 0.010000      Current LR : 0.01  



In [59]:
print_underline('Weights Scheduler Parameters', verbose = True) 
for k,i in environ.schedulers['weights'].state_dict().items():
    print(f"    {k:20s}     value: {i}")

print_underline('Policy Scheduler Parameters', verbose = True)
for k,i in environ.schedulers['alphas'].state_dict().items():
    print(f"    {k:20s}     value: {i}")    


Weights Scheduler Parameters
------------------------------
    factor                   value: 0.5
    min_lrs                  value: [0, 0]
    patience                 value: 30
    verbose                  value: True
    cooldown                 value: 10
    cooldown_counter         value: 0
    mode                     value: min
    threshold                value: 0.0001
    threshold_mode           value: rel
    best                     value: 2.1187280823000556
    num_bad_epochs           value: 12
    mode_worse               value: inf
    eps                      value: 1e-08
    last_epoch               value: 59
    _last_lr                 value: [0.0005, 0.0005]

Policy Scheduler Parameters
-----------------------------
    factor                   value: 0.5
    min_lrs                  value: [0]
    patience                 value: 30
    verbose                  value: True
    cooldown                 value: 10
    cooldown_counter         value: 0
    mode    

In [None]:
ns.flag = 'update_weights'
model_initializations(ns, opt, environ, phase = ns.flag, policy_learning = True)
# training_initializations(ns, opt, environ, dldrs, warmup = False)

In [26]:
# training_initializations(ns, opt, environ, dldrs, warmup_iterations = 200,  weight_iterations = 2, policy_iterations = 2, eval_iterations = 1, warmup = False)
training_initializations(ns, opt, environ, dldrs, warmup_iterations = 1000, weight_iterations = 750, policy_iterations = 250, eval_iterations = 500, warmup = False)

 training preparation: - check for CUDA - cuda available as device id: [0]
sparsechem_env.cuda()
 policy policy1 is None
 policy policy2 is None
 policy policy3 is None
 policy policy4 is None
 policy policy5 is None
 policy policy6 is None
 policy policy7 is None
 policy policy8 is None
 policy policy9 is None
 policy policy10 is None
 training preparation: - set print_freq to                                 : 1989 
 training preparation: - set number of batches per warmup training epoch to: 1000
 training preparation: - set number of batches per weight training epoch to: 750
 training preparation: - set number of batches per policy training epoch to: 250
 training preparation: - set number of batches per validation to           : 500
 training preparation complete . . .


In [46]:
print( f" Backbone Initial LR            :      {environ.opt['train']['backbone_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[0]['lr']} \n"
       f" Tasks    Initial LR            :      {environ.opt['train']['task_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[1]['lr']}    \n"
       f" Policy   Initial LR            :      {environ.opt['train']['policy_lr']:4f}      Current LR : {environ.optimizers['alphas'].param_groups[0]['lr']}  \n")

 Backbone Initial LR            :      0.001000      Current LR : 0.001 
 Tasks    Initial LR            :      0.001000      Current LR : 0.001    
 Policy   Initial LR            :      0.010000      Current LR : 0.01  



In [28]:
print_heading( f"** {timestring()} \n"
               f"** Training epoch: {ns.current_epoch} iter: {ns.current_iter}   flag: {ns.flag} \n"
               f"** Set optimizer and scheduler to policy_learning = True (Switch weight optimizer from ADAM to SGD)\n"
               f"** Switch from Warm Up training to Alternate training Weights & Policy \n"
               f"** Take checkpoint and block gradient flow through Policy net", verbose=True)

------------------------------------------------------------------------------------------------------------------------
** 2022-08-27 07:56:46:869319 
** Training epoch: 60 iter: 60000   flag: update_weights 
** Set optimizer and scheduler to policy_learning = True (Switch weight optimizer from ADAM to SGD)
** Switch from Warm Up training to Alternate training Weights & Policy 
** Take checkpoint and block gradient flow through Policy net
------------------------------------------------------------------------------------------------------------------------ 



In [51]:
print(environ.opt['train']['lambda_sparsity'])
print(environ.opt['train']['lambda_sharing'])
print(environ.opt['train']['decay_temp_freq'])

0.001
0.05
3


In [53]:
# environ.opt['is_curriculum'] = True
# environ.opt['curriculum_speed'] = 4
# ns.num_train_layers = None


environ.opt['train']['lambda_sparsity'] = 0.01
# environ.opt['train']['lambda_sharing']  = 0.01
# environ.opt['train']['decay_temp_freq'] = 6
print(environ.opt['train']['lambda_sparsity'])
# print(environ.opt['train']['lambda_sharing'])
# print(environ.opt['train']['decay_temp_freq'])

0.01


In [54]:
print( f" ns.flag                        :      {ns.flag}")
print( f" num_train_layers               :      {ns.num_train_layers}")
print( f" environ.opt['is_curriculum']   :      {environ.opt['is_curriculum']}")
print( f" environ.opt['curriculum_speed']:      {environ.opt['curriculum_speed']}\n")
print( f" Backbone Initial LR            :      {environ.opt['train']['backbone_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[0]['lr']} \n"
       f" Tasks    Initial LR            :      {environ.opt['train']['task_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[1]['lr']}    \n"
       f" Policy   Initial LR            :      {environ.opt['train']['policy_lr']:4f}      Current LR : {environ.optimizers['alphas'].param_groups[0]['lr']}  \n")

print( f" Hard Sampling                  :      {environ.opt['train']['hard_sampling']}\n")

print( f" Sparsity regularization        :      {environ.opt['train']['lambda_sparsity']}\n"
       f" Sharing  regularization        :      {environ.opt['train']['lambda_sharing']} \n"
       f" Tasks    regularization        :      {environ.opt['train']['lambda_tasks']}   \n\n")

print( f" Gumbel Temp                    :      {environ.gumbel_temperature:.4f}         \n" #
       f" Gumbel Temp decay              :      {environ.opt['train']['decay_temp_freq']} \n") #

print( f" ns.current_epoch               :      {ns.current_epoch}")
print( f" ns.training_epochs             :      {ns.training_epochs} \n") 
print( f" ns.current_iters               :      {ns.current_iter}")  
print( f" Batches in warmup epoch        :      {ns.trn_iters_warmup}")
print( f" Batches in weight epoch        :      {ns.trn_iters_weights}")
print( f" Batches in policy epoch        :      {ns.trn_iters_policy}")
print( f" Batches in validation          :      {ns.eval_iters}")
print( f" num_train_layers               :      {ns.num_train_layers} \n")

 ns.flag                        :      update_weights
 num_train_layers               :      6
 environ.opt['is_curriculum']   :      False
 environ.opt['curriculum_speed']:      3

 Backbone Initial LR            :      0.001000      Current LR : 0.0005 
 Tasks    Initial LR            :      0.001000      Current LR : 0.0005    
 Policy   Initial LR            :      0.010000      Current LR : 0.01  

 Hard Sampling                  :      False

 Sparsity regularization        :      0.01
 Sharing  regularization        :      0.05 
 Tasks    regularization        :      1.0   


 Gumbel Temp                    :      0.0251         
 Gumbel Temp decay              :      3 

 ns.current_epoch               :      112
 ns.training_epochs             :      20 

 ns.current_iters               :      109884
 Batches in warmup epoch        :      1000
 Batches in weight epoch        :      750
 Batches in policy epoch        :      250
 Batches in validation          :      500
 num_t

In [55]:
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
environ.display_trained_policy(ns.current_epoch)
environ.display_trained_logits(ns.current_epoch)
environ.display_current_policy(ns.current_epoch)

print_heading(f" Last Epoch Completed : {ns.current_epoch}       # of epochs to run:  {ns.training_epochs} -->  epochs {ns.current_epoch+1} to {ns.training_epochs + ns.current_epoch}"
              f"\n policy_learning rate : {environ.opt['train']['policy_lr']} "
              f"\n lambda_sparsity      : {environ.opt['train']['lambda_sparsity']}"
              f"\n lambda_sharing       : {environ.opt['train']['lambda_sharing']}"
              f"\n curriculum training  : {opt['is_curriculum']}     cirriculum speed: {opt['curriculum_speed']}     num_training_layers : {ns.num_train_layers}", 
              verbose = True)

[e] Last ep:112  it:109884  -  Losses:   	 Task: 2.1290   	 Sparsity: 8.74282e-05    	 Sharing: 1.40599e-03    	 Total: 2.1305 


 ep:  112    softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s         
 ----- ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    
  0    0.5842    0.4158  1    0.6929    0.3071  1    0.6500    0.3500  1    0.5901    0.4099  1    0.6034    0.3966  1    0.6060    0.3940  1    0.6332    0.3668  1    0.6210    0.3790  1    0.6164    0.3836  1    0.6103    0.3897  1
  1    0.6889    0.3111  1    0.7872    0.2128  1    0.6184    0.3816  1    0.5517    0.4483  1    0.6047    0.3953  1    0.6282    0.3718  1    0.6472    

### Weight/Policy Training

In [None]:
weight_policy_training(ns, opt, environ, dldrs, display_policy = True, disable_tqdm = False, epochs = 4)


------------------------------------------------------------------------------------------------------------------------
 Last Epoch Completed : 142       # of epochs to run:  4 -->  epochs 143 to 146
 Backbone Initial LR  : 0.001      Current LR : 0.00025 
 Heads    Initial LR  : 0.001      Current LR : 0.00025
 Policy   Initial LR  : 0.01      Current LR : 0.01
 Regularization tasks : 1.0          Sparsity: 0.01           sharing: 0.05
 curriculum training  : False      Cirriculum speed: 3     num_training_layers : 6
------------------------------------------------------------------------------------------------------------------------ 

Ep: 143 [weights]:  12%|█▌           | 87/750 [01:17<09:59,  1.11it/s, it=139971, Lss=1.2706, Spr=7.9436e-04, Shr=2.9583e-03, lyr=6]

In [104]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout]) 
print()
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
print(f'   best_epoch: {ns.best_epoch:5d}   best iter: {ns.best_iter:5d}'
      f'   best_accuracy: {ns.best_accuracy:.5f}    best ROC auc: {ns.best_roc_auc:.5f}')      

 142 | 2.50e-04  2.50e-04  1.00e-02  1.41e-03 |   1.5489   7.945e-04   2.919e-03    1.5526 |  5.549e-06   0.46255   0.67239   0.75861   0.65714   0.70673 |   2.1304   8.944e-04   3.331e-03    2.1346 |  -0.0 |

[e] Last ep:142  it:139884  -  Losses:   	 Task: 2.1304   	 Sparsity: 8.94442e-04    	 Sharing: 3.33104e-03    	 Total: 2.1346 

   best_epoch:    99   best iter: 96884   best_accuracy: 0.67959    best ROC auc: 0.76435


In [105]:
print_underline('Weights Scheduler Parameters', verbose = True) 
for k,i in environ.schedulers['weights'].state_dict().items():
    print(f"    {k:20s}     value: {i}")

print_underline('Policy Scheduler Parameters', verbose = True)
for k,i in environ.schedulers['alphas'].state_dict().items():
    print(f"    {k:20s}     value: {i}")    


Weights Scheduler Parameters
------------------------------
    factor                   value: 0.5
    min_lrs                  value: [0, 0]
    patience                 value: 30
    verbose                  value: True
    cooldown                 value: 10
    cooldown_counter         value: 9
    mode                     value: min
    threshold                value: 0.0001
    threshold_mode           value: rel
    best                     value: 2.1187280823000556
    num_bad_epochs           value: 0
    mode_worse               value: inf
    eps                      value: 1e-08
    last_epoch               value: 79
    _last_lr                 value: [0.00025, 0.00025]

Policy Scheduler Parameters
-----------------------------
    factor                   value: 0.5
    min_lrs                  value: [0]
    patience                 value: 30
    verbose                  value: True
    cooldown                 value: 10
    cooldown_counter         value: 0
    mode   

In [107]:
# environ.schedulers['alphas'].patience = 15

### Close WandB run

In [35]:
ns.wandb_run.finish()

# Misc Code 

### Check values

In [None]:
# ns.best_epoch = 0
# from utils.notebook_modules import wrapup_phase
# wrapup_phase(ns, opt, environ)

In [None]:
# environ.opt['train']['policy_lr']       = 0.002
# environ.opt['train']['lambda_sparsity'] = 0.05
# environ.opt['train']['lambda_sharing']  = 0.01
# environ.opt['train']['lambda_tasks']    = 1.0
# # environ.opt['train']['decay_temp_freq'] = 2

In [None]:
print(opt['diff_sparsity_weights'])
print(opt['is_sharing'])
print(opt['diff_sparsity_weights'] and not opt['is_sharing'])
print(environ.opt['train']['Lambda_sharing'])
print(opt['train']['Lambda_sharing'])
print(environ.opt['train']['Lambda_sparsity'])
print(opt['train']['Lambda_sparsity'])
print(environ.opt['train']['policy_lr'])
print(opt['train']['policy_lr'])

In [None]:
print( f" Backbone Learning Rate      : {environ.opt['train']['backbone_lr']}\n"
       f" Tasks    Learning Rate      : {environ.opt['train']['task_lr']}\n"
       f" Policy   Learning Rate      : {environ.opt['train']['policy_lr']}\n")

print( f" Sparsity regularization     : {environ.opt['train']['lambda_sparsity']}\n"
       f" Sharing  regularization     : {environ.opt['train']['lambda_sharing']} \n\n"
       f" Tasks    regularization     : {environ.opt['train']['lambda_tasks']}   \n"
       f" Gumbel Temp                 : {environ.gumbel_temperature:.4f}         \n" 
       f" Gumbel Temp decay           : {environ.opt['train']['decay_temp_freq']}\n") 

print( f" current_iters               : {ns.current_iter}   \n"
       f" current_epochs              : {ns.current_epoch}  \n" 
       f" train_total_epochs          : {ns.training_epochs}\n" 
       f" stop_epoch_training         : {ns.stop_epoch_training}")

In [44]:
from torch import nn

In [49]:
for name, param in environ.networks['mtl-net'].named_parameters():
    print(f" {name:40s}  {param.shape} ")

 task1_logits                              torch.Size([6, 2]) 
 task2_logits                              torch.Size([6, 2]) 
 task3_logits                              torch.Size([6, 2]) 
 task4_logits                              torch.Size([6, 2]) 
 task5_logits                              torch.Size([6, 2]) 
 task6_logits                              torch.Size([6, 2]) 
 task7_logits                              torch.Size([6, 2]) 
 task8_logits                              torch.Size([6, 2]) 
 task9_logits                              torch.Size([6, 2]) 
 task10_logits                             torch.Size([6, 2]) 
 backbone.Input_Layer.linear.weight        torch.Size([32000, 4000]) 
 backbone.Input_Layer.linear.bias          torch.Size([4000]) 
 backbone.blocks.0.linear.weight           torch.Size([4000, 4000]) 
 backbone.blocks.0.linear.bias             torch.Size([4000]) 
 backbone.blocks.1.linear.weight           torch.Size([4000, 4000]) 
 backbone.blocks.1.linear.bias      

In [50]:
for name, param in environ.networks['mtl-net'].backbone.named_parameters():
        print(f" {name:40s}  {param.shape} ")

 Input_Layer.linear.weight                 torch.Size([32000, 4000]) 
 Input_Layer.linear.bias                   torch.Size([4000]) 
 blocks.0.linear.weight                    torch.Size([4000, 4000]) 
 blocks.0.linear.bias                      torch.Size([4000]) 
 blocks.1.linear.weight                    torch.Size([4000, 4000]) 
 blocks.1.linear.bias                      torch.Size([4000]) 
 blocks.2.linear.weight                    torch.Size([4000, 4000]) 
 blocks.2.linear.bias                      torch.Size([4000]) 
 blocks.3.linear.weight                    torch.Size([4000, 4000]) 
 blocks.3.linear.bias                      torch.Size([4000]) 
 blocks.4.linear.weight                    torch.Size([4000, 4000]) 
 blocks.4.linear.bias                      torch.Size([4000]) 
 blocks.5.linear.weight                    torch.Size([4000, 4000]) 
 blocks.5.linear.bias                      torch.Size([4000]) 


In [53]:
for name, param in environ.networks['mtl-net'].named_parameters():
    if 'task' in name and 'fc' in name:    
        print(f" {name:40s}  {param.shape} ")

 task1_fc1_c0.linear.weight                torch.Size([472, 4000]) 
 task1_fc1_c0.linear.bias                  torch.Size([472]) 
 task2_fc1_c0.linear.weight                torch.Size([624, 4000]) 
 task2_fc1_c0.linear.bias                  torch.Size([624]) 
 task3_fc1_c0.linear.weight                torch.Size([688, 4000]) 
 task3_fc1_c0.linear.bias                  torch.Size([688]) 
 task4_fc1_c0.linear.weight                torch.Size([192, 4000]) 
 task4_fc1_c0.linear.bias                  torch.Size([192]) 
 task5_fc1_c0.linear.weight                torch.Size([620, 4000]) 
 task5_fc1_c0.linear.bias                  torch.Size([620]) 
 task6_fc1_c0.linear.weight                torch.Size([184, 4000]) 
 task6_fc1_c0.linear.bias                  torch.Size([184]) 
 task7_fc1_c0.linear.weight                torch.Size([224, 4000]) 
 task7_fc1_c0.linear.bias                  torch.Size([224]) 
 task8_fc1_c0.linear.weight                torch.Size([148, 4000]) 
 task8_fc1_c0.linear.b

In [None]:
num_blocks = 6
num_policy_layers = 6
gt =  torch.ones((num_blocks)).long()
gt0 =  torch.zeros((num_blocks)).long()
print(gt)
print(gt0)

loss_weights = ((torch.arange(0, num_policy_layers, 1) + 1).float() / num_policy_layers)
print(loss_weights)

In [None]:
if environ.opt['diff_sparsity_weights'] and not environ.opt['is_sharing']:
    print(' cond 1')
    ## Assign higher weights to higher layers 
    loss_weights = ((torch.arange(0, num_policy_layers, 1) + 1).float() / num_policy_layers)
    print(f"{task_key} sparsity error:  {2 * (loss_weights[-num_blocks:] * environ.cross_entropy2(logits[-num_blocks:], gt)).mean()})")
    print_dbg(f" loss_weights :  {loss_weights}", verbose = True)
    print_dbg(f" cross_entropy:  {environ.cross_entropy2(logits[-num_blocks:], gt)}  ", verbose = True)
    print_dbg(f" loss[sparsity][{task_key}]: {self.losses['sparsity'][task_key] } ", verbose = True)

else:
    print('\n cond 2')
    print_dbg(f"Compute CrossEntropyLoss between \n Logits   : \n{logits[-num_blocks:]} \n and gt: \n{gt} \n", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[-num_blocks:], gt)}")
    
    print('\n cond 2')
    print_dbg(f"Compute CrossEntropyLoss between Logits      : {logits[-1:]}  and gt: {gt[-1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[-1:], gt[-1:])} \n")
    print_dbg(f"Compute CrossEntropyLoss between Logits      : {logits[-1:]}  and gt: {gt0[-1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[-1:], gt0[-1:])} \n")
    
    print('\n cond 3')    
    print_dbg(f"Compute CrossEntropyLoss between Logits   : {logits[0:1]}  and gt: {gt[0:1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[0:1], gt[0:1])} \n")
    print_dbg(f"Compute CrossEntropyLoss between Logits   : {logits[0:1]}  and gt: {gt0[0:1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[0:1], gt0[0:1])} \n")
        
        

In [None]:
print(" ns.check_for_improvment_wait:  {ns.check_for_improvment_wait}")
print(" ns.curriculum_epochs:          {ns.curriculum_epochs}")

In [None]:
# pp.pprint(environ.val_metrics)
df = environ.val_metrics['task1']['classification']

In [None]:
print(df[pd.notna(df.roc_auc_score)])

In [None]:
df[pd.notna(df.roc_auc_score)].mean()

In [None]:
# environ.display_trained_policy(ns.current_epoch,out=[sys.stdout])
# environ.num_tasks
# print(environ.get_policy_prob().shape)
# print(environ.val_data['task1'].keys())
# print(environ.val_data['task1']['yc_ind'][0][:40])
# print(environ.val_data['task1']['yc_ind'][1][:40])
# print(environ.val_data['task1']['yc_data'][:40])
# print(environ.val_data['task1']['yc_hat'][:40])
# environ.display_trained_policy(ns.current_epoch,out=[sys.stdout])
# environ.display_trained_logits(ns.current_epoch,out=[sys.stdout])
batch = next(dldrs.warmup_trn_loader)   

In [None]:
batch.keys()

### Losses and Metrics

In [95]:
print(f" val_metric keys               : {ns.val_metrics.keys()}")
print(f" aggreagted keys               : {ns.val_metrics['aggregated'].keys()}")
print(f" task keys                     : {ns.val_metrics['task'].keys()}")
print(f" task / task1 keys             : {ns.val_metrics['task']['task1']}")
print(f" sparsity keys                 : {ns.val_metrics['sparsity'].keys()}")
print(f" total keys                    : {ns.val_metrics['total'].keys()}")
print(f" aggregated keys               : {ns.val_metrics['aggregated'].keys()}")
print()
print(f" task1 keys                    : {ns.val_metrics['task1'].keys()}")
print(f" task1 classification keys     : {ns.val_metrics['task1']['classification'].keys()}")
print(f" task1 classification_agg keys : {ns.val_metrics['task1']['classification_agg'].keys()}")

print()
print(f" task1 agg sc_loss             : {ns.val_metrics['task1']['classification_agg']['sc_loss']:5f}")
print(f" task1 agg bce_loss            : {ns.val_metrics['task1']['classification_agg']['bceloss']:5f}")
print(f" task1 agg bce_loss            : {ns.val_metrics['task1']['classification_agg']['logloss']:5f}")
print(f" task-task1                    : {ns.val_metrics['task']['task1']:5f}")
print(f" task-task1                    : \n  {ns.val_metrics['task1']['classification']}")
print(f" task-task1                    : \n  {ns.val_metrics['task1']['classification_agg']}")

print()
print(f" task2                         : {ns.val_metrics['task2']['classification_agg']['sc_loss']:5f}")
print(f" task3                         : {ns.val_metrics['task3']['classification_agg']['sc_loss']:5f}")
print(f" loss                          : {ns.val_metrics['loss']['total']:5f}")
print(f" train_time                    : {ns.val_metrics['train_time']:2f}")
print(f" epoch                         : {ns.val_metrics['epoch']}")


 val_metric keys               : dict_keys(['parms', 'task', 'task_mean', 'sparsity', 'sharing', 'total', 'task1', 'task2', 'task3', 'task4', 'task5', 'task6', 'task7', 'task8', 'task9', 'task10', 'aggregated', 'train_time', 'epoch'])
 aggreagted keys               : dict_keys(['roc_auc_score', 'auc_pr', 'avg_prec_score', 'f1_max', 'p_f1_max', 'kappa', 'kappa_max', 'p_kappa_max', 'bceloss', 'sc_loss', 'logloss'])
 task keys                     : dict_keys(['total', 'task1', 'task2', 'task3', 'task4', 'task5', 'task6', 'task7', 'task8', 'task9', 'task10'])
 task / task1 keys             : 0.18089021539345657
 sparsity keys                 : dict_keys(['total', 'task1', 'task2', 'task3', 'task4', 'task5', 'task6', 'task7', 'task8', 'task9', 'task10'])
 total keys                    : dict_keys(['total', 'total_mean', 'task', 'policy'])
 aggregated keys               : dict_keys(['roc_auc_score', 'auc_pr', 'avg_prec_score', 'f1_max', 'p_f1_max', 'kappa', 'kappa_max', 'p_kappa_max', 'bcelo

KeyError: 'loss'

In [98]:
environ.batch_data['task1']['yc_trn_weights'].shape

torch.Size([472])

In [None]:
tmp = environ.get_loss_dict()
print(tmp.keys())
pp.pprint(tmp)

In [None]:
type(ns.val_metrics['aggregated'])

In [67]:
pp.pprint(ns.trn_losses)

{   'parms': {   'gumbel_temp': 0.004459516789125928,
                 'lambda_sharing': 0.05,
                 'lambda_sparsity': 0.01,
                 'lambda_tasks': 1.0,
                 'lr_0': 0.0005,
                 'lr_1': 0.0005,
                 'policy_lr': 0.01,
                 'train_layers': 6},
    'sharing': {   'total': tensor(0.0017, device='cuda:0', grad_fn=<DivBackward0>)},
    'sparsity': {   'task1': tensor(8.4396e-05, device='cuda:0', grad_fn=<DivBackward0>),
                    'task10': tensor(6.8022e-05, device='cuda:0', grad_fn=<DivBackward0>),
                    'task2': tensor(8.6199e-05, device='cuda:0', grad_fn=<DivBackward0>),
                    'task3': tensor(9.3598e-05, device='cuda:0', grad_fn=<DivBackward0>),
                    'task4': tensor(7.2445e-05, device='cuda:0', grad_fn=<DivBackward0>),
                    'task5': tensor(8.7382e-05, device='cuda:0', grad_fn=<DivBackward0>),
                    'task6': tensor(7.7664e-05, device='cud

In [70]:
pp.pprint(environ.val_metrics)

{   'aggregated': {   'auc_pr': 0.6615142171123191,
                      'avg_prec_score': 0.6754890273504701,
                      'bceloss': 0.4622365384594069,
                      'f1_max': 0.7105706539493313,
                      'kappa': 0.2440221960877036,
                      'kappa_max': 0.4854695532094693,
                      'logloss': 5.540098849245579e-06,
                      'p_f1_max': 0.3768589172211572,
                      'p_kappa_max': 0.4539545361340589,
                      'roc_auc_score': 0.762805749290287,
                      'sc_loss': 0.004264734853556151},
    'epoch': 132,
    'parms': {   'gumbel_temp': 0.004459516789125928,
                 'lambda_sharing': 0.05,
                 'lambda_sparsity': 0.01,
                 'lambda_tasks': 1.0,
                 'lr_0': 0.0005,
                 'lr_1': 0.0005,
                 'policy_lr': 0.01,
                 'train_layers': 0},
    'sharing': {'total': 0.0018787450389936566},
    'sparsity':

### val_data

In [100]:
dldrs.val_loader.dataset

<dataloaders.chembl_dataloader.ClassRegrSparseDataset_v3 at 0x7fbad04449d0>

In [101]:
dldrs.val_loader.dataset.y_class_list

[<86274x472 sparse matrix of type '<class 'numpy.float64'>'
 	with 50791 stored elements in Compressed Sparse Row format>,
 <86274x624 sparse matrix of type '<class 'numpy.float64'>'
 	with 65658 stored elements in Compressed Sparse Row format>,
 <86274x688 sparse matrix of type '<class 'numpy.float64'>'
 	with 136843 stored elements in Compressed Sparse Row format>,
 <86274x192 sparse matrix of type '<class 'numpy.float64'>'
 	with 30136 stored elements in Compressed Sparse Row format>,
 <86274x620 sparse matrix of type '<class 'numpy.float64'>'
 	with 64840 stored elements in Compressed Sparse Row format>,
 <86274x184 sparse matrix of type '<class 'numpy.float64'>'
 	with 19189 stored elements in Compressed Sparse Row format>,
 <86274x224 sparse matrix of type '<class 'numpy.float64'>'
 	with 28096 stored elements in Compressed Sparse Row format>,
 <86274x148 sparse matrix of type '<class 'numpy.float64'>'
 	with 34404 stored elements in Compressed Sparse Row format>,
 <86274x344 spa

In [None]:
(environ.val_data['task1']['yc_data'][0] == environ.val_data['task1']['yc_data']).all()

In [None]:
from utils.sparsechem_utils import compute_metrics, aggregate_results
import pandas
cc = compute_metrics(cols   = environ.val_data['task1']['yc_ind'][1], 
                     y_true = environ.val_data['task1']['yc_data'], 
                     y_score= environ.val_data['task1']['yc_hat'] ,
                     num_tasks=100)


In [None]:
 df   = pd.DataFrame({"task"   : environ.val_data['task1']['yc_ind'][1], 
                      "y_true" : environ.val_data['task1']['yc_data'],  
                      "y_score": environ.val_data['task1']['yc_hat']})

In [None]:
for task, frame in df.groupby("task", sort=True):
    print(f" task {task}")
    print(frame.head(10))

In [None]:
# df
df.groupby("task", sort=True).count()

In [None]:
pp.pprint(environ.val_metrics)

In [None]:
print(environ.batch_data['task1']['yc_aggr_weights'])
environ.batch['task1']['aggr_weights']

In [None]:
c2 = aggregate_results(cc)

In [None]:
dldrs.trainset0.tasks_weights_list

### Post Warm-up Training stuff

In [None]:
get_all_task_logits
    "p = environ.get_sample_policy(hard_sampling = False)\n"print(p)
p = environ.get_policy_prob()
print(p)
p = environ.get_policy_logits()
print(p)

# p = environ.get_current_policy()
# print(p)

In [None]:
a = softmax([0.0, 1])
print(a)
sampled = np.random.choice((1, 0), p=a)
print(sampled)

In [None]:
print(environ.optimizers['weights'])
print(environ.schedulers['weights'].get_last_lr())

In [None]:
print('losses.keys      : ', environ.losses.keys())
print('losses[task]keys : ', environ.losses['task1'].keys())
pp.pprint(environ.losses)

In [None]:
print( environ.val_metrics.keys())
# pp.pprint(val_metrics)
print(type(environ.val_metrics['aggregated']))
print()
print(type(environ.val_metrics['task1']['classification_agg']))
print()
pp.pprint(environ.val_metrics)

### Policy / Logit stuff

In [None]:
from scipy.special          import softmax

In [None]:
np.set_printoptions(precision=8,edgeitems=3, infstr='inf', linewidth=150, nanstr='nan')
torch.set_printoptions(precision=8,linewidth=132)

#### `get_task_logits(n)` Get logits for task group n

In [None]:
task_logits = environ.get_task_logits(1)
print(task_logits)

#### `get_arch_parameters()`: Get last used logits from network

In [None]:
import torch.optim as optim
arch_parameters      = environ.get_arch_parameters()
print(arch_parameters)

In [None]:
import torch.optim as optim
arch_parameters      = environ.get_arch_parameters()
print(arch_parameters)

#### `get_policy_logits()`:  Get Policy Logits - returns same as `get_arch_parameters()`

In [None]:
logs = environ.get_policy_logits()
for i in logs:
    print(i, '\n')
# probs = softmax(logs, axis= -1)
# for i in probs:
#     print(i, '\n')

#### `get_policy_prob()` : Gets the softmax of the logits

In [None]:
policy_softmaxs = environ.get_policy_prob()
for i in policy_softmaxs:
    print(i, '\n')

#### `get_sample_policy( hard_sampling = False)` : Calls test_sample_policy of network with random choices based on softmax of logits

In [None]:
policy_softmaxs = environ.get_policy_prob()
policies,logits = environ.get_sample_policy(hard_sampling = False)

for l, p, s in zip(logits, policies, policy_softmaxs) :
    for  l_row, p_row, s_row in zip(l, p, s):
        print( l_row,'\t', p_row, '\t', s_row)
    print('\n')

#### `get_sample_policy( hard_sampling = True)` : Calls test_sample_policy of network using ARGMAX of logits

In [None]:
policy_softmaxs = environ.get_policy_prob()
hard_policies, logits = environ.get_sample_policy(hard_sampling = True)

for p,l,s in zip(hard_policies, logits, policy_softmaxs) :
    for  p_row, l_row, s_row in zip(p, l, s):
        print( l_row,'\t', p_row, '\t', s_row)
    print('\n')

#### Print

In [None]:
print(f" Layer    task 1      task 2      task 3")
print(f" -----    ------      ------      ------")
for idx, (l1, l2, l3) in enumerate(zip(hard_policies[0], hard_policies[1], hard_policies[2]),1):
    print(f"   {idx}      {l1}       {l2}       {l3}")
    

    print(f"\n\n where [p1  p2]:  p1: layer is selected    p2: layer is not selected")

In [None]:
def display_trained_policy(iter):

    policy_softmaxs = environ.get_policy_prob()
    policy_argmaxs = 1-np.argmax(policy_softmaxs, axis = -1)
    print(f"  Trained polcies at iteration: {iter} ")
    print(f"                   task 1                           task 2                         task 3        ")
    print(f" Layer       softmax        select          softmax        select          softmax        select   ")
    print(f" -----    ---------------   ------       ---------------   ------       ---------------   ------   ")
    for idx, (l1,l2,l3,  p1,p2,p3) in enumerate(zip(policy_softmaxs[0], policy_softmaxs[1], policy_softmaxs[2], policy_argmaxs[0], policy_argmaxs[1], policy_argmaxs[2]),1):
        print(f"   {idx}      {l1[0]:.4f}   {l1[1]:.4f}   {p1:4d}    {l2[0]:11.4f}   {l2[1]:.4f}   {p2:4d}    {l3[0]:11.4f}   {l3[1]:.4f}   {p3:4d}")

    print()
# print(f"\n\n where [p1  p2]:  p1: layer is selected    p2: layer is not selected")

In [None]:
display_trained_policy(5)

In [None]:
print(f"                        POLICIES (SOFTMAX)                                       task 3          ")
print(f" Layer    task1              task2            task3 softmax         softmax         argmax         softmax         argmax   ")
print(f" -----    -------------     -------------     -------------   ------   ")
for idx, (l1,l2,l3, h1,h2,h3) in enumerate(zip(policy_softmaxs[0], policy_softmaxs[1], policy_softmaxs[2],hard_policies[0], hard_policies[1], hard_policies[2]),1):
    print(f"   {idx}      {l1[0]:.4f} {l1[1]:.4f}     {l2[0]:.4f} {l2[1]:.4f}     {l3[0]:.4f} {l3[1]:.4f}    {h3}")
    
print(f"\n\n where [p1  p2]:  p1: layer is selected    p2: layer is not selected")

In [None]:
# print(policy_softmaxs[2], np.argmax(1-policy_softmaxs[2], axis = -1))
print(policy_softmaxs, np.argmax(policy_softmaxs, axis = -1))

#### `get_current_logits()` : Calls test_sample_policy of network using ARGMAX of logits

In [None]:
logits  = (environ.get_current_logits())
for i in logits:
    print(i ,'\n')

#### `get_current_policy()` : Calls test_sample_policy of network using ARGMAX of logits

In [None]:
pols  = (environ.get_current_policy())

for i in pols:
    print(i ,'\n')

#### `gumbel_softmax()`  

In [None]:
np.set_printoptions(precision=8,edgeitems=3, infstr='inf', linewidth=150, nanstr='nan', floatmode = 'maxprec_equal')
torch.set_printoptions(precision=8,linewidth=132)

In [None]:
print(environ.temp)
# tau = environ.temp
tau = 1
for i in range(3): 
    logits_tensor = torch.tensor(logits[0])
    # Sample soft categorical using reparametrization trick:
    gumbel_soft = F.gumbel_softmax(logits_tensor, tau=tau, hard=False).cpu().numpy() 

    # Sample hard categorical using "Straight-through" trick:
    gumbel_hard  = F.gumbel_softmax(logits_tensor, tau=tau, hard=True).cpu().numpy()
    
    for l, gs, gh in zip(lgts, gumbel_soft, gumbel_hard):
        print(f"   {l}   \t {gs}            \t {gh}")
#     print(lgts)
#     print(gumbel_soft)
#     print(gumbel_hard)
    print()

In [None]:
for lgts in logits:
    logits_tensor = torch.tensor(lgts)
    print(lgts)
    # Sample soft categorical using reparametrization trick:
    gumbel_soft = F.gumbel_softmax(logits_tensor, tau=1, hard=False)
    print(gumbel_soft)

    # Sample hard categorical using "Straight-through" trick:
    gumbel_hard  = F.gumbel_softmax(logits_tensor, tau=1, hard=True)
    print(gumbel_hard)
    print()

In [None]:
smax = scipy.special.softmax(logs, axis =1)
# smax = np.array( 
# [[0.46973792, 0.530262  ],
#  [0.45025694, 0.549743  ],
#  [0.4443086 , 0.5556915 ],
#  [0.4138397 , 0.58616036],
#  [0.4140113 , 0.5859887 ],
#  [0.42114905, 0.57885087]])

print(smax.shape)
print(smax)
print(smax[0])
print(smax[0].sum())
print(np.random.choice((1,0), p =smax[0]))

In [None]:
logs = np.array(
[[0.33064184, 0.42053092],
 [0.3532089 , 0.52056104],
 [0.3888512 , 0.5680909 ],
 [0.42039296, 0.694217  ],
 [0.4519742 , 0.73311865],
 [0.48401102, 0.7522658 ]],
)