## Initialization  

In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))
%load_ext autoreload
%autoreload 2

In [2]:
import os 
import sys
sys.path.insert(0, '../src')
# sys.path.insert(0, '/home/kbardool/kusanagi/AdaSparseChem/src') ; print(sys.path)
import time
import argparse
import yaml
import types, copy, pprint
from time import sleep
from datetime import datetime
import pandas as pd
import numpy  as np
from utils import (initialize, init_dataloaders, init_environment, init_wandb, training_initializations, model_initializations, 
                   check_for_resume_training, disp_dataloader_info, disp_info_1, warmup_phase, weight_policy_training, 
                   display_gpu_info, init_dataloaders_by_fold_id, print_separator, print_heading, print_underline,
                   timestring, print_loss, print_metrics_cr, get_command_line_args, load_from_pickle) 

pp = pprint.PrettyPrinter(indent=4)
np.set_printoptions(edgeitems=3, infstr='inf', linewidth=150, nanstr='nan')
pd.options.display.width = 132
os.environ["WANDB_NOTEBOOK_NAME"] = "Adashare_Train.ipynb"

## Set visible GPU device 
##----------------------------------------------
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
# os.environ["CUDA_VISIBLE_DEVICES"] = '2'
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Initialization and  Environment Setup

### Parse Input Args  - Read YAML config file - wandb initialization

In [3]:
# synthetic_config_file  = "../yamls/chembl_synt_train.yaml"
# config_file      = "../yamls/chembl_mini_train.yaml"
config_file      = "../yamls/chembl_cb29_train_1task.yaml"
config_file      = "../yamls/chembl_cb29_train_10task.yaml"
batch_size=4096
# batch_size=2048
# RESUME_MODEL_CKPT = 'model_train_ep_25_seed_0088'

####   For Resume 

In [4]:
restart_input_args = f" --config  {config_file} " \
             f" --batch_size       {batch_size} "  \
             " --exp_desc            10-task warmup with policy training " \
             " --hidden_size             4000 4000 4000 4000 4000 4000 "  \
             " --warmup_epochs             50 "  \
             " --tail_hidden_size        4000 "  \
             " --first_dropout           0.80 "  \
             " --middle_dropout          0.80 "  \
             " --last_dropout            0.80 "  \
             " --seed_idx                   0 "  \
             " --task_lr                0.001 "  \
             " --backbone_lr            0.001 "  \
             " --decay_lr_rate            0.5 "  \
             " --decay_lr_freq             40 "  \
             " --decay_lr_cooldown         10 "  \
             " --policy_lr               0.01 "  \
             " --policy_decay_lr_rate     0.5 "  \
             " --policy_decay_lr_freq      40 "  \
             " --policy_decay_lr_cooldown  10 "  \
             " --lambda_tasks             1.0 "  \
             " --lambda_sparsity        0.001 "  \
             " --lambda_sharing          0.05 "  \
             " --pytorch_threads            7 "  \
             " --cuda_devices               2"   \
             " --gpu_ids                    0 "  \
             " --resume"                       \
             " --resume_path        ../../experiments/AdaSparseChem-cb29-10task/4000x6_0822_1755_lr0.001_do0.8" \
             " --resume_ckpt        model_warmup_last_ep_10" \
             " --resume_metrics     metrics_warmup_last_ep_10.pickle" \
             " --exp_id             1x50t0va" \
             " --exp_name           0822_1755 " \
             " --folder_sfx         RESUME_2 "

#              " --resume_ckpt        model_best_model" \
#              " --resume_metrics     metrics_best.pickle" \


####  For Initiating 

In [5]:
input_args = f" --config          {config_file} " \
             f" --batch_size       {batch_size} "  \
             " --exp_desc            10-task warmup with policy training " \
             " --hidden_size             4000 4000 4000 4000 4000 4000 "  \
             " --tail_hidden_size        4000 "  \
             " --warmup_epochs             20 "  \
             " --first_dropout           0.80 "  \
             " --middle_dropout          0.80 "  \
             " --last_dropout            0.80 "  \
             " --seed_idx                   0 "  \
             " --task_lr                0.001 "  \
             " --backbone_lr            0.001 "  \
             " --decay_lr_rate            0.5 "  \
             " --decay_lr_freq             20 "  \
             " --decay_lr_cooldown          5 "  \
             " --policy_lr               0.01 "  \
             " --policy_decay_lr_rate     0.5 "  \
             " --policy_decay_lr_freq      20 "  \
             " --policy_decay_lr_cooldown   5 "  \
             " --lambda_tasks             1.0 "  \
             " --lambda_sparsity        0.005 "  \
             " --lambda_sharing          0.05 "  \
             " --pytorch_threads            7 "  \
             " --cuda_devices               2"   \
             " --gpu_ids                    0 "  \

#              " --decay_lr_rate       0.3 "  \
#              " --decay_lr_freq        10 "  \
#              " --policy_lr         0.001 "  \
#              " --lambda_sparsity    0.02 "  \
#              " --lambda_sharing     0.01 "  \

### Read yaml Configuration File

In [6]:
ns = types.SimpleNamespace()
input_args = input_args.split() if input_args is not None else input_args
# input_args = restart_input_args.split() 
ns.args = get_command_line_args(input_args, display = True)
os.environ["CUDA_VISIBLE_DEVICES"]=ns.args.cuda_devices


 command line parms : 
------------------------
 config...................  ../yamls/chembl_cb29_train_10task.yaml
 project_name.............  None
 exp_id...................  2jeue03f
 exp_name.................  None
 folder_sfx...............  None
 exp_desc.................  10-task warmup with policy training
 hidden_sizes.............  [4000, 4000, 4000, 4000, 4000, 4000]
 tail_hidden_size.........  [4000]
 warmup_epochs............  20
 training_epochs..........  None
 seed_idx.................  0
 batch_size...............  4096
 first_dropout............  0.8
 middle_dropout...........  0.8
 last_dropout.............  0.8
 backbone_lr..............  0.001
 task_lr..................  0.001
 policy_lr................  0.01
 decay_lr_rate............  0.5
 decay_lr_freq............  20
 decay_lr_cooldown........  5
 policy_decay_lr_rate.....  0.5
 policy_decay_lr_freq.....  20
 policy_decay_lr_cooldown.  5
 lambda_tasks.............  1.0
 lambda_sparsity..........  0.005
 lambda_

In [7]:
# display_gpu_info()

In [8]:
opt = initialize(ns, build_folders = True)

##################################################
################### READ YAML ####################
##################################################
 Pytorch thread count: 20
 Set Pytorch thread count to : 7
 Pytorch thread count set to : 7


[34m[1mwandb[0m: Currently logged in as: [33mkbardool[0m. Use [1m`wandb login --relogin`[0m to force relogin


 WandB Initialization -----------------------------------------------------------
 PROJECT NAME: AdaSparseChem-cb29-10Task
 RUN ID      : 2jeue03f 
 RUN NAME    : 0906_2255
 --------------------------------------------------------------------------------


 log_dir              create folder:  ../../experiments/AdaSparseChem-cb29-10task/4000x6_0906_2255_lr0.001_do0.8
 result_dir           folder exists:  ../../experiments/AdaSparseChem-cb29-10task/4000x6_0906_2255_lr0.001_do0.8
 checkpoint_dir       folder exists:  ../../experiments/AdaSparseChem-cb29-10task/4000x6_0906_2255_lr0.001_do0.8

------------------------------------------------------------------------------------------------------------------------
 experiment name       : 0906_2255 
 experiment id         : 2jeue03f 
 folder_name           : 4000x6_0906_2255_lr0.001_do0.8 
 experiment description: 10-task warmup with policy training
 Random seeds          : [88, 45, 50, 100, 44, 48, 2048, 2222, 9999]
 Random  seed used     :

In [24]:
# ns.wandb_run.finish()
ns.wandb_run.finish()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

### Setup Dataloaders

In [10]:
# dldrs = init_dataloaders(opt, verbose = False)
dldrs = init_dataloaders_by_fold_id(opt, verbose = False)
disp_dataloader_info(dldrs)

 Warmup folds    : [2, 3, 4]
 Weights folds   : [2, 3]
 Policy folds    : [4]
 Validation folds: [1]
---------------------------------------------------------------------------
Load label/Y file for task 1 - task group chembl_29_Y_tg_0_cols_472.npy
--------------------------------------------------------------------------- 

 Number of non-zero features in ecfp[0]:79

 Task 1 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      81937 
    Total   -1  Labels :     188511 
    Total < -1  Labels :          0 
    Total != 0  Labels :     270448

 Task 1 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per Sample  : 472  Y rows with populated labels: 32866  non zero cols: 81937

 Task 1 files post-filtering : 
---------------------------------
X file : # Samples :  254529     # Features per Sample: 32000 
Y file : # Samples :  254529     # Labels per


 Task 9 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per Sample  : 344  Y rows with populated labels: 35996  non zero cols: 110249

 Task 9 files post-filtering : 
---------------------------------
X file : # Samples :  254529     # Features per Sample: 32000 
Y file : # Samples :  254529     # Labels per Sample  : 344  Y rows with populated labels: 20684  non zero cols: 63517

Using 226 of 344 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
------------------------------------------------------------------------------
Load label/Y file for task 10 - task group chembl_29_Y_tg_1031_cols_72.npy
------------------------------------------------------------------------------ 

 Number of non-zero features in ecfp[0]:79

 Task 10 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      18631 
    Total   -1  Labels :     


 Task 7 files post-filtering : 
---------------------------------
X file : # Samples :  168649     # Features per Sample: 32000 
Y file : # Samples :  168649     # Labels per Sample  : 224  Y rows with populated labels: 4476  non zero cols: 14840

Using 109 of 224 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
------------------------------------------------------------------------------
Load label/Y file for task 8 - task group chembl_29_Y_tg_1005_cols_148.npy
------------------------------------------------------------------------------ 

 Number of non-zero features in ecfp[0]:79

 Task 8 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      45065 
    Total   -1  Labels :     104361 
    Total < -1  Labels :          0 
    Total != 0  Labels :     149426

 Task 8 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels


 Task 6 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per Sample  : 184  Y rows with populated labels: 15543  non zero cols: 41813

 Task 6 files post-filtering : 
---------------------------------
X file : # Samples :  85880     # Features per Sample: 32000 
Y file : # Samples :  85880     # Labels per Sample  : 184  Y rows with populated labels: 2639  non zero cols: 7755

Using 92 of 184 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
-----------------------------------------------------------------------------
Load label/Y file for task 7 - task group chembl_29_Y_tg_836_cols_224.npy
----------------------------------------------------------------------------- 

 Number of non-zero features in ecfp[0]:79

 Task 7 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :      38227 
    Total   -1  Labels :      91904 
  


 Task 4 files post-filtering : 
---------------------------------
X file : # Samples :  86274     # Features per Sample: 32000 
Y file : # Samples :  86274     # Labels per Sample  : 192  Y rows with populated labels: 3776  non zero cols: 8372

Using 111 of 192 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
----------------------------------------------------------------------------
Load label/Y file for task 5 - task group chembl_29_Y_tg_11_cols_620.npy
---------------------------------------------------------------------------- 

 Number of non-zero features in ecfp[0]:79

 Task 5 label file: 
    Total > +1  Labels :          0 
    Total   +1  Labels :     142158 
    Total   -1  Labels :     193933 
    Total < -1  Labels :          0 
    Total != 0  Labels :     336091

 Task 5 files pre-filtering : 
--------------------------------
X file : # Samples :  423736     # Features per Sample: 32000   
Y file : # Samples :  423736     # Labels per Samp

### Setup Model  

In [11]:
environ = init_environment(ns, opt, is_train = True, display_cfg = True)

##################################################
############# CREATE THE ENVIRONMENT #############
##################################################
 device is  cuda:0
--------------------------------------------------
 SparseChem_Backbone  Ver: 1.0 Init() Start 
-------------------------------------------------- 

 layer config        : [1, 1, 1, 1, 1, 1] 
 skip residual layers: False   skip hidden layers  : False
 SparseChem_BackBone() Input Layer  - Input: 32000  output: 4000  non-linearity:<class 'torch.nn.modules.activation.ReLU'>
 Hidden layer 0 - Input: 4000   output:4000
    _make_layer() using block: <class 'models.sparsechem_backbone.SparseChemBlock'>
           input_size: 4000 output_sz: 4000  non_linearity: ReLU() dropout: 0.8 bias: True
           SparseChemBlock.init(): input_size: 4000 output_sz: 4000   non_linearity: ReLU() dropout: 0.8 bias: True
 Hidden layer 1 - Input: 4000   output:4000
    _make_layer() using block: <class 'models.sparsechem_backbone.SparseChe

In [13]:
print(environ.networks['mtl-net'].policys)
print(environ.policys)

[None, None, None, None, None, None, None, None, None, None]


AttributeError: 'SparseChemEnv' object has no attribute 'policys'

In [15]:
model_initializations(ns, opt, environ, policy_learning = False)

 Model optimizers defined . . . policy_learning: False
 Model schedulers defined . . . policy_learning: False
 Metrics CSV file header written . . . 
 Model initializations complete . . . 


In [16]:
print(environ.networks['mtl-net'].policys)
print(environ.policys)

[None, None, None, None, None, None, None, None, None, None]


AttributeError: 'SparseChemEnv' object has no attribute 'policys'

### Initiate / Resume Training 

In [17]:
check_for_resume_training(ns, opt, environ, epoch = 0 , iter = 0)

opt['train']['which_iter'] :  warmup
##################################################
######## Initiate Training from scratch  #########
##################################################


# Warmup Training

### Warmup Training Preparation

In [18]:


# training_initializations(ns, opt, environ, dldrs, warmup_iterations = 1000, weight_iterations = 750, policy_iterations = 250, eval_iterations = 250, warmup = True)
# training_initializations(ns, opt, environ, dldrs, warmup_iterations = 2, eval_iterations = 2, warmup = True)
training_initializations(ns, opt, environ, dldrs, warmup = True)

 training preparation: - check for CUDA - cuda available as device id: [0]
 --> sparsechem_env.cuda()
 cuda()  - environ doesnt have policy1
dict_keys(['weights', 'alphas'])
 opt:  weights  
 opt:  alphas  
 training preparation: - set print_freq to                        : 63 
 training preparation: - set batches per warmup training epoch to : 63
 training preparation: - set batches per weight training epoch to : 42
 training preparation: - set batches per policy training epoch to : 21
 training preparation: - set batches per validation to            : 22
 training preparation: - warmup_epochs                            : 20
 training preparation: - weight/policy training epochs            : 250
 training preparation: - set curriculum speed  to                 : 3
 training preparation: - set curriculum epochs to                 : 0
 training preparation: - write checkpoints                        : True
 training preparation complete . . .


In [19]:
print(environ.networks['mtl-net'].policys)
print(environ.policys)

[]


AttributeError: 'SparseChemEnv' object has no attribute 'policys'

In [20]:
# print('-'*80)
disp_info_1(ns, opt, environ)
print('-'*80)
print(environ.disp_for_excel())


 Num_blocks                : 6                                

 batch size                : 4096 
 # batches / Warmup epoch  : 42 
 # batches / Weight epoch  : 42 
 # batches / Policy epoch  : 21                                 

 Print Frequency           : -1 
 Config Val Frequency      : 500 
 Config Val Iterations     : 22 
 Val iterations            : 22 
 which_iter                : warmup 
 train_resume              : False                                 
 
 fix BN parms              : False 
 Task LR                   : 0.001 
 Backbone LR               : 0.001                                 

 Sharing  regularization   : 0.05 
 Sparsity regularization   : 0.005 
 Task     regularization   : 1.0                                 

 Current epoch             : 0  
 Warm-up epochs            : 20 
 Training epochs           : 250
--------------------------------------------------------------------------------

    folder: 4000x6_0906_2255_lr0.001_do0.8
    layers: 6 [4000, 4000

In [None]:
# environ.display_trained_logits(ns.current_epoch,out=sys.stdout) 
# environ.display_trained_policy(ns.current_epoch,out=sys.stdout)

In [21]:
# ns.eval_iters = 250
# ns.trn_iters_warmup = 750
# ns.eval_iters = 2
# ns.trn_iters_warmup = 2
ns.warmup_epochs = 10
print(ns.warmup_epochs)
print(ns.eval_iters )
print(ns.trn_iters_warmup)
print(ns.trn_iters_weights)
print(ns.trn_iters_policy)

# ns.check_for_improvment_wait = 0
# ns.current_epoch =0 
# ns.write_checkpoint = False
print_heading(f" Last Epoch: {ns.current_epoch}   # of warm-up epochs to do:  {ns.warmup_epochs} - Run epochs {ns.current_epoch+1} to {ns.current_epoch + ns.warmup_epochs}", verbose = True)

10
22
63
42
21
------------------------------------------------------------------------
 Last Epoch: 0   # of warm-up epochs to do:  10 - Run epochs 1 to 10
------------------------------------------------------------------------ 



### Warmup Training 

In [22]:

warmup_phase(ns,opt, environ, dldrs, epochs = 3, verbose = False, disable_tqdm = False)


----------------------------------------------------------------------
 Last Epoch: 0   # of warm-up epochs to do:  3 - Run epochs 1 to 3
---------------------------------------------------------------------- 

                                                                                                                                                               

IndexError: list index out of range

In [23]:
a = []
for i in range(7):
    a.append(None)
print(a)

[None, None, None, None, None, None, None]


In [30]:

warmup_phase(ns,opt, environ, dldrs, epochs = 30, verbose = False, disable_tqdm = False)


--------------------------------------------------------------------------
 Last Epoch: 48   # of warm-up epochs to do:  30 - Run epochs 49 to 78
-------------------------------------------------------------------------- 

 Ep  | Trunk LR  Heads LR  Polcy LR  Gmbl Tmp |  trn tsk    trn spar    trn shar   trn ttl |    logloss   bceloss  avg prec    aucroc     aucpr    f1_max |  val tsk    val spar    val shar     total |  time |
  49 | 5.00e-04  5.00e-04  1.00e-02  2.50e+00 |   0.9759   6.006e-05   2.397e-05    0.9760 |  4.167e-06   0.46361   0.67510   0.77184   0.66179   0.70336 |   2.1615   1.418e-05   5.660e-06    2.1615 |  57.7 |
 Previous best_epoch:    48   best iter:  3024   best_accuracy: 0.67443    best ROC auc: 0.77139
 New      best_epoch:    49   best iter:  3087   best_accuracy: 0.67510    best ROC auc: 0.77184
  50 | 5.00e-04  5.00e-04  1.00e-02  2.50e+00 |   0.9574   6.006e-05   2.397e-05    0.9575 |  4.199e-06   0.46278   0.67343   0.77057   0.65987   0.70201 |   2.1782 

In [31]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout]) 
print()
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
print(f'   best_epoch: {ns.best_epoch:5d}   best iter: {ns.best_iter:5d}'
      f'   best_accuracy: {ns.best_accuracy:.5f}    best ROC auc: {ns.best_roc_auc:.5f}')      
# print()
# environ.display_trained_logits(ns.current_epoch)
# environ.display_trained_policy(ns.current_epoch)
# environ.display_current_policy(ns.current_epoch)

  78 | 2.50e-04  2.50e-04  1.00e-02  2.50e+00 |   0.8406   6.006e-05   2.397e-05    0.8407 |  4.344e-06   0.47885   0.67015   0.76834   0.65659   0.69901 |   2.2534   1.418e-05   5.660e-06    2.2535 |  -0.0 |

[e] Last ep:78  it:4914  -  Losses:   	 Task: 2.2534   	 Sparsity: 1.41826e-05    	 Sharing: 5.65983e-06    	 Total: 2.2535 

   best_epoch:    49   best iter:  3087   best_accuracy: 0.67510    best ROC auc: 0.77184


In [33]:
for k, v in environ.optimizers.items():
    print(f' key: {k}  values: {v}')
    print(v.state_dict())    

 key: weights  values: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.00025
    weight_decay: 0.0001

Parameter Group 1
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.00025
    weight_decay: 0.0001
)
{'state': {0: {'step': 4914, 'exp_avg': tensor([[ 1.0075e-05,  4.2411e-05, -2.5538e-05,  ..., -1.1571e-05,
          2.4792e-05, -8.7961e-06],
        [ 3.1836e-05, -4.6826e-05, -4.3444e-06,  ..., -9.9101e-06,
         -4.1664e-07,  4.0225e-06],
        [ 1.0846e-06,  1.2314e-05, -3.0259e-06,  ..., -1.9407e-06,
          7.8857e-06,  6.6720e-06],
        ...,
        [ 1.9086e-07, -2.8435e-07, -1.2597e-06,  ..., -7.2261e-08,
         -4.8842e-06, -1.7269e-08],
        [-9.3063e-07, -2.6699e-07, -1.3299e-06,  ..., -1.6308e-07,
         -9.1830e-06, -8.9973e-07],
        [ 1.6819e-06, -1.5741e-07,  9.5266e-08,  ..., -9.7095e-08,
          1.2622e-05,  2.8874e-07]], device='cuda:0'), 'exp_avg_sq': tensor([[1.4925e-08, 1.5227e-

In [34]:
for k, v in environ.schedulers.items():
    print(f' key: {k}  values: {v}')
    print(v.state_dict())

 key: alphas  values: <torch.optim.lr_scheduler.ReduceLROnPlateau object at 0x7f0a7d609d30>
{'factor': 0.5, 'min_lrs': [0], 'patience': 20, 'verbose': True, 'cooldown': 5, 'cooldown_counter': 0, 'mode': 'min', 'threshold': 0.0001, 'threshold_mode': 'rel', 'best': inf, 'num_bad_epochs': 0, 'mode_worse': inf, 'eps': 1e-08, 'last_epoch': 0}
 key: weights  values: <torch.optim.lr_scheduler.ReduceLROnPlateau object at 0x7f0a7d609d00>
{'factor': 0.5, 'min_lrs': [0, 0], 'patience': 20, 'verbose': True, 'cooldown': 5, 'cooldown_counter': 0, 'mode': 'min', 'threshold': 0.0001, 'threshold_mode': 'rel', 'best': 2.1153364644531476, 'num_bad_epochs': 7, 'mode_worse': inf, 'eps': 1e-08, 'last_epoch': 78, '_last_lr': [0.00025, 0.00025]}


### End WandB 

In [35]:
ns.wandb_run.finish()
# ns.wandb_run.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
auc_pr,▁▄▆▇▇▇██████████████████████████████████
avg_prec_score,▁▄▆▇▇███████████████████████████████████
bceloss,█▆▃▂▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃
best_accuracy,▁▄▅▆▆▇▇▇▇▇████████
best_epoch,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄██
best_iter,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄██
best_roc_auc,▁▄▅▆▆▇▇▇▇█████████
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
f1_max,▁▄▆▇▇▇▇█████████████████████████████████
gumbel_temp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
auc_pr,0.65659
avg_prec_score,0.67015
bceloss,0.47885
best_roc_auc,0.77184
epoch,78.0
f1_max,0.69901
gumbel_temp,2.5
kappa,0.28173
kappa_max,0.46955
lambda_sharing,0.05


###  Some data peeks  

In [None]:
pp.pprint(environ.val_metrics['sparsity']['total'])
pp.pprint(environ.val_metrics['sharing']['total'])
pp.pprint(environ.val_metrics['sharing']['total'] +environ.val_metrics['sparsity']['total'])
pp.pprint(environ.val_metrics['task'])
pp.pprint(environ.val_metrics['total'])
pp.pprint(environ.val_metrics['epoch'])

In [None]:
from utils             import censored_mse_loss, censored_mae_loss, aggregate_results
task_key = 'task2'
print(environ.val_data[task_key]['yc_aggr_weights'].sum())
print(environ.val_data[task_key]['yc_aggr_weights'])
print(environ.val_metrics[task_key]['classification'])
# print(environ.val_metrics[task_key]['classification'].sum())
print(environ.val_metrics[task_key]['classification_agg'])
# print(environ.val_data[task_key]['yc_aggr_weights'])
# print((environ.batch_data[task_key]['yc_aggr_weights']==environ.val_data[task_key]['yc_aggr_weights']).all())


tmp = aggregate_results(environ.val_metrics[task_key]["classification"], 
                      environ.val_data[task_key]['yc_aggr_weights'],
                      verbose = True)
 

In [None]:
# del all_tgs, all_tgs2
del con,con2

In [None]:
# del con
ttl = 0

# con = np.ndarray()
appd_df = []
for i in range(1,11):
    task_key = f"task{i}"
    print(i, task_key, ' shape: ', environ.val_data[task_key]['yc_aggr_weights'].shape,  'classifiaction:', environ.val_metrics[task_key]['classification'].shape)
    tmp_df = environ.val_metrics[task_key]['classification'].where(pd.isnull,1)
    print(tmp_df.sum(axis=0))
    
    if i == 1:
        con = np.copy(environ.val_data[task_key]['yc_aggr_weights'])
        all_tgs = environ.val_metrics[task_key]['classification'].copy()
        print("initialize", con.shape, all_tgs.shape)
    else:
        con = np.hstack((con, environ.val_data[task_key]['yc_aggr_weights']))
        all_tgs = all_tgs.append(environ.val_metrics[task_key]['classification'])
        print("concatenate: ",task_key, "    ", con.shape, all_tgs.shape)
        
    ttl += environ.val_data[task_key]['yc_aggr_weights'].shape[0]
    
print('ttl : ', ttl,  'con.shape:', con.shape, 'all_tgs.shape', all_tgs.shape)

In [None]:
all_tgs2 = pd.concat(environ.val_metrics[f"task{i}"]['classification'] for i in range(1,11))

all_tgs2.info()
all_tgs2.head(50)

In [None]:
con2 = np.hstack([ environ.val_data[f"task{i}"]['yc_aggr_weights'] for i in range(1,11)])
con2.shape

In [None]:
# all_tgs.index = range(all_tgs.shape[0])

In [None]:
# print(all_tgs2[-50:])

In [None]:
all_tgs2_mod = all_tgs2.where(pd.isnull, 1) * con2[:,None]
all_tgs2_mod.sum(axis = 0)

In [None]:
# con3 = pd.concat([environ.val_metrics['task1']['classification'],environ.val_metrics['task2']['classification'] ])
# print(con3)

In [None]:
tmp2 = aggregate_results(all_tgs2, con2, verbose = True)
 

In [None]:
pp.pprint(environ.val_metrics['aggregated'])
print(environ.val_metrics['aggregated']['sc_loss'] )
print(environ.val_metrics['aggregated']["logloss"] ) 

In [None]:
pp.pprint(tmp2)
pp.pprint(tmp3)

In [None]:
all_tasks_classification_metrics = []
all_tasks_aggregation_weights    = [] 

for i in range(1,11):
    task_key = f"task{i}"
    print(i, task_key, ' shape: ', environ.val_data[task_key]['yc_aggr_weights'].shape,  'classifiaction:', environ.val_metrics[task_key]['classification'].shape)
    tmp_df = environ.val_metrics[task_key]['classification'].where(pd.isnull,1)
    print(tmp_df.sum(axis=0))
    
    all_tasks_classification_metrics.append(environ.val_metrics[task_key]['classification'])
    all_tasks_aggregation_weights.append(environ.val_data[task_key]['yc_aggr_weights'])
            

In [None]:
all_tgs3 = pd.concat(all_tasks_classification_metrics)
con3 = np.concatenate(all_tasks_aggregation_weights)

In [None]:
all_tgs3.info()
all_tgs3.head(20)

In [None]:
tmp3 = aggregate_results( all_tgs3, con3, verbose = True)

In [None]:
print(all_tgs2[0:1]['roc_auc_score'])
print(all_tgs3[0:1]['roc_auc_score'])
print((all_tgs2[0:1]['roc_auc_score'] == all_tgs3[0:1]['roc_auc_score']).all())
all_tgs2.compare(all_tgs3)

In [None]:
print(environ.val_data['task9']['yc_aggr_weights'].shape, con[3152:3496].shape)
print((environ.val_data['task9']['yc_aggr_weights'] == con[3152:3496]).all())a

In [None]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()

In [None]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()

In [None]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()

In [None]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n")
print()
for key in environ.val_metrics['aggregated']:
    print(f"{key:20s}    {environ.val_metrics['aggregated'][key]:0.4f}")
# pp.pprint(environ.val_metrics['aggregated'])

# Weight & Policy Training

### Weight/Policy Training Preparation

In [25]:
print( f" Backbone Initial LR            :      {environ.opt['train']['backbone_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[0]['lr']} \n"
       f" Tasks    Initial LR            :      {environ.opt['train']['task_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[1]['lr']}    \n"
       f" Policy   Initial LR            :      {environ.opt['train']['policy_lr']:4f}      Current LR : {environ.optimizers['alphas'].param_groups[0]['lr']}  \n")

print( f" Backbone (Group 0) Initial LR  : {environ.opt['train']['backbone_lr']:4f} \n"
       f" Tasks    (Group 1) Initial LR  : {environ.opt['train']['task_lr']:4f}    \n Params : {environ.optimizers['weights']} \n\n"
       f" Policy   Initial LR            : {environ.opt['train']['policy_lr']:4f}  \n Params : {environ.optimizers['alphas']}  \n\n")

print( f" Backbone Initial LR            : {environ.opt['train']['backbone_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[0]['lr']} \n"
       f" Tasks    Initial LR            : {environ.opt['train']['task_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[1]['lr']}    \n"
       f" Policy   Initial LR            : {environ.opt['train']['policy_lr']:4f}      Current LR : {environ.optimizers['alphas'].param_groups[0]['lr']}  \n")

 Backbone Initial LR            :      0.001000      Current LR : 0.000125 
 Tasks    Initial LR            :      0.001000      Current LR : 0.000125    
 Policy   Initial LR            :      0.010000      Current LR : 0.01  

 Backbone (Group 0) Initial LR  : 0.001000 
 Tasks    (Group 1) Initial LR  : 0.001000    
 Params : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.000125
    weight_decay: 0.0001

Parameter Group 1
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.000125
    weight_decay: 0.0001
) 

 Policy   Initial LR            : 0.010000  
 Params : Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0.0005
)  


 Backbone Initial LR            : 0.001000      Current LR : 0.000125 
 Tasks    Initial LR            : 0.001000      Current LR : 0.000125    
 Policy   Initial LR            : 0.010000      Current LR : 0.01  



In [26]:
print_underline('Weights Scheduler Parameters', verbose = True) 
for k,i in environ.schedulers['weights'].state_dict().items():
    print(f"    {k:20s}     value: {i}")

print_underline('Policy Scheduler Parameters', verbose = True)
for k,i in environ.schedulers['alphas'].state_dict().items():
    print(f"    {k:20s}     value: {i}")    


Weights Scheduler Parameters
------------------------------
    factor                   value: 0.5
    min_lrs                  value: [0, 0]
    patience                 value: 20
    verbose                  value: True
    cooldown                 value: 5
    cooldown_counter         value: 0
    mode                     value: min
    threshold                value: 0.0001
    threshold_mode           value: rel
    best                     value: 2.0944523516289357
    num_bad_epochs           value: 1
    mode_worse               value: inf
    eps                      value: 1e-08
    last_epoch               value: 150
    _last_lr                 value: [0.000125, 0.000125]

Policy Scheduler Parameters
-----------------------------
    factor                   value: 0.5
    min_lrs                  value: [0]
    patience                 value: 20
    verbose                  value: True
    cooldown                 value: 5
    cooldown_counter         value: 0
    mode  

In [27]:
ns.flag = 'update_weights'
model_initializations(ns, opt, environ, phase = ns.flag, policy_learning = True)
training_initializations(ns, opt, environ, dldrs, warmup = False)

# training_initializations(ns, opt, environ, dldrs, warmup_iterations = 200,  weight_iterations = 2, policy_iterations = 2, eval_iterations = 1, warmup = False)
# training_initializations(ns, opt, environ, dldrs, warmup_iterations = 1000, weight_iterations = 750, policy_iterations = 250, eval_iterations = 500, warmup = False)

 Model optimizers defined . . . policy_learning: True
 Model schedulers defined . . . policy_learning: True
 Metrics CSV file header written . . . 
 Model initializations complete . . . 
 training preparation: - check for CUDA - cuda available as device id: [0]
sparsechem_env.cuda()
 policy policy1 is None
 policy policy2 is None
 policy policy3 is None
 policy policy4 is None
 policy policy5 is None
 policy policy6 is None
 policy policy7 is None
 policy policy8 is None
 policy policy9 is None
 policy policy10 is None
 training preparation: - set print_freq to                                 : 1989 
 training preparation: - set number of batches per warmup training epoch to: 1989
 training preparation: - set number of batches per weight training epoch to: 1318
 training preparation: - set number of batches per policy training epoch to: 671
 training preparation: - set number of batches per validation to           : 675
 training preparation complete . . .


In [28]:
print_heading( f"** {timestring()} \n"
               f"** Training epoch: {ns.current_epoch} iter: {ns.current_iter}   flag: {ns.flag} \n"
               f"** Set optimizer and scheduler to policy_learning = True (Switch weight optimizer from ADAM to SGD)\n"
               f"** Switch from Warm Up training to Alternate training Weights & Policy \n"
               f"** Take checkpoint and block gradient flow through Policy net", verbose=True)

------------------------------------------------------------------------------------------------------------------------
** 2022-08-30 21:33:51:354371 
** Training epoch: 150 iter: 298350   flag: update_weights 
** Set optimizer and scheduler to policy_learning = True (Switch weight optimizer from ADAM to SGD)
** Switch from Warm Up training to Alternate training Weights & Policy 
** Take checkpoint and block gradient flow through Policy net
------------------------------------------------------------------------------------------------------------------------ 



In [45]:
print(environ.opt['train']['lambda_sparsity'])
print(environ.opt['train']['lambda_sharing'])
print(environ.opt['train']['decay_temp_freq'])

0.005
0.05
3


In [46]:
# environ.opt['is_curriculum'] = True
# environ.opt['curriculum_speed'] = 4
# ns.num_train_layers = None
ns.training_epochs = 10

environ.opt['train']['lambda_sparsity'] = 0.01
# environ.opt['train']['lambda_sharing']  = 0.01
# environ.opt['train']['decay_temp_freq'] = 6

# print(environ.opt['train']['lambda_sparsity'])
# print(environ.opt['train']['lambda_sharing'])
# print(environ.opt['train']['decay_temp_freq'])

In [47]:
print( f" ns.flag                        :      {ns.flag}")
print( f" num_train_layers               :      {ns.num_train_layers}")
print( f" environ.opt['is_curriculum']   :      {environ.opt['is_curriculum']}")
print( f" environ.opt['curriculum_speed']:      {environ.opt['curriculum_speed']}\n")
print( f" Backbone Initial LR            :      {environ.opt['train']['backbone_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[0]['lr']} \n"
       f" Tasks    Initial LR            :      {environ.opt['train']['task_lr']:4f}      Current LR : {environ.optimizers['weights'].param_groups[1]['lr']}    \n"
       f" Policy   Initial LR            :      {environ.opt['train']['policy_lr']:4f}      Current LR : {environ.optimizers['alphas'].param_groups[0]['lr']}  \n")

print( f" Hard Sampling                  :      {environ.opt['train']['hard_sampling']}\n")

print( f" Sparsity regularization        :      {environ.opt['train']['lambda_sparsity']}\n"
       f" Sharing  regularization        :      {environ.opt['train']['lambda_sharing']} \n"
       f" Tasks    regularization        :      {environ.opt['train']['lambda_tasks']}   \n\n")

print( f" Gumbel Temp                    :      {environ.gumbel_temperature:.4f}         \n" #
       f" Gumbel Temp decay frequency    :      {environ.opt['train']['decay_temp_freq']} \n") #

print( f" ns.current_epoch               :      {ns.current_epoch}")
print( f" ns.training_epochs             :      {ns.training_epochs} \n") 
print( f" ns.current_iters               :      {ns.current_iter}")  
print( f" Batches in warmup epoch        :      {ns.trn_iters_warmup}")
print( f" Batches in weight epoch        :      {ns.trn_iters_weights}")
print( f" Batches in policy epoch        :      {ns.trn_iters_policy}")
print( f" Batches in validation          :      {ns.eval_iters}")
print( f" num_train_layers               :      {ns.num_train_layers} \n")

 ns.flag                        :      update_weights
 num_train_layers               :      6
 environ.opt['is_curriculum']   :      False
 environ.opt['curriculum_speed']:      3

 Backbone Initial LR            :      0.001000      Current LR : 0.00025 
 Tasks    Initial LR            :      0.001000      Current LR : 0.00025    
 Policy   Initial LR            :      0.010000      Current LR : 0.0025  

 Hard Sampling                  :      False

 Sparsity regularization        :      0.01
 Sharing  regularization        :      0.05 
 Tasks    regularization        :      1.0   


 Gumbel Temp                    :      0.0004         
 Gumbel Temp decay              :      3 

 ns.current_epoch               :      240
 ns.training_epochs             :      10 

 ns.current_iters               :      477360
 Batches in warmup epoch        :      1989
 Batches in weight epoch        :      1318
 Batches in policy epoch        :      671
 Batches in validation          :      675
 

In [44]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout]) 
print()
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
print(f'   best_epoch: {ns.best_epoch:5d}   best iter: {ns.best_iter:5d}'
      f'   best_accuracy: {ns.best_accuracy:.5f}    best ROC auc: {ns.best_roc_auc:.5f}')     
print()
environ.display_trained_policy(ns.current_epoch)
environ.display_trained_logits(ns.current_epoch)
environ.display_current_policy(ns.current_epoch)

print_heading(f" Last Epoch Completed : {ns.current_epoch}       # of epochs to run:  {ns.training_epochs} -->  epochs {ns.current_epoch+1} to {ns.training_epochs + ns.current_epoch}"
              f"\n policy_learning rate : {environ.opt['train']['policy_lr']} "
              f"\n lambda_sparsity      : {environ.opt['train']['lambda_sparsity']}"
              f"\n lambda_sharing       : {environ.opt['train']['lambda_sharing']}"
              f"\n curriculum training  : {opt['is_curriculum']}     cirriculum speed: {opt['curriculum_speed']}     num_training_layers : {ns.num_train_layers}", 
              verbose = True)

 240 | 2.50e-04  2.50e-04  2.50e-03  5.95e-04 |   1.0158   5.050e-04   1.769e-03    1.0181 |  4.141e-06   0.45690   0.66556   0.76161   0.65192   0.69522 |   2.1478   5.176e-04   1.816e-03    2.1501 |  -0.0 |

[e] Last ep:240  it:477360  -  Losses:   	 Task: 2.1478   	 Sparsity: 5.17625e-04    	 Sharing: 1.81609e-03    	 Total: 2.1501 

   best_epoch:   230   best iter: 456799   best_accuracy: 0.66728    best ROC auc: 0.76332


 ep:  240    softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s         
 ----- ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    
  0    0.7231    0.2769  1    0.6753    0.3247  1    0.7544    0.2456  1    0.6956    0

### Weight/Policy Training

In [36]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout]) 
print()
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
print(f'   best_epoch: {ns.best_epoch:5d}   best iter: {ns.best_iter:5d}'
      f'   best_accuracy: {ns.best_accuracy:.5f}    best ROC auc: {ns.best_roc_auc:.5f}')      

 200 | 5.00e-04  5.00e-04  5.00e-03  2.51e-02 |   1.2543   5.526e-04   5.472e-04    1.2554 |  4.117e-06   0.45601   0.66590   0.76196   0.65246   0.69572 |   2.1354   5.663e-04   5.754e-04    2.1366 |  -0.0 |

[e] Last ep:200  it:397800  -  Losses:   	 Task: 2.1354   	 Sparsity: 5.66264e-04    	 Sharing: 5.75399e-04    	 Total: 2.1366 

   best_epoch:   191   best iter: 379899   best_accuracy: 0.66763    best ROC auc: 0.76265


In [None]:
weight_policy_training(ns, opt, environ, dldrs, epochs =20, display_policy = True, disable_tqdm = False)


------------------------------------------------------------------------------------------------------------------------
 Last Epoch Completed : 220       # of epochs to run:  20 -->  epochs 221 to 240
 Backbone Initial LR  : 0.001      Current LR : 0.0005 
 Heads    Initial LR  : 0.001      Current LR : 0.0005
 Policy   Initial LR  : 0.01      Current LR : 0.005
 Regularization tasks : 1.0          Sparsity: 0.005           sharing: 0.05
 curriculum training  : False      Cirriculum speed: 3     num_training_layers : 6
------------------------------------------------------------------------------------------------------------------------ 

 Ep  | Trunk LR  Heads LR  Polcy LR  Gmbl Tmp |  trn tsk    trn spar    trn shar   trn ttl |    logloss   bceloss  avg prec    aucroc     aucpr    f1_max |  val tsk    val spar    val shar     total |  time |
 221 | 5.00e-04  5.00e-04  5.00e-03  3.34e-03 |   0.9841   8.732e-04   4.989e-03    0.9900 |  4.124e-06   0.45618   0.66685   0.76218   0.6532

 224 | 2.50e-04  2.50e-04  2.50e-03  2.51e-03 |   1.1125   8.751e-04   2.771e-03    1.1162 |  4.123e-06   0.45630   0.66710   0.76259   0.65348   0.69648 |   2.1385   5.457e-04   1.728e-03    2.1407 |1306.5 |
 224 | 2.50e-04  2.50e-04  2.50e-03  2.51e-03 |   1.3703   5.354e-04   1.485e-03    1.3723 |  4.122e-06   0.45620   0.66625   0.76183   0.65295   0.69601 |   2.1381   5.488e-04   1.543e-03    2.1402 | 246.4 |

 ep:  224    softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s         
 ----- ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    
  0    0.7000    0.3000  1    0.7615    0.2385  1    0.7625    0.2375  0    0.7171    0.2829  1    0

 228 | 2.50e-04  2.50e-04  2.50e-03  1.88e-03 |   1.0429   8.584e-04   1.954e-03    1.0457 |  4.133e-06   0.45681   0.66580   0.76230   0.65232   0.69584 |   2.1439   5.352e-04   1.218e-03    2.1456 |1305.6 |
 228 | 2.50e-04  2.50e-04  2.50e-03  1.88e-03 |   1.0279   5.245e-04   8.995e-04    1.0294 |  4.154e-06   0.45668   0.66648   0.76262   0.65320   0.69598 |   2.1549   5.377e-04   9.312e-04    2.1564 | 250.8 |
 decay gumbel temperature to 0.0014110189840593756

 ep:  228    softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s         
 ----- ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    
  0    0.7377    0.2623  0    0.7200    0.2800  1

In [36]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout]) 
print()
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
print(f'   best_epoch: {ns.best_epoch:5d}   best iter: {ns.best_iter:5d}'
      f'   best_accuracy: {ns.best_accuracy:.5f}    best ROC auc: {ns.best_roc_auc:.5f}')      

 200 | 5.00e-04  5.00e-04  5.00e-03  2.51e-02 |   1.2543   5.526e-04   5.472e-04    1.2554 |  4.117e-06   0.45601   0.66590   0.76196   0.65246   0.69572 |   2.1354   5.663e-04   5.754e-04    2.1366 |  -0.0 |

[e] Last ep:200  it:397800  -  Losses:   	 Task: 2.1354   	 Sparsity: 5.66264e-04    	 Sharing: 5.75399e-04    	 Total: 2.1366 

   best_epoch:   191   best iter: 379899   best_accuracy: 0.66763    best ROC auc: 0.76265


In [42]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout]) 
print()
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
print(f'   best_epoch: {ns.best_epoch:5d}   best iter: {ns.best_iter:5d}'
      f'   best_accuracy: {ns.best_accuracy:.5f}    best ROC auc: {ns.best_roc_auc:.5f}')      

 240 | 2.50e-04  2.50e-04  2.50e-03  5.95e-04 |   1.0158   5.050e-04   1.769e-03    1.0181 |  4.141e-06   0.45690   0.66556   0.76161   0.65192   0.69522 |   2.1478   5.176e-04   1.816e-03    2.1501 |  -0.0 |

[e] Last ep:240  it:477360  -  Losses:   	 Task: 2.1478   	 Sparsity: 5.17625e-04    	 Sharing: 1.81609e-03    	 Total: 2.1501 

   best_epoch:   230   best iter: 456799   best_accuracy: 0.66728    best ROC auc: 0.76332


In [37]:
print_underline('Weights Scheduler Parameters', verbose = True) 
for k,i in environ.schedulers['weights'].state_dict().items():
    print(f"    {k:20s}     value: {i}")

print_underline('Policy Scheduler Parameters', verbose = True)
for k,i in environ.schedulers['alphas'].state_dict().items():
    print(f"    {k:20s}     value: {i}")    


Weights Scheduler Parameters
------------------------------
    factor                   value: 0.5
    min_lrs                  value: [0, 0]
    patience                 value: 20
    verbose                  value: True
    cooldown                 value: 5
    cooldown_counter         value: 2
    mode                     value: min
    threshold                value: 0.0001
    threshold_mode           value: rel
    best                     value: 2.1219009263455093
    num_bad_epochs           value: 0
    mode_worse               value: inf
    eps                      value: 1e-08
    last_epoch               value: 50
    _last_lr                 value: [0.0005, 0.0005]

Policy Scheduler Parameters
-----------------------------
    factor                   value: 0.5
    min_lrs                  value: [0]
    patience                 value: 20
    verbose                  value: True
    cooldown                 value: 5
    cooldown_counter         value: 2
    mode       

### Weight/Policy Training - repeat

In [48]:
weight_policy_training(ns, opt, environ, dldrs, epochs = 10, display_policy = True, disable_tqdm = False)


------------------------------------------------------------------------------------------------------------------------
 Last Epoch Completed : 240       # of epochs to run:  10 -->  epochs 241 to 250
 Backbone Initial LR  : 0.001      Current LR : 0.00025 
 Heads    Initial LR  : 0.001      Current LR : 0.00025
 Policy   Initial LR  : 0.01      Current LR : 0.0025
 Regularization tasks : 1.0          Sparsity: 0.01           sharing: 0.05
 curriculum training  : False      Cirriculum speed: 3     num_training_layers : 6
------------------------------------------------------------------------------------------------------------------------ 

 Ep  | Trunk LR  Heads LR  Polcy LR  Gmbl Tmp |  trn tsk    trn spar    trn shar   trn ttl |    logloss   bceloss  avg prec    aucroc     aucpr    f1_max |  val tsk    val spar    val shar     total |  time |
 241 | 2.50e-04  2.50e-04  2.50e-03  4.46e-04 |   0.7674   1.660e-03   2.913e-03    0.7719 |  4.130e-06   0.45688   0.66528   0.76121   0.65

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 247 | 2.50e-04  2.50e-04  2.50e-03  2.51e-04 |   0.9301   1.641e-03   2.323e-03    0.9341 |  4.127e-06   0.45676   0.66581   0.76292   0.65224   0.69544 |   2.1407   1.023e-03   1.448e-03    2.1431 |1325.6 |
validation:  72%|███████████████████████████████████▎             | 487/675 [01:11<00:25,  7.31it/s, it=488, Lss=1.7573, Spr=9.3013e-04, Shr=1.2727e-03, lyr=6]

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



 248 | 2.50e-04  2.50e-04  2.50e-03  2.51e-04 |   1.0390   1.631e-03   2.232e-03    1.0428 |  4.121e-06   0.45608   0.66584   0.76264   0.65271   0.69533 |   2.1376   1.017e-03   1.391e-03    2.1400 |1329.8 |
 248 | 2.50e-04  2.50e-04  2.50e-03  2.51e-04 |   1.1531   9.862e-04   1.193e-03    1.1553 |  4.122e-06   0.45629   0.66596   0.76296   0.65243   0.69548 |   2.1384   1.011e-03   1.235e-03    2.1406 | 258.0 |

 ep:  248    softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s          softmax     s         
 ----- ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    ----------------- -    
  0    0.6878    0.3122  1    0.7411    0.2589  1    0.6969    0.3031  1    0.7301    0.2699  0    0

In [49]:
print_metrics_cr(ns.current_epoch,  time.time() - time.time() , ns.trn_losses, ns.val_metrics, 1, out=[sys.stdout]) 
print()
print_loss(ns.val_metrics, title = f"[e] Last ep:{ns.current_epoch}  it:{ns.current_iter} ")
print()
print(f'   best_epoch: {ns.best_epoch:5d}   best iter: {ns.best_iter:5d}'
      f'   best_accuracy: {ns.best_accuracy:.5f}    best ROC auc: {ns.best_roc_auc:.5f}')      

 250 | 1.25e-04  1.25e-04  1.25e-03  1.88e-04 |   1.1690   9.724e-04   1.353e-03    1.1713 |  4.130e-06   0.45660   0.66607   0.76266   0.65240   0.69550 |   2.1423   9.967e-04   1.384e-03    2.1447 |  -0.0 |

[e] Last ep:250  it:497250  -  Losses:   	 Task: 2.1423   	 Sparsity: 9.96697e-04    	 Sharing: 1.38397e-03    	 Total: 2.1447 

   best_epoch:   230   best iter: 456799   best_accuracy: 0.66728    best ROC auc: 0.76332


In [50]:
print_underline('Weights Scheduler Parameters', verbose = True) 
for k,i in environ.schedulers['weights'].state_dict().items():
    print(f"    {k:20s}     value: {i}")

print_underline('Policy Scheduler Parameters', verbose = True)
for k,i in environ.schedulers['alphas'].state_dict().items():
    print(f"    {k:20s}     value: {i}")    


Weights Scheduler Parameters
------------------------------
    factor                   value: 0.5
    min_lrs                  value: [0, 0]
    patience                 value: 20
    verbose                  value: True
    cooldown                 value: 5
    cooldown_counter         value: 4
    mode                     value: min
    threshold                value: 0.0001
    threshold_mode           value: rel
    best                     value: 2.1219009263455093
    num_bad_epochs           value: 0
    mode_worse               value: inf
    eps                      value: 1e-08
    last_epoch               value: 100
    _last_lr                 value: [0.000125, 0.000125]

Policy Scheduler Parameters
-----------------------------
    factor                   value: 0.5
    min_lrs                  value: [0]
    patience                 value: 20
    verbose                  value: True
    cooldown                 value: 5
    cooldown_counter         value: 4
    mode  

In [None]:
# environ.schedulers['alphas'].patience = 15

### Close WandB run

In [51]:
ns.wandb_run.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
auc_pr,▁▃▄▄▄▄▄▄▃▃▆▆▆▇▇▇▇▇█▇████▇███████████████
avg_prec_score,▁▃▄▄▄▄▄▄▄▃▆▆▆▇▇▇▇▇██████▇███████████████
bceloss,█▅▄▄▄▄▄▄▄▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▂▂▂▁▁▁▂▂▂▂▂▂▂▂▂▂
best_accuracy,▁▂▃▃▃▄▄▄▄▄▄▅▅▅▄▅▆▆▆▆▆▆▇▇▇▇▇▇▇███████████
best_epoch,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▇▇▇█
best_iter,▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▇▇▇▇█
best_roc_auc,▁▂▃▃▃▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇█████████████
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
f1_max,▁▃▄▄▄▄▄▄▄▃▆▆▇▇▇▇▇▇██████▇▇▇█████████████
gumbel_temp,████████████████████████▆▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁

0,1
auc_pr,0.6524
avg_prec_score,0.66607
bceloss,0.4566
best_roc_auc,0.76332
epoch,250.0
f1_max,0.6955
gumbel_temp,0.00019
kappa,0.26631
kappa_max,0.46433
lambda_sharing,0.05


# Misc Code 

### Check values

In [None]:
# ns.best_epoch = 0
# from utils.notebook_modules import wrapup_phase
# wrapup_phase(ns, opt, environ)

In [None]:
# environ.opt['train']['policy_lr']       = 0.002
# environ.opt['train']['lambda_sparsity'] = 0.05
# environ.opt['train']['lambda_sharing']  = 0.01
# environ.opt['train']['lambda_tasks']    = 1.0
# # environ.opt['train']['decay_temp_freq'] = 2

In [None]:
print(opt['diff_sparsity_weights'])
print(opt['is_sharing'])
print(opt['diff_sparsity_weights'] and not opt['is_sharing'])
print(environ.opt['train']['Lambda_sharing'])
print(opt['train']['Lambda_sharing'])
print(environ.opt['train']['Lambda_sparsity'])
print(opt['train']['Lambda_sparsity'])
print(environ.opt['train']['policy_lr'])
print(opt['train']['policy_lr'])

In [None]:
print( f" Backbone Learning Rate      : {environ.opt['train']['backbone_lr']}\n"
       f" Tasks    Learning Rate      : {environ.opt['train']['task_lr']}\n"
       f" Policy   Learning Rate      : {environ.opt['train']['policy_lr']}\n")

print( f" Sparsity regularization     : {environ.opt['train']['lambda_sparsity']}\n"
       f" Sharing  regularization     : {environ.opt['train']['lambda_sharing']} \n\n"
       f" Tasks    regularization     : {environ.opt['train']['lambda_tasks']}   \n"
       f" Gumbel Temp                 : {environ.gumbel_temperature:.4f}         \n" 
       f" Gumbel Temp decay           : {environ.opt['train']['decay_temp_freq']}\n") 

print( f" current_iters               : {ns.current_iter}   \n"
       f" current_epochs              : {ns.current_epoch}  \n" 
       f" train_total_epochs          : {ns.training_epochs}\n" 
       f" stop_epoch_training         : {ns.stop_epoch_training}")

In [None]:
from torch import nn

In [None]:
for name, param in environ.networks['mtl-net'].named_parameters():
    print(f" {name:40s}  {param.shape} ")

In [None]:
for name, param in environ.networks['mtl-net'].backbone.named_parameters():
        print(f" {name:40s}  {param.shape} ")

In [None]:
for name, param in environ.networks['mtl-net'].named_parameters():
    if 'task' in name and 'fc' in name:    
        print(f" {name:40s}  {param.shape} ")

In [None]:
num_blocks = 6
num_policy_layers = 6
gt =  torch.ones((num_blocks)).long()
gt0 =  torch.zeros((num_blocks)).long()
print(gt)
print(gt0)

loss_weights = ((torch.arange(0, num_policy_layers, 1) + 1).float() / num_policy_layers)
print(loss_weights)

In [None]:
if environ.opt['diff_sparsity_weights'] and not environ.opt['is_sharing']:
    print(' cond 1')
    ## Assign higher weights to higher layers 
    loss_weights = ((torch.arange(0, num_policy_layers, 1) + 1).float() / num_policy_layers)
    print(f"{task_key} sparsity error:  {2 * (loss_weights[-num_blocks:] * environ.cross_entropy2(logits[-num_blocks:], gt)).mean()})")
    print_dbg(f" loss_weights :  {loss_weights}", verbose = True)
    print_dbg(f" cross_entropy:  {environ.cross_entropy2(logits[-num_blocks:], gt)}  ", verbose = True)
    print_dbg(f" loss[sparsity][{task_key}]: {self.losses['sparsity'][task_key] } ", verbose = True)

else:
    print('\n cond 2')
    print_dbg(f"Compute CrossEntropyLoss between \n Logits   : \n{logits[-num_blocks:]} \n and gt: \n{gt} \n", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[-num_blocks:], gt)}")
    
    print('\n cond 2')
    print_dbg(f"Compute CrossEntropyLoss between Logits      : {logits[-1:]}  and gt: {gt[-1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[-1:], gt[-1:])} \n")
    print_dbg(f"Compute CrossEntropyLoss between Logits      : {logits[-1:]}  and gt: {gt0[-1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[-1:], gt0[-1:])} \n")
    
    print('\n cond 3')    
    print_dbg(f"Compute CrossEntropyLoss between Logits   : {logits[0:1]}  and gt: {gt[0:1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[0:1], gt[0:1])} \n")
    print_dbg(f"Compute CrossEntropyLoss between Logits   : {logits[0:1]}  and gt: {gt0[0:1]} ", verbose = True)
    print(f"{task_key} sparsity error:  {environ.cross_entropy_sparsity(logits[0:1], gt0[0:1])} \n")
        
        

In [None]:
print(" ns.check_for_improvment_wait:  {ns.check_for_improvment_wait}")
print(" ns.curriculum_epochs:          {ns.curriculum_epochs}")

In [None]:
# pp.pprint(environ.val_metrics)
df = environ.val_metrics['task1']['classification']

In [None]:
print(df[pd.notna(df.roc_auc_score)])

In [None]:
df[pd.notna(df.roc_auc_score)].mean()

In [None]:
# environ.display_trained_policy(ns.current_epoch,out=[sys.stdout])
# environ.num_tasks
# print(environ.get_policy_prob().shape)
# print(environ.val_data['task1'].keys())
# print(environ.val_data['task1']['yc_ind'][0][:40])
# print(environ.val_data['task1']['yc_ind'][1][:40])
# print(environ.val_data['task1']['yc_data'][:40])
# print(environ.val_data['task1']['yc_hat'][:40])
# environ.display_trained_policy(ns.current_epoch,out=[sys.stdout])
# environ.display_trained_logits(ns.current_epoch,out=[sys.stdout])
batch = next(dldrs.warmup_trn_loader)   

In [None]:
batch.keys()

### Losses and Metrics

In [None]:
print(f" val_metric keys               : {ns.val_metrics.keys()}")
print(f" aggreagted keys               : {ns.val_metrics['aggregated'].keys()}")
print(f" task keys                     : {ns.val_metrics['task'].keys()}")
print(f" task / task1 keys             : {ns.val_metrics['task']['task1']}")
print(f" sparsity keys                 : {ns.val_metrics['sparsity'].keys()}")
print(f" total keys                    : {ns.val_metrics['total'].keys()}")
print(f" aggregated keys               : {ns.val_metrics['aggregated'].keys()}")
print()
print(f" task1 keys                    : {ns.val_metrics['task1'].keys()}")
print(f" task1 classification keys     : {ns.val_metrics['task1']['classification'].keys()}")
print(f" task1 classification_agg keys : {ns.val_metrics['task1']['classification_agg'].keys()}")

print()
print(f" task1 agg sc_loss             : {ns.val_metrics['task1']['classification_agg']['sc_loss']:5f}")
print(f" task1 agg bce_loss            : {ns.val_metrics['task1']['classification_agg']['bceloss']:5f}")
print(f" task1 agg bce_loss            : {ns.val_metrics['task1']['classification_agg']['logloss']:5f}")
print(f" task-task1                    : {ns.val_metrics['task']['task1']:5f}")
print(f" task-task1                    : \n  {ns.val_metrics['task1']['classification']}")
print(f" task-task1                    : \n  {ns.val_metrics['task1']['classification_agg']}")

print()
print(f" task2                         : {ns.val_metrics['task2']['classification_agg']['sc_loss']:5f}")
print(f" task3                         : {ns.val_metrics['task3']['classification_agg']['sc_loss']:5f}")
print(f" loss                          : {ns.val_metrics['loss']['total']:5f}")
print(f" train_time                    : {ns.val_metrics['train_time']:2f}")
print(f" epoch                         : {ns.val_metrics['epoch']}")


In [None]:
environ.batch_data['task1']['yc_trn_weights'].shape

In [None]:
tmp = environ.get_loss_dict()
print(tmp.keys())
pp.pprint(tmp)

In [None]:
type(ns.val_metrics['aggregated'])

In [None]:
pp.pprint(ns.trn_losses)

In [None]:
pp.pprint(environ.val_metrics)

### val_data

In [None]:
dldrs.val_loader.dataset

In [None]:
dldrs.val_loader.dataset.y_class_list

In [None]:
(environ.val_data['task1']['yc_data'][0] == environ.val_data['task1']['yc_data']).all()

In [None]:
from utils.sparsechem_utils import compute_metrics, aggregate_results
import pandas
cc = compute_metrics(cols   = environ.val_data['task1']['yc_ind'][1], 
                     y_true = environ.val_data['task1']['yc_data'], 
                     y_score= environ.val_data['task1']['yc_hat'] ,
                     num_tasks=100)


In [None]:
 df   = pd.DataFrame({"task"   : environ.val_data['task1']['yc_ind'][1], 
                      "y_true" : environ.val_data['task1']['yc_data'],  
                      "y_score": environ.val_data['task1']['yc_hat']})

In [None]:
for task, frame in df.groupby("task", sort=True):
    print(f" task {task}")
    print(frame.head(10))

In [None]:
# df
df.groupby("task", sort=True).count()

In [None]:
pp.pprint(environ.val_metrics)

In [None]:
print(environ.batch_data['task1']['yc_aggr_weights'])
environ.batch['task1']['aggr_weights']

In [None]:
c2 = aggregate_results(cc)

In [None]:
dldrs.trainset0.tasks_weights_list

### Post Warm-up Training stuff

In [None]:
get_all_task_logits
    "p = environ.get_sample_policy(hard_sampling = False)\n"print(p)
p = environ.get_policy_prob()
print(p)
p = environ.get_policy_logits()
print(p)

# p = environ.get_current_policy()
# print(p)

In [None]:
a = softmax([0.0, 1])
print(a)
sampled = np.random.choice((1, 0), p=a)
print(sampled)

In [None]:
print(environ.optimizers['weights'])
print(environ.schedulers['weights'].get_last_lr())

In [None]:
print('losses.keys      : ', environ.losses.keys())
print('losses[task]keys : ', environ.losses['task1'].keys())
pp.pprint(environ.losses)

In [None]:
print( environ.val_metrics.keys())
# pp.pprint(val_metrics)
print(type(environ.val_metrics['aggregated']))
print()
print(type(environ.val_metrics['task1']['classification_agg']))
print()
pp.pprint(environ.val_metrics)

### Policy / Logit stuff

In [None]:
from scipy.special          import softmax

In [None]:
np.set_printoptions(precision=8,edgeitems=3, infstr='inf', linewidth=150, nanstr='nan')
torch.set_printoptions(precision=8,linewidth=132)

#### `get_task_logits(n)` Get logits for task group n

In [None]:
task_logits = environ.get_task_logits(1)
print(task_logits)

#### `get_arch_parameters()`: Get last used logits from network

In [None]:
import torch.optim as optim
arch_parameters      = environ.get_arch_parameters()
print(arch_parameters)

In [None]:
import torch.optim as optim
arch_parameters      = environ.get_arch_parameters()
print(arch_parameters)

#### `get_policy_logits()`:  Get Policy Logits - returns same as `get_arch_parameters()`

In [None]:
logs = environ.get_policy_logits()
for i in logs:
    print(i, '\n')
# probs = softmax(logs, axis= -1)
# for i in probs:
#     print(i, '\n')

#### `get_policy_prob()` : Gets the softmax of the logits

In [None]:
policy_softmaxs = environ.get_policy_prob()
for i in policy_softmaxs:
    print(i, '\n')

#### `get_sample_policy( hard_sampling = False)` : Calls test_sample_policy of network with random choices based on softmax of logits

In [None]:
policy_softmaxs = environ.get_policy_prob()
policies,logits = environ.get_sample_policy(hard_sampling = False)

for l, p, s in zip(logits, policies, policy_softmaxs) :
    for  l_row, p_row, s_row in zip(l, p, s):
        print( l_row,'\t', p_row, '\t', s_row)
    print('\n')

#### `get_sample_policy( hard_sampling = True)` : Calls test_sample_policy of network using ARGMAX of logits

In [None]:
policy_softmaxs = environ.get_policy_prob()
hard_policies, logits = environ.get_sample_policy(hard_sampling = True)

for p,l,s in zip(hard_policies, logits, policy_softmaxs) :
    for  p_row, l_row, s_row in zip(p, l, s):
        print( l_row,'\t', p_row, '\t', s_row)
    print('\n')

#### Print

In [None]:
print(f" Layer    task 1      task 2      task 3")
print(f" -----    ------      ------      ------")
for idx, (l1, l2, l3) in enumerate(zip(hard_policies[0], hard_policies[1], hard_policies[2]),1):
    print(f"   {idx}      {l1}       {l2}       {l3}")
    

    print(f"\n\n where [p1  p2]:  p1: layer is selected    p2: layer is not selected")

In [None]:
def display_trained_policy(iter):

    policy_softmaxs = environ.get_policy_prob()
    policy_argmaxs = 1-np.argmax(policy_softmaxs, axis = -1)
    print(f"  Trained polcies at iteration: {iter} ")
    print(f"                   task 1                           task 2                         task 3        ")
    print(f" Layer       softmax        select          softmax        select          softmax        select   ")
    print(f" -----    ---------------   ------       ---------------   ------       ---------------   ------   ")
    for idx, (l1,l2,l3,  p1,p2,p3) in enumerate(zip(policy_softmaxs[0], policy_softmaxs[1], policy_softmaxs[2], policy_argmaxs[0], policy_argmaxs[1], policy_argmaxs[2]),1):
        print(f"   {idx}      {l1[0]:.4f}   {l1[1]:.4f}   {p1:4d}    {l2[0]:11.4f}   {l2[1]:.4f}   {p2:4d}    {l3[0]:11.4f}   {l3[1]:.4f}   {p3:4d}")

    print()
# print(f"\n\n where [p1  p2]:  p1: layer is selected    p2: layer is not selected")

In [None]:
display_trained_policy(5)

In [None]:
print(f"                        POLICIES (SOFTMAX)                                       task 3          ")
print(f" Layer    task1              task2            task3 softmax         softmax         argmax         softmax         argmax   ")
print(f" -----    -------------     -------------     -------------   ------   ")
for idx, (l1,l2,l3, h1,h2,h3) in enumerate(zip(policy_softmaxs[0], policy_softmaxs[1], policy_softmaxs[2],hard_policies[0], hard_policies[1], hard_policies[2]),1):
    print(f"   {idx}      {l1[0]:.4f} {l1[1]:.4f}     {l2[0]:.4f} {l2[1]:.4f}     {l3[0]:.4f} {l3[1]:.4f}    {h3}")
    
print(f"\n\n where [p1  p2]:  p1: layer is selected    p2: layer is not selected")

In [None]:
# print(policy_softmaxs[2], np.argmax(1-policy_softmaxs[2], axis = -1))
print(policy_softmaxs, np.argmax(policy_softmaxs, axis = -1))

#### `get_current_logits()` : Calls test_sample_policy of network using ARGMAX of logits

In [None]:
logits  = (environ.get_current_logits())
for i in logits:
    print(i ,'\n')

#### `get_current_policy()` : Calls test_sample_policy of network using ARGMAX of logits

In [None]:
pols  = (environ.get_current_policy())

for i in pols:
    print(i ,'\n')

#### `gumbel_softmax()`  

In [None]:
np.set_printoptions(precision=8,edgeitems=3, infstr='inf', linewidth=150, nanstr='nan', floatmode = 'maxprec_equal')
torch.set_printoptions(precision=8,linewidth=132)

In [None]:
print(environ.temp)
# tau = environ.temp
tau = 1
for i in range(3): 
    logits_tensor = torch.tensor(logits[0])
    # Sample soft categorical using reparametrization trick:
    gumbel_soft = F.gumbel_softmax(logits_tensor, tau=tau, hard=False).cpu().numpy() 

    # Sample hard categorical using "Straight-through" trick:
    gumbel_hard  = F.gumbel_softmax(logits_tensor, tau=tau, hard=True).cpu().numpy()
    
    for l, gs, gh in zip(lgts, gumbel_soft, gumbel_hard):
        print(f"   {l}   \t {gs}            \t {gh}")
#     print(lgts)
#     print(gumbel_soft)
#     print(gumbel_hard)
    print()

In [None]:
for lgts in logits:
    logits_tensor = torch.tensor(lgts)
    print(lgts)
    # Sample soft categorical using reparametrization trick:
    gumbel_soft = F.gumbel_softmax(logits_tensor, tau=1, hard=False)
    print(gumbel_soft)

    # Sample hard categorical using "Straight-through" trick:
    gumbel_hard  = F.gumbel_softmax(logits_tensor, tau=1, hard=True)
    print(gumbel_hard)
    print()

In [None]:
smax = scipy.special.softmax(logs, axis =1)
# smax = np.array( 
# [[0.46973792, 0.530262  ],
#  [0.45025694, 0.549743  ],
#  [0.4443086 , 0.5556915 ],
#  [0.4138397 , 0.58616036],
#  [0.4140113 , 0.5859887 ],
#  [0.42114905, 0.57885087]])

print(smax.shape)
print(smax)
print(smax[0])
print(smax[0].sum())
print(np.random.choice((1,0), p =smax[0]))

In [None]:
logs = np.array(
[[0.33064184, 0.42053092],
 [0.3532089 , 0.52056104],
 [0.3888512 , 0.5680909 ],
 [0.42039296, 0.694217  ],
 [0.4519742 , 0.73311865],
 [0.48401102, 0.7522658 ]],
)