## Train SparseChem on Chembl_mini 
Output is written under `../experiments/SparseChem-cb29-1task` (the `outdir` set in the parameters cell below).

In [1]:
# from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:90% !important; }</style>"))
%load_ext autoreload
%autoreload 2

# Copyright (c) 2020 KU Leuven
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"

import argparse
import sys
import os.path
import time
import json
import functools
import types
import wandb
from datetime import datetime
import pprint
import csv
import copy 
from contextlib import redirect_stdout
import sparsechem as sc
from sparsechem import Nothing
from sparsechem.notebook_modules import (check_for_improvement,init_wandb, initialize,
                                        assertions)
import scipy.io
import scipy.sparse
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from pytorch_memlab import MemReporter
from pynvml import *

# Pretty-printer used further down to dump the parsed argument namespace.
pp = pprint.PrettyPrinter(indent=4)
# Widen numpy / torch console output so wide arrays stay readable in cell output.
np.set_printoptions(edgeitems=3, infstr='inf', linewidth=150, nanstr='nan')
torch.set_printoptions( linewidth=132)
# Notebook file name exposed to Weights & Biases through its environment variable.
os.environ['WANDB_NOTEBOOK_NAME'] = 'SparseChem_Train_mini.ipynb'

# Cap the number of CPU threads PyTorch uses; the count is printed before and
# after so the change is visible in the cell output.
pytorch_threads = 2
print(f" Pytorch thread count: {torch.get_num_threads()}")
print(f" Set Pytorch thread count to : {pytorch_threads}")
torch.set_num_threads(pytorch_threads)
print(f" Pytorch thread count set to : {torch.get_num_threads()}")

# NVML is initialised only when a CUDA device is available; the handle lookup
# in the memory-profiling cell relies on this call having happened.
if torch.cuda.is_available():
    nvmlInit()

#import warnings
# from torch.serialization import SourceChangeWarning 
#warnings.filterwarnings("ignore", category=UserWarning)    

# import multiprocessing
# multiprocessing.set_start_method('fork', force=True)

 Pytorch thread count: 20
 Set Pytorch thread count to : 2
 Pytorch thread count set to : 2


### Setup command line parameters

In [24]:
# Experiment configuration. These values are spliced into `cmd`, a single
# command-line style option string that the next cell hands to `initialize`.
datadir="../MLDatasets/chembl29_10task"
outdir ="../experiments/SparseChem-cb29-1task"
project_name = "SparseChem-cb29-1Task"
x_file = "chembl_29_X.npy"
y_file = "chembl_29_Y_all.npy"
folding_file = "chembl_29_folding.npy"
# NOTE(review): the GPU ordinal is hard-coded; adjust for the machine in use.
dev = "cuda:1"
batch_size = 4096

# Both --batch_ratio and --batch_size are passed; the batch-size cell below
# uses batch_size whenever it is not None and only falls back to batch_ratio.
cmd = (
  f" --project_name           {project_name} " +
  f" --data_dir                    {datadir} " +
  f" --output_dir                   {outdir} " +
  f" --x                            {x_file} " +
  f" --y_class                      {y_file} " +
  f" --folding                {folding_file} " +
  f" --dev                             {dev} " +
  f" --fold_va                             0 " +
  f" --fold_te                             1 " +
  f" --fold_inputs                     32000 " +
  f" --batch_ratio                      0.01 " +
  f" --batch_size               {batch_size} " +
  f" --hidden_sizes                     1000 " +
  f" --dropouts_trunk                   0.45 " +
  f" --dropouts_class                      0 " +
  f" --weight_decay                     1e-4 " +
  f" --epochs                            100 " +
  f" --lr                               1e-3 " +
  f" --lr_steps                           10 " +
  f" --lr_alpha                          0.3 " + 
  f" --prefix                             sc " +
  f" --min_samples_class                   1 "
)

# f" --dev              cuda:0 "
# f" --dev              cuda:0 "

### Initializations 

In [25]:
# Turn the option string into the `args` namespace used by every later cell.
# The recorded output shows that `initialize` also prints the parsed parameters.
args = initialize(cmd)
def vprint(s=""):
    """Print ``s`` (an empty line by default) only when ``args.verbose`` is truthy."""
    if not args.verbose:
        return
    print(s)


  command line parms : 
------------------------
 data_dir.................  ../MLDatasets/chembl29_10task
 output_dir...............  ../experiments/SparseChem-cb29-1task
 x........................  chembl_29_X.npy
 y_class..................  chembl_29_Y_all.npy
 project_name.............  SparseChem-cb29-1Task
 exp_id...................  None
 exp_name.................  None
 exp_desc.................  
 folder_sfx...............  None
 hidden_sizes.............  [1000]
 dropouts_trunk...........  [0.45]
 class_feature_size.......  -1
 last_hidden_sizes........  None
 epochs...................  100
 batch_size...............  4096
 weight_decay.............  0.0001
 last_non_linearity.......  relu
 middle_non_linearity.....  relu
 input_transform..........  none
 lr.......................  0.001
 lr_alpha.................  0.3
 lr_steps.................  [10]
 weights_class............  None
 weights_regr.............  None
 fold_va..................  0
 fold_te..................  1

In [26]:
# Dump every parsed option (including defaults) as a dict for the record.
pp.pprint(vars(args))

{   'batch_ratio': 0.01,
    'batch_size': 4096,
    'censored_loss': 1,
    'class_feature_size': -1,
    'data_dir': '../MLDatasets/chembl29_10task',
    'dev': 'cuda:1',
    'dropouts_class': [0.0],
    'dropouts_reg': [],
    'dropouts_trunk': [0.45],
    'enable_cat_fusion': 0,
    'epochs': 100,
    'eval_frequency': 1,
    'eval_train': 0,
    'exp_desc': '',
    'exp_id': '33x3rznq',
    'exp_name': '0912_1601',
    'fold_inputs': 32000,
    'fold_te': 1,
    'fold_va': 0,
    'folder_sfx': None,
    'folding': '../MLDatasets/chembl29_10task/chembl_29_folding.npy',
    'hdn_layer_size': 1000,
    'hidden_sizes': [1000],
    'input_size_freq': None,
    'input_transform': 'none',
    'internal_batch_max': None,
    'inverse_normalization': 0,
    'last_hidden_sizes': None,
    'last_hidden_sizes_class': None,
    'last_hidden_sizes_reg': None,
    'last_non_linearity': 'relu',
    'lr': 0.001,
    'lr_alpha': 0.3,
    'lr_steps': [10],
    'middle_non_linearity': 'relu',
    'mi

### Assertions

In [27]:
# Run the project's argument sanity checks before any data is loaded.
assertions(args)

All assertions passed successfully


### Summary writer

In [28]:
# Memory profiling needs the TensorBoard writer, so args.save_board must be
# set whenever args.profile is 1.
assert args.profile != 1 or args.save_board == 1, "Tensorboard should be enabled to be able to profile memory usage."

# With TensorBoard disabled, `Nothing()` takes the writer's place
# (presumably a do-nothing stand-in -- confirm in sparsechem).
writer = SummaryWriter(args.output_dir) if args.save_board else Nothing()
    

### Load datasets

In [29]:
# Load the input features and the label matrices from the paths held in `args`.
# The `is None` checks below imply `sc.load_sparse` can return None
# (presumably when the corresponding path was not given -- confirm).
ecfp     = sc.load_sparse(args.x)
y_class  = sc.load_sparse(args.y_class)
y_regr   = sc.load_sparse(args.y_regr)
y_censor = sc.load_sparse(args.y_censor)

if (y_regr is None) and (y_censor is not None):
    raise ValueError("y_censor provided please also provide --y_regr.")
# Replace missing label sets by empty (zero-column) sparse matrices so the
# remaining code can treat classification / regression uniformly.
if y_class is None:
    y_class = scipy.sparse.csr_matrix((ecfp.shape[0], 0))
if y_regr is None:
    y_regr  = scipy.sparse.csr_matrix((ecfp.shape[0], 0))
if y_censor is None:
    y_censor = scipy.sparse.csr_matrix(y_regr.shape)

# Load folding
folding = np.load(args.folding)
assert ecfp.shape[0] == folding.shape[0], "x and folding must have same number of rows"

## Loading task weights
tasks_class = sc.load_task_weights(args.weights_class, y=y_class, label="y_class")
tasks_regr  = sc.load_task_weights(args.weights_regr, y=y_regr, label="y_regr")

#------------------------------------------------------------------
## Input and folding transformation
#------------------------------------------------------------------
ecfp = sc.fold_transform_inputs(ecfp, folding_size=args.fold_inputs, transform=args.input_transform)
print(f"Row 0 count_nonzero():{ecfp[0].count_nonzero()}")


# Per-task label counts over the full dataset (before the test fold is removed).
num_pos    = np.array((y_class == +1).sum(0)).flatten()
num_neg    = np.array((y_class == -1).sum(0)).flatten()
num_class  = np.array((y_class != 0).sum(0)).flatten()
# Any stored value other than +1 / -1 makes the counts disagree.
if (num_class != num_pos + num_neg).any():
    raise ValueError("For classification all y values (--y_class/--y) must be 1 or -1.")

# Number of stored regression entries per task (column).
num_regr   = np.bincount(y_regr.indices, minlength=y_regr.shape[1])

assert args.min_samples_auc is None, "Parameter 'min_samples_auc' is obsolete. Use '--min_samples_class' that specifies how many samples a task needs per FOLD and per CLASS to be aggregated."
print()
#------------------------------------------------------------------
## Aggregation Weights 
#------------------------------------------------------------------
if tasks_class.aggregation_weight is None:
    print(f"task aggresgation weights is None - using min samples rule (= {args.min_samples_class})")
    ## using min_samples rule
    # NOTE: fold_pos / fold_neg are only defined on this branch; the inspection
    # cell that follows reads them unconditionally.
    fold_pos, fold_neg = sc.class_fold_counts(y_class, folding)
    n = args.min_samples_class
    # (fold_pos >= n).all(0) is a per-task vector; `&` broadcasts it against the
    # per-fold (fold_neg >= n) array and the outer .all(0) reduces over folds
    # again. Net effect: weight 1.0 only for tasks with at least n positives
    # AND at least n negatives in every fold.
    tasks_class.aggregation_weight = ((fold_pos >= n).all(0) & (fold_neg >= n)).all(0).astype(np.float64)

if tasks_regr.aggregation_weight is None:
    if y_censor.nnz == 0:
        # No censoring information: every stored regression value counts as a sample.
        y_regr2 = y_regr.copy()
        y_regr2.data[:] = 1
    else:
        ## only counting uncensored data
        y_regr2      = y_censor.copy()
        y_regr2.data = (y_regr2.data == 0).astype(np.int32)
    fold_regr, _ = sc.class_fold_counts(y_regr2, folding)
    del y_regr2
    tasks_regr.aggregation_weight = (fold_regr >= args.min_samples_regr).all(0).astype(np.float64)

print(f" Input dimension   : {ecfp.shape[1]}")
print(f" # Samples         : {ecfp.shape[0]}")
print(f" Y file shape      : {y_class.shape}")
print()
vprint(f"Using {(tasks_class.aggregation_weight > 0).sum()} classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).")
vprint(f"Using {(tasks_regr.aggregation_weight > 0).sum()} regression tasks for calculating metrics (RMSE, Rsquared, correlation).")


Row 0 count_nonzero():79

task aggresgation weights is None - using min samples rule (= 1)
 Input dimension   : 32000
 # Samples         : 423736
 Y file shape      : (423736, 3552)

Using 2038 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
Using 0 regression tasks for calculating metrics (RMSE, Rsquared, correlation).


In [30]:
# Inspect the per-fold positive / negative label counts behind the
# min-samples aggregation rule.
# NOTE(review): `fold_pos` / `fold_neg` are only created when the Load-datasets
# cell took its "aggregation_weight is None" branch; this cell fails otherwise.
# (The bare `(tasks_class.aggregation_weight > 0).sum()` expression that used to
# open this cell was a no-op: only a cell's last expression is displayed.)
print('fold_pos shape is : ',fold_pos.shape)
print(f"fold_pos.sum() shape: {fold_pos.sum(axis=-1).shape},  sum:{fold_pos.sum(axis=-1)} ")
print(fold_pos)
print()
print('fold_neg shape is : ',fold_neg.shape)
print(fold_neg.sum(), fold_neg.sum(axis=-1))
print(fold_neg)

fold_pos shape is :  (5, 3552)
fold_pos.sum() shape: (5,),  sum:[190358 180462 189258 184490 188576] 
[[  0   0   0 ...  59   2   0]
 [ 24  15  12 ...  45   0   0]
 [ 44  30   8 ...  37   8   1]
 [ 18  13   5 ... 111  10   0]
 [ 20  14   4 ...   4   0   0]]

fold_neg shape is :  (5, 3552)
1679106 [338329 337435 330723 334036 338583]
[[  0   0   0 ...  57  93  93]
 [  0   9  12 ...  86 122 122]
 [  9  23  45 ...  83 112 118]
 [  2   7  15 ... 108 159 169]
 [  3   9  19 ...  88  89  89]]


In [61]:
# Show the aggregation (metric) weights and the training (loss) weights of the
# classification tasks.
# NOTE(review): the recorded output shows `training_weight` as a tensor on
# cuda:1, which only happens after the Network cell calls `.to(dev)`; together
# with the In [61] counter this means the cell was executed out of order.
print(tasks_class.aggregation_weight.sum())
print(tasks_class.aggregation_weight)
print(tasks_class.training_weight.sum())
print(tasks_class.training_weight)
 

2038.0
[0. 0. 0. ... 1. 0. 0.]
tensor(3552., device='cuda:1')
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:1')


In [32]:
## Separation of test data
if args.fold_te is not None and args.fold_te >= 0:
    ## removing test data
    assert args.fold_te != args.fold_va, "fold_va and fold_te must not be equal."
    keep    = folding != args.fold_te
    ecfp    = ecfp[keep]
    y_class = y_class[keep]
    y_regr  = y_regr[keep]
    y_censor = y_censor[keep]
    folding = folding[keep]

## Regression Normalization    
normalize_inv = None
if args.normalize_regression == 1 and args.normalize_regr_va == 1:
   y_regr, mean_save, var_save = sc.normalize_regr(y_regr)

## Separation of train and Validation data
fold_va = args.fold_va
idx_tr  = np.where(folding != fold_va)[0]
idx_va  = np.where(folding == fold_va)[0]

y_class_tr = y_class[idx_tr]
y_class_va = y_class[idx_va]
y_regr_tr  = y_regr[idx_tr]
y_regr_va  = y_regr[idx_va]
y_censor_tr = y_censor[idx_tr]
y_censor_va = y_censor[idx_va]

## REgression normalization
if args.normalize_regression == 1 and args.normalize_regr_va == 0:
   y_regr_tr, mean_save, var_save = sc.normalize_regr(y_regr_tr) 
   if args.inverse_normalization == 1:
      normalize_inv = {}
      normalize_inv["mean"] = mean_save
      normalize_inv["var"]  = var_save
    
num_pos_va  = np.array((y_class_va == +1).sum(0)).flatten()
num_neg_va  = np.array((y_class_va == -1).sum(0)).flatten()
num_regr_va = np.bincount(y_regr_va.indices, minlength=y_regr.shape[1])
pos_rate = num_pos_va/(num_pos_va+num_neg_va)
pos_rate_ref = args.pi_zero
pos_rate = np.clip(pos_rate, 0, 0.99)
cal_fact_aucpr = pos_rate*(1-pos_rate_ref)/(pos_rate_ref*(1-pos_rate))

vprint(f"Input dimension   : {ecfp.shape[1]}")
vprint(f"Input dimension   : {ecfp.shape[1]}")
vprint(f"Training dataset  : {ecfp[idx_tr].shape}")
vprint(f"Validation dataset: {ecfp[idx_va].shape}")
vprint()
vprint(f"#classification tasks:  {y_class.shape[1]}")
vprint(f"#regression tasks    :      {y_regr.shape[1]}")
vprint(f"Using {(tasks_class.aggregation_weight > 0).sum():3d} classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).")
vprint(f"Using {(tasks_regr.aggregation_weight > 0).sum():3d} regression tasks for calculating metrics (RMSE, Rsquared, correlation).")

Input dimension   : 32000
Input dimension   : 32000
Training dataset  : (254529, 32000)
Validation dataset: (82933, 32000)

#classification tasks:  3552
#regression tasks    :      0
Using 2038 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
Using   0 regression tasks for calculating metrics (RMSE, Rsquared, correlation).


  pos_rate = num_pos_va/(num_pos_va+num_neg_va)


### Batch Size Calculation

In [33]:
# An explicit args.batch_size wins; otherwise the batch size is derived as a
# fraction (batch_ratio) of the number of training rows.
num_int_batches = 1
batch_size = (
    args.batch_size
    if args.batch_size is not None
    else int(np.ceil(args.batch_ratio * idx_tr.shape[0]))
)

print(f"orig batch size:   {batch_size}")
print(f"orig num int batches:   {num_int_batches}")

# When a cap on the internal batch is set and the batch exceeds it, split the
# batch into equally sized internal batches.
if args.internal_batch_max is not None and args.internal_batch_max < batch_size:
    num_int_batches = int(np.ceil(batch_size / args.internal_batch_max))
    batch_size      = int(np.ceil(batch_size / num_int_batches))
print(f"batch size:   {batch_size}")
print(f"num_int_batches:   {num_int_batches}")

orig batch size:   4096
orig num int batches:   1
batch size:   4096
num_int_batches:   1


In [34]:
# #import ipdb; ipdb.set_trace()
# batch_size  = int(np.ceil(args.batch_ratio * idx_tr.shape[0]))
# num_int_batches = 1

# if args.internal_batch_max is not None:
#     if args.internal_batch_max < batch_size:
#         num_int_batches = int(np.ceil(batch_size / args.internal_batch_max))
#         batch_size      = int(np.ceil(batch_size / num_int_batches))
# vprint(f"#internal batch size:   {batch_size}")

In [35]:
# Collect the tasks that carry a category id (entries whose string form is not
# 'nan'), both as [cat_id, column] pairs and as a plain array of column indices.
tasks_cat_id_list = None
select_cat_ids = None
cat_id_size = 0
if tasks_class.cat_id is not None:
    tasks_cat_id_list = [
        [cat, col]
        for col, cat in enumerate(tasks_class.cat_id)
        if str(cat) != 'nan'
    ]
    tasks_cat_ids = [col for _, col in tasks_cat_id_list]
    select_cat_ids = np.array(tasks_cat_ids)
    cat_id_size = len(tasks_cat_id_list)

### Dataloaders

In [36]:
# Wrap the training / validation rows in SparseChem datasets; both use the same
# category-column selection.
dataset_tr = sc.ClassRegrSparseDataset(
    x=ecfp[idx_tr],
    y_class=y_class_tr,
    y_regr=y_regr_tr,
    y_censor=y_censor_tr,
    y_cat_columns=select_cat_ids,
)
dataset_va = sc.ClassRegrSparseDataset(
    x=ecfp[idx_va],
    y_class=y_class_va,
    y_regr=y_regr_va,
    y_censor=y_censor_va,
    y_cat_columns=select_cat_ids,
)

# Only the training loader shuffles; each loader collates with its own dataset.
loader_tr = DataLoader(
    dataset_tr,
    batch_size=batch_size,
    num_workers=8,
    pin_memory=True,
    collate_fn=dataset_tr.collate,
    shuffle=True,
)
loader_va = DataLoader(
    dataset_va,
    batch_size=batch_size,
    num_workers=4,
    pin_memory=True,
    collate_fn=dataset_va.collate,
    shuffle=False,
)

# Record the dataset-derived sizes on `args`, which is later passed to the
# network constructor.
args.input_size = dataset_tr.input_size
args.output_size = dataset_tr.output_size
args.class_output_size = dataset_tr.class_output_size
args.regr_output_size = dataset_tr.regr_output_size
args.cat_id_size = cat_id_size



In [37]:

# Summarise dataset shapes, sizes and batch counts. The pieces are passed to a
# single print() call, so they are joined by the default single-space separator.
_dataset_summary = [
    f"\n dataset_tr.y_class                                 :  {dataset_tr.y_class.shape}",
    f"\n dataset_va.y_class                                 :  {dataset_va.y_class.shape}",
    f"\n                                ",
    f'\n size of training set                               :  {len(dataset_tr)}',
    f'\n size of validation set                             :  {len(dataset_va)}',
    f"\n                                ",
    f"\n Number of batches in training                      :  {len(loader_tr)}",
    f"\n Number of batches in validation dataset            :  {len(loader_va)}",
    f"\n                                ",
]
print(*_dataset_summary)
                


 dataset_tr.y_class                                 :  (254529, 3552) 
 dataset_va.y_class                                 :  (82933, 3552) 
                                 
 size of training set                               :  254529 
 size of validation set                             :  82933 
                                 
 Number of batches in training                      :  63 
 Number of batches in validation dataset            :  21 
                                


###  WandB setup

In [38]:
#------------------------------------------------------------------
# ### WandB setup
#------------------------------------------------------------------
# Mutable run state shared by the training loop and the project helpers
# (`init_wandb`, `check_for_improvement`): epoch / iteration counters and the
# best results seen so far.
ns = types.SimpleNamespace(
    current_epoch=0,
    current_iter=0,
    best_results={},
    best_metrics=None,
    best_accuracy=0,
    best_roc_auc=0,
    best_iter=0,
    best_epoch=0,
    p_epoch=0,
    num_prints=0,
)

init_wandb(ns, args)
# Have the W&B run summary keep the last logged value of each "best" metric.
wandb.define_metric("best_accuracy", summary="last")
wandb.define_metric("best_roc_auc", summary="last")
wandb.define_metric("best_epoch", summary="last")

# ns.best_value     = 0 
# wandb.define_metric("best_accuracy", summary="last")


33x3rznq 0912_1601 SparseChem-cb29-1Task


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.03333793481190999, max=1.0)…

 PROJECT NAME: SparseChem-cb29-1Task
 RUN ID      : 33x3rznq 
 RUN NAME    : 0912_1601


<wandb.sdk.wandb_metric.Metric at 0x7f9691a11940>

### Network

In [39]:
#------------------------------------------------------------------
# ### Network
#------------------------------------------------------------------
# Build the model on the requested device.
dev = torch.device(args.dev)
net = sc.SparseFFN(args).to(dev)

# Element-wise (unreduced) BCE for classification; the regression loss is the
# censored MSE, with censoring switched off when args.censored_loss is falsy.
loss_class = torch.nn.BCEWithLogitsLoss(reduction="none")
loss_regr = (
    sc.censored_mse_loss
    if args.censored_loss
    else functools.partial(sc.censored_mse_loss, censored_enabled=False)
)

# Move the per-task weight tensors to the same device as the network.
tasks_class.training_weight = tasks_class.training_weight.to(dev)
tasks_regr.training_weight = tasks_regr.training_weight.to(dev)
tasks_regr.censored_weight = tasks_regr.censored_weight.to(dev)

###  Optimizer, Scheduler, GradScaler

In [40]:
#------------------------------------------------------------------
# ###  Optimizer, Scheduler, GradScaler
#------------------------------------------------------------------
# Adam with weight decay; the learning rate is multiplied by args.lr_alpha at
# each epoch listed in args.lr_steps.
optimizer = torch.optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
scheduler = MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_alpha)
# Gradient scaler handed to the training routine (printed next to the
# mixed-precision flag in the display cell below).
scaler = torch.cuda.amp.GradScaler()

wandb.watch(net, log='all', log_freq= 100)     ###  Weights and Biases Initialization 
# Defaults for the optional memory profiler; the next cell replaces them when
# args.profile == 1. `h` is passed to the training call as `nvml_handle`.
reporter = None
h = None

### setup memory profiling reporter

In [41]:
if args.profile == 1:
    # Profile the GPU the model actually lives on. `dev` may name a specific
    # ordinal (e.g. "cuda:1"), which need not be torch's *current* device, so
    # the current device is only the fallback when `dev` carries no index.
    torch_gpu_id = dev.index if dev.index is not None else torch.cuda.current_device()

    # NVML numbers physical GPUs, while torch numbers the devices left visible
    # by CUDA_VISIBLE_DEVICES, so remap when that variable is set (and non-empty).
    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
    if visible_devices:
        ids = list(map(int, visible_devices.split(",")))
        nvml_gpu_id = ids[torch_gpu_id] # remap
    else:
        nvml_gpu_id = torch_gpu_id
    h = nvmlDeviceGetHandleByIndex(nvml_gpu_id)

    #####   output saving   #####
    os.makedirs(args.output_dir, exist_ok=True)

    reporter = MemReporter(net)

    # Start a fresh profile file; the reporter prints to stdout, so stdout is
    # redirected into the file while it runs.
    with open(f"{args.output_dir}/memprofile.txt", "w+") as profile_file:
        with redirect_stdout(profile_file):
            profile_file.write(f"\nInitial model detailed report:\n\n")
            reporter.report()

In [42]:
#------------------------------------------------------------------
# ### Display network and other values
#------------------------------------------------------------------
# Print the model, the optimizer and the main training settings for the record.
print("Network:")
print(net)
print(optimizer)
print(f"dev                  :    {dev}")
print(f"args.lr              :    {args.lr}")
print(f"args.weight_decay    :    {args.weight_decay}")
print(f"args.lr_steps        :    {args.lr_steps}")
# This line used to repeat args.lr_steps; the scheduler's decay factor is the
# setting that was missing from the report.
print(f"args.lr_alpha        :    {args.lr_alpha}")
print(f"num_int_batches      :    {num_int_batches}")
print(f"batch_size           :    {batch_size}")
print(f"current epoch        :    {ns.current_epoch}")
print(f"epochs               :    {args.epochs}")
print(f"scaler               :    {scaler}")
print(f"args.normalize_loss  :    {args.normalize_loss}")
print(f"loss_class           :    {loss_class}")
print(f"mixed precision      :    {args.mixed_precision}")
print(f"args.eval_train      :    {args.eval_train}")

Network:
SparseFFN(
  (net): Sequential(
    (0): SparseInputNet(
      (net_freq): SparseLinear(in_features=32000, out_features=1000, bias=True)
    )
    (1): MiddleNet(
      (net): Sequential()
    )
  )
  (classLast): LastNet(
    (net): Sequential(
      (initial_layer): Sequential(
        (0): ReLU()
        (1): Dropout(p=0.45, inplace=False)
        (2): Linear(in_features=1000, out_features=3552, bias=True)
      )
    )
  )
  (regrLast): Sequential(
    (0): LastNet(
      (net): Sequential(
        (initial_layer): Sequential(
          (0): Tanh()
          (1): Dropout(p=0.45, inplace=False)
          (2): Linear(in_features=1000, out_features=0, bias=True)
        )
      )
    )
  )
)
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    initial_lr: 0.001
    lr: 0.001
    weight_decay: 0.0001
)
dev                  :    cuda:1
args.lr              :    0.001
args.weight_decay    :    0.0001
args.lr_steps        :    [10]
args.lr_steps 

##  Training Loop

In [43]:
import warnings
# from torch.serialization import SourceChangeWarning 
# Silence all UserWarning messages for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)    
# NOTE(review): this overrides the `--epochs 100` given in the command string;
# only 10 epochs are trained per execution of the training cell.
args.epochs = 10
# Training continues from the last completed epoch, so end_epoch is relative
# to ns.current_epoch rather than absolute.
ns.end_epoch = ns.current_epoch + args.epochs
print(f" Last Epoch: {ns.current_epoch}   # of epochs to do:  {args.epochs} - Run epochs {ns.current_epoch+1} to {ns.end_epoch}")

 Last Epoch: 0   # of epochs to do:  10 - Run epochs 1 to 10


In [44]:
# Train for args.epochs further epochs, continuing from ns.current_epoch so the
# cell can be re-run to extend training. Epoch numbers are 1-based.
ns.end_epoch = ns.current_epoch + args.epochs

for ns.current_epoch in range(ns.current_epoch+1, ns.end_epoch+1, 1):
    t0 = time.time()
    sc.train_class_regr(
        net, optimizer,
        loader          = loader_tr,
        loss_class      = loss_class,
        loss_regr       = loss_regr,
        dev             = dev,
        weights_class   = tasks_class.training_weight * (1-args.regression_weight) * 2,
        weights_regr    = tasks_regr.training_weight * args.regression_weight * 2,
        censored_weight = tasks_regr.censored_weight,
        normalize_loss  = args.normalize_loss,
        num_int_batches = num_int_batches,
        progress        = False,
        writer          = writer,
        epoch           = ns.current_epoch,
        args            = args,
        scaler          = scaler,
        nvml_handle     = h)

    if args.profile == 1:
        # Append a per-epoch memory report. The epoch counter lives on `ns`;
        # there is no bare `epoch` variable in this notebook.
        with open(f"{args.output_dir}/memprofile.txt", "a+") as profile_file:
            profile_file.write(f"\nAfter epoch {ns.current_epoch} model detailed report:\n\n")
            with redirect_stdout(profile_file):
                reporter.report()

    t1 = time.time()
    # ns.current_epoch is already 1-based, so no "+ 1" / "- 1" adjustments:
    # evaluate on every eval_frequency-th epoch and always on the final epoch
    # of this run, so results_va / results_tr are defined for later cells.
    eval_round = (args.eval_frequency > 0) and (ns.current_epoch % args.eval_frequency == 0)
    last_round = ns.current_epoch == ns.end_epoch

    if eval_round or last_round:

        results_va = sc.evaluate_class_regr(net, loader_va, loss_class, loss_regr,
                                            tasks_class= tasks_class,
                                            tasks_regr = tasks_regr,
                                            dev        = dev,
                                            progress   = False,
                                            normalize_inv=normalize_inv,
                                            cal_fact_aucpr=cal_fact_aucpr)

        for key, val in results_va["classification_agg"].items():
            writer.add_scalar("val_metrics:aggregated/"+key, val, ns.current_epoch * batch_size)

        if args.eval_train:
            results_tr = sc.evaluate_class_regr(net, loader_tr, loss_class, loss_regr,
                                                tasks_class = tasks_class,
                                                tasks_regr  = tasks_regr,
                                                dev         = dev,
                                                progress    = args.verbose >= 2)
            for key, val in results_tr["classification_agg"].items():
                writer.add_scalar("trn_metrics:aggregated/"+key, val, ns.current_epoch * batch_size)

        else:
            results_tr = None

        if args.verbose:
            ## printing a new header every 20 lines
            header = ns.num_prints % 20 == 0
            ns.num_prints += 1
            sc.print_metrics_cr(ns.current_epoch, t1 - t0, results_tr, results_va, header)

        wandb.log(results_va["classification_agg"].to_dict())

        # Updates the ns.best_* fields when this epoch improves on them
        # (see the "Previous / New best_epoch" lines in the recorded output).
        check_for_improvement(ns, results_va)

    scheduler.step()

print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best Precision :   {ns.best_accuracy:.5f}\n"
      f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n")

Epoch  |      logl   bceloss  avg prec   auc roc    auc pr aucpr_cal    f1_max |      rmse  rsquared  corrcoef | tr_time 
1      |   0.39531   0.51529   0.61102   0.69860   0.59813   0.36171   0.66031 |       nan       nan       nan |    3.5 
 Previous best_epoch:     0       best_accuracy: 0.00000    best ROC auc: 0.00000
 New      best_epoch:     1       best_accuracy: 0.61102    best ROC auc: 0.69860
2      |   0.37724   0.49380   0.63498   0.72573   0.62241   0.39290   0.67715 |       nan       nan       nan |    4.1 
 Previous best_epoch:     1       best_accuracy: 0.61102    best ROC auc: 0.69860
 New      best_epoch:     2       best_accuracy: 0.63498    best ROC auc: 0.72573
3      |   0.37111   0.48282   0.64862   0.74244   0.63623   0.41047   0.68655 |       nan       nan       nan |    4.0 
 Previous best_epoch:     2       best_accuracy: 0.63498    best ROC auc: 0.72573
 New      best_epoch:     3       best_accuracy: 0.64862    best ROC auc: 0.74244
4      |   0.36713   0.

In [46]:
# Recap of the best epoch, followed by every aggregated validation metric from
# the most recent evaluation.
print(
    f"Best Epoch :       {ns.best_epoch}\n"
    f"Best Iteration :   {ns.best_iter} \n"
    f"Best Precision :   {ns.best_accuracy:.5f}\n"
    f"Best ROC AUC   :   {ns.best_roc_auc:.5f}\n"
)
print()
for metric_name, metric_value in results_va['classification_agg'].items():
    print(f" {metric_name:20s}  {metric_value:.4f}")

Best Epoch :       10
Best Iteration :   0 
Best Precision :   0.66453
Best ROC AUC   :   0.76565


 roc_auc_score         0.7657
 auc_pr                0.6517
 avg_prec_score        0.6645
 f1_max                0.6963
 p_f1_max              0.3501
 kappa                 0.2402
 kappa_max             0.4666
 p_kappa_max           0.4329
 bceloss               0.4714
 auc_pr_cal            0.4303
 logloss               0.3650
 num_tasks_total       3552.0000
 num_tasks_agg         2038.0000


In [47]:
# Per-task validation metrics, limited to tasks that have a ROC AUC value.
pd.options.display.width = 150
df = results_va['classification']
print(df[df["roc_auc_score"].notna()])

      roc_auc_score    auc_pr  avg_prec_score    f1_max  p_f1_max     kappa  kappa_max  p_kappa_max   bceloss  auc_pr_cal
task                                                                                                                     
4          0.947173  0.999010        0.999011  0.990968  0.697061  0.000000   0.318420     0.846933  0.063533    0.912445
5          0.898051  0.990939        0.990952  0.979757  0.600789  0.378755   0.615441     0.666567  0.159456    0.624475
6          0.717681  0.833718        0.834275  0.842271  0.171358  0.263361   0.317895     0.616627  0.577373    0.235801
7          0.628455  0.241129        0.245616  0.361905  0.045056  0.000000   0.126116     0.045056  0.511482    0.156172
8          0.634470  0.284348        0.318164  0.426667  0.070655  0.000000   0.208854     0.265385  0.475046    0.160961
...             ...       ...             ...       ...       ...       ...        ...          ...       ...         ...
3546       0.693182  0.8

## Post Training 

In [48]:
#print("DEBUG data for hidden spliting")
#print (f"Classification mask: Sum = {net.classmask.sum()}\t Uniques: {np.unique(net.classmask)}")
#print (f"Regression mask:     Sum = {net.regmask.sum()}\t Uniques: {np.unique(net.regmask)}")
#print (f"overlap: {(net.regmask * net.classmask).sum()}")

# Post-training wrap-up: close the summary writer, optionally report peak GPU memory,
# then write the model weights and the run's config/metrics under args.output_dir.
writer.close()
vprint()
if args.profile == 1:
    # Profiling run: read the "GPUmem" scalars back from the logs identified by tb_name.
    multiplexer = sc.create_multiplexer(tb_name)
#   sc.export_scalars(multiplexer, '.', "GPUmem", "testcsv.csv")
    data = sc.extract_scalars(multiplexer, '.', "GPUmem")
    vprint(f"Peak GPU memory used: {sc.return_max_val(data)}MB")
vprint("Saving performance metrics (AUCs) and model.")

#####   model saving   #####
# exist_ok=True replaces the former exists()-then-makedirs() pair, which could raise
# FileExistsError if the directory appeared between the check and the creation.
os.makedirs(args.output_dir, exist_ok=True)

model_file = f"{args.output_dir}/{args.name}.pt"
out_file   = f"{args.output_dir}/{args.name}.json"

if args.save_model:
    # Only the weights (state_dict) are stored, not the full module object.
    torch.save(net.state_dict(), model_file)
    vprint(f"Saved model weights into '{model_file}'.")

# Attach the validation-fold label counts to the validation results.
results_va["classification"]["num_pos"] = num_pos_va
results_va["classification"]["num_neg"] = num_neg_va
results_va["regression"]["num_samples"] = num_regr_va

if results_tr is not None:
    # Training counts are derived as (overall counts) - (validation-fold counts).
    results_tr["classification"]["num_pos"] = num_pos - num_pos_va
    results_tr["classification"]["num_neg"] = num_neg - num_neg_va
    results_tr["regression"]["num_samples"] = num_regr - num_regr_va

# Regression normalisation statistics are stored only when normalisation was enabled.
stats = None
if args.normalize_regression == 1:
    stats = {
        "mean": mean_save,
        # NOTE(review): takes the first row of var_save; presumably var_save is 2-D
        # with a leading singleton axis -- confirm against where it is computed.
        "var": np.array(var_save)[0],
    }
sc.save_results(out_file, args, validation=results_va, training=results_tr, stats=stats)

vprint(f"Saved config and results into '{out_file}'.\nYou can load the results by:\n  import sparsechem as sc\n  res = sc.load_results('{out_file}')")


Saving performance metrics (AUCs) and model.
Saved model weights into '../experiments/SparseChem-cb29-1task/1000x0_0912_1601_lr0.001_do0.45/sc_1000_lr0.001_do0.45.pt'.
Saved config and results into '../experiments/SparseChem-cb29-1task/1000x0_0912_1601_lr0.001_do0.45/sc_1000_lr0.001_do0.45.json'.
You can load the results by:
  import sparsechem as sc
  res = sc.load_results('../experiments/SparseChem-cb29-1task/1000x0_0912_1601_lr0.001_do0.45/sc_1000_lr0.001_do0.45.json')


In [49]:
# Preview the first 20 per-task validation rows (positional slice), list the
# available result sections, then pretty-print the aggregate metrics.
print()
print(results_va['classification'].iloc[:20])
print()
print(results_va.keys())
pp.pprint(results_va['classification_agg'])


      roc_auc_score    auc_pr  avg_prec_score    f1_max  p_f1_max     kappa  kappa_max  p_kappa_max   bceloss  auc_pr_cal  num_pos  num_neg
task                                                                                                                                       
0               NaN       NaN             NaN       NaN       NaN       NaN        NaN          NaN       NaN         NaN        0        0
1               NaN       NaN             NaN       NaN       NaN       NaN        NaN          NaN       NaN         NaN        0        0
2               NaN       NaN             NaN       NaN       NaN       NaN        NaN          NaN       NaN         NaN        0        0
3               NaN       NaN             NaN       NaN       NaN       NaN        NaN          NaN       NaN         NaN        0        0
4          0.947173  0.999010        0.999011  0.990968  0.697061  0.000000   0.318420     0.846933  0.063533    0.912445      384        7
5          0.898051

In [60]:
# Split the per-task table on whether a ROC AUC value is present:
# first the tasks where it is NaN, then the tasks where it is not.
auc_missing = df.roc_auc_score.isna()
print(df[auc_missing])
print(df[~auc_missing])

      roc_auc_score  auc_pr  avg_prec_score  f1_max  p_f1_max  kappa  kappa_max  p_kappa_max  bceloss  auc_pr_cal  num_pos  num_neg
task                                                                                                                               
0               NaN     NaN             NaN     NaN       NaN    NaN        NaN          NaN      NaN         NaN        0        0
1               NaN     NaN             NaN     NaN       NaN    NaN        NaN          NaN      NaN         NaN        0        0
2               NaN     NaN             NaN     NaN       NaN    NaN        NaN          NaN      NaN         NaN        0        0
3               NaN     NaN             NaN     NaN       NaN    NaN        NaN          NaN      NaN         NaN        0        0
10              NaN     NaN             NaN     NaN       NaN    NaN        NaN          NaN      NaN         NaN        0       82
...             ...     ...             ...     ...       ...    ...        

In [50]:
# Mark the Weights & Biases run held in ns.wandb_run as finished.
# `ns` is created earlier in the notebook (outside this section).
ns.wandb_run.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
auc_pr,▁▄▆▇▇▇████
auc_pr_cal,▁▄▆▇▇█████
avg_prec_score,▁▄▆▇▇▇████
bceloss,█▅▃▂▂▂▁▂▁▁
best_accuracy,▁▄▆▇▇▇████
best_epoch,▁▂▃▃▄▅▆▆▇█
best_roc_auc,▁▄▆▆▇▇████
f1_max,▁▄▆▆▇▇████
kappa,▁▄▅▆▆▇▇▇██
kappa_max,▁▄▆▇▇▇████

0,1
auc_pr,0.65166
auc_pr_cal,0.43031
avg_prec_score,0.66453
bceloss,0.47142
f1_max,0.69633
kappa,0.24023
kappa_max,0.46664
logloss,0.36496
num_tasks_agg,2038.0
num_tasks_total,3552.0


## Results 

In [23]:
# Preview the first 50 per-task validation rows (positional slice).
print()
print(results_va['classification'].iloc[:50])


      roc_auc_score    auc_pr  avg_prec_score    f1_max  p_f1_max     kappa  \
task                                                                          
0          0.991560  0.998782        0.998752  0.982301  0.980404  0.786753   
1               NaN       NaN             NaN       NaN       NaN       NaN   
2               NaN       NaN             NaN       NaN       NaN       NaN   
3               NaN       NaN             NaN       NaN       NaN       NaN   
4               NaN       NaN             NaN       NaN       NaN       NaN   
5          0.944444  0.996995        0.997076  0.972973  0.562853  0.000000   
6          1.000000  1.000000        1.000000  1.000000  0.973055  0.797642   
7          0.666667  0.461111        0.588889  0.750000  0.763078  0.000000   
8          1.000000  1.000000        1.000000  1.000000  0.987239  0.000000   
9          1.000000  1.000000        1.000000  1.000000  0.998716  0.660870   
10              NaN       NaN             NaN      

In [45]:
# Summed label counts, one per line: overall negatives, overall positives,
# then the validation-fold negatives and positives.
print(num_neg.sum(), num_pos.sum(), num_neg_va.sum(), num_pos_va.sum(), sep="\n")

2445
18704
505
3804


In [None]:
# Per-task (positive, negative) label counts of the validation set.
for n_pos, n_neg in zip(dldrs.valset.num_pos, dldrs.valset.num_neg):
    print(f" {n_pos:4d}  {n_neg:4d}")
# NOTE(review): this second loop is identical to the first one, so the validation
# counts are printed twice. It may have been meant for a different split -- confirm.
for n_pos, n_neg in zip(dldrs.valset.num_pos, dldrs.valset.num_neg):
    print(f" {n_pos:4d}  {n_neg:4d}")

In [24]:
# Pretty-print only the aggregate validation metrics, framed by blank lines.
# The full results dump is left disabled:
# pp.pprint(results_va)
print()

pp.pprint(results_va['classification_agg'])
print()


roc_auc_score        0.900757
auc_pr               0.943594
avg_prec_score       0.945930
f1_max               0.938011
p_f1_max             0.723872
kappa                0.611823
kappa_max            0.803714
p_kappa_max          0.826011
bceloss              0.296447
auc_pr_cal           0.827893
logloss              0.087415
num_tasks_total    100.000000
num_tasks_agg       20.000000
dtype: float64



In [40]:
# Validation-fold label counts per task: positives, negatives, regression samples.
print(num_pos_va)
print(num_neg_va)
print(num_regr_va)
# One row per task: validation positives/negatives next to num_pos/num_neg.
# NOTE(review): the save cell computes training counts as num_pos - num_pos_va, so
# num_pos/num_neg look like whole-dataset totals rather than training-only counts;
# confirm whether the "training" label or the printed values should change.
for va_pos, va_neg, all_pos, all_neg in zip(num_pos_va, num_neg_va, num_pos, num_neg):
    print(f" {va_pos:4d}  {va_neg:4d}    training: {all_pos:4d}   {all_neg:4d}")

[115  31  17  54   7  18 203   3  55  76  63   1  42   0  75  36   0   1  43   2   7   7   0   3   0  21   1  37 330  11  86   0   0 142  27  25   4
   7 111  12   7  79   0  97 129  15  10   9   9  38 272   1   2   0 285  63 185   1   0   4  19  21  13   0  18  12   0   0  93  55   1  73  29  41
  18   0  46  37  14   0  40  18   6   8  10 330   6   4   1   0   0   0   8   0   0   0   3   0   1   0]
[17  0  0  0  0  1  3  4  2  2  0  0  0  1  2  0  0  0  0  2  0  0 13  0  0  0  0  0  6  0  0 30  0  0  0  3  3  3  2  0  0  1  1  1 18  0  2  0 85
  0  0  2 13 47  1 11  0  0  0  2  0  1  1  1  3  0  0  0  0  0 18  0  9  3  0  4  1  0 23 11  6  0  5  0  0 46  4  4 73  2  0  7  1  0  0  0  0  0
  4  0]
[]
  115    17    trianing:  884     83
   31     0    trianing:   59      0
   17     0    trianing:   59      0
   54     0    trianing:  284      0
    7     0    trianing:   55      0
   18     1    trianing:   43      3
  203     3    trianing:  749     29
    3     4    trianing:   14 

In [26]:
# Column-wise mean over the tasks that have a (non-NaN) ROC AUC.
df[df.roc_auc_score.notna()].mean()

roc_auc_score      0.874262
auc_pr             0.922464
avg_prec_score     0.932781
f1_max             0.938123
p_f1_max           0.746278
kappa              0.400306
kappa_max          0.754262
p_kappa_max        0.830485
bceloss            0.385833
auc_pr_cal         0.803581
num_pos           58.512821
num_neg            9.948718
dtype: float64

In [27]:
# Remove the notebook-level `net` binding (the model whose state_dict is saved in the
# post-training cell). Cells that reference `net` cannot be re-run after this.
del net

## Misc

In [33]:
import wandb
def restart_wandb(exp_id, exp_name, project_name, resume = "allow", entity = "kbardool"):
    """
    Start or resume a Weights & Biases run and return its handle.

    Parameters
    ----------
    exp_id : str
        Run id, forwarded as ``wandb.init(id=...)``.
    exp_name : str
        Run display name, forwarded as ``wandb.init(name=...)``.
    project_name : str
        Project name, forwarded as ``wandb.init(project=...)``.
    resume : optional
        Forwarded unchanged as ``wandb.init(resume=...)``. Defaults to "allow".
    entity : str, optional
        Forwarded as ``wandb.init(entity=...)``. Defaults to "kbardool", the value
        that was previously hard-coded, so existing calls behave exactly as before.

    Returns
    -------
    The run object returned by ``wandb.init``.
    """
    print(exp_id, exp_name, project_name)
    wandb_run = wandb.init(project = project_name,
                           entity  = entity,
                           id      = exp_id,
                           name    = exp_name,
                           resume  = resume)

    # Echo what the run reports, so a mismatch with the requested values is visible.
    print(f" PROJECT NAME: {wandb_run.project}\n"
          f" RUN ID      : {wandb_run.id} \n"
          f" RUN NAME    : {wandb_run.name}")

    return wandb_run


In [34]:
# Attach to run id "d2rw3bdq" (name "0413_0509") in project "SparseChem-Mini",
# using restart_wandb's default resume mode.
run = restart_wandb("d2rw3bdq","0413_0509","SparseChem-Mini")

d2rw3bdq 0413_0509 SparseChem-Mini





VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

You should always run with libnvidia-ml.so that is installed with your
NVIDIA Display Driver. By default it's installed in /usr/lib and /usr/lib64.
libnvidia-ml.so in GDK package is a stub library that is attached only for
build purposes (e.g. machine that you build your application doesn't have
to have Display Driver installed).
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Linked to libnvidia-ml library at wrong path : /usr/lib64/libnvidia-ml.so.1



 PROJECT NAME: SparseChem-Mini
 RUN ID      : d2rw3bdq 
 RUN NAME    : 0413_0509


In [43]:
# NOTE(review): the saved output is a NameError because this cell was last executed
# (In [43]) after the `del run` cell (In [42]). Re-create `run` before running it.
print(run)

NameError: name 'run' is not defined

In [41]:
# Mark the run re-opened via restart_wandb as finished.
run.finish()

In [42]:
# Remove the `run` name; later cells that use `run` raise NameError until it is re-created.
del run

In [7]:
# cmd = (
#   f" --x       /home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic/chembl_23mini_x.npy " +
#   f" --y_class /home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic/chembl_23mini_adashare_y_all_bin_sparse.npy " +
#   f" --folding /home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic/chembl_23mini_folds.npy " +
#   f" --output_dir {output_dir}" +    
#   f" --fold_va           0 " +
#   f" --batch_ratio    0.02 " +
#   f" --hidden_sizes   25 25 25 25 25 25 " +
#   f" --dropouts_trunk  0  0  0  0  0  0 " +
#   f" --weight_decay   1e-4 " 
#   f" --epochs           40 " +
#   f" --lr             1e-3 " +
#   f" --lr_steps         10 " +
#   f" --lr_alpha        0.3" 
# )

# cmd = (
#   f" --x       {data_dir}/chembl_23mini_x.npy " +
#   f" --y_class {data_dir}/chembl_23mini_adashare_y_all_bin_sparse.npy " +
#   f" --folding {data_dir}/chembl_23mini_folds.npy " +
#   f" --output_dir {output_dir}" +    
#   f" --fold_va            0 " +
#   f" --batch_ratio     0.02 " +
#   f" --hidden_sizes   40 40 " +
#   f" --dropouts_trunk  0  0 " +
#   f" --weight_decay   1e-4 " +
#   f" --epochs           20 " +
#   f" --lr             1e-3 " +
#   f" --lr_steps         10 " +
#   f" --lr_alpha        0.3 " 
# )

#   f" --hidden_sizes   400 400 " +
#   f" --last_dropout   0.2 " +
#   f" --middle_dropout 0.2 " +
#   f" --x       ./{data_dir}/chembl_23_x.mtx " +
#   f" --y_class ./{data_dir}/chembl_23_y.mtx " +
#   f" --folding ./{data_dir}/folding_hier_0.6.npy " +

#### copied from SparseChemDev 

# cmd = (
#         f" --x       ./{data_dir}/chembl_23mini_x.npy" +
#         f" --y_class ./{data_dir}/chembl_23mini_y.npy" +
#         f" --folding ./{data_dir}/chembl_23mini_folds.npy" +
#         f" --hidden_sizes 20 30 40 " +  
#         f" --output_dir {output_dir}" +
#         f" --batch_ratio 0.1" +
#         f" --epochs 2" +
#         f" --lr 1e-3" +
#         f" --lr_steps 1" +
#         f" --dev {dev}" +
#         f" --verbose 1")
#         f" --input_size_freq  40"
#         f" --tail_hidden_size  10"

In [None]:
# data_dir="chembl23_data"
# data_dir="chembl23_run_01152022"
# rstr = "synthetic_data_model" ##random_str(12)
# rstr = "synthetic_data_model_03042022" ##random_str(12)
# output_dir = f"./models-{rstr}/"
# output_dir = f"./{data_dir}/models-{rstr}/"
# output dir kbardool/kusanagi/experiments/SparseChem/0116_0843


In [4]:
# dev = "gpu" 
# data_dir="chembl23_data"
# data_dir="chembl23_run_01152022"
# data_dir = "/home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic"

# rm_output=False

# rstr = datetime.now().strftime("%m%d_%H%M")
# rstr = "synthetic_data_model" ##random_str(12)
# rstr = "synthetic_data_model_03042022" ##random_str(12)

# output_dir = f"./models-{rstr}/"
# output_dir = f"./{data_dir}/models-{rstr}/"

# output dir kbardool/kusanagi/experiments/SparseChem/0116_0843
# output_dir = f"/home/kbardool/kusanagi/experiments/SparseChem/{rstr}"
# print(output_dir)