## Train SparseChem on Chembl_mini 
Output to `experiments/SparseChem`

In [1]:
# from IPython.core.display import display, HTML
# display(HTML("<style>.container { width:90% !important; }</style>"))
%load_ext autoreload
%autoreload 2

# Copyright (c) 2020 KU Leuven
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"

import argparse
import sys
import os.path
import time
import json
import functools
import types
import wandb
from datetime import datetime
import pprint
import csv
import copy 
from contextlib import redirect_stdout
import sparsechem as sc
from sparsechem import Nothing
from sparsechem.notebook_helper import (check_for_improvement,init_wandb, initialize,
                                        assertions)
import scipy.io
import scipy.sparse
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from pytorch_memlab import MemReporter
from pynvml import *

pp = pprint.PrettyPrinter(indent=4)
np.set_printoptions(edgeitems=3, infstr='inf', linewidth=150, nanstr='nan')
torch.set_printoptions( linewidth=132)
os.environ['WANDB_NOTEBOOK_NAME'] = 'SparseChem_Train_mini.ipynb'
if torch.cuda.is_available():
    nvmlInit()

#import warnings
# from torch.serialization import SourceChangeWarning 
#warnings.filterwarnings("ignore", category=UserWarning)    

# import multiprocessing
# multiprocessing.set_start_method('fork', force=True)

Could not find CUDA deivces and reset CUDA stats and cache


### Setup command line parameters

In [2]:
datadir="../MLDatasets/chembl23_mini"
outdir ="../experiments/mini-SparseChem"

cmd = (
  f" --data_dir                    {datadir} " +
  f" --output_dir                   {outdir} " +
  f" --x                 chembl_23mini_x.npy " +
  f" --y_class           chembl_23mini_y.npy " +
  f" --folding       chembl_23mini_folds.npy " +
  f" --dev                               cpu " +
  f" --fold_va                             0 " +
  f" --fold_inputs                     32000 " +
  f" --batch_ratio                      0.01 " +
  f" --batch_size                        128 " +
  f" --hidden_sizes              600 600 600 " +
  f" --dropouts_trunk            0.65 0.65 0.65 " +
  f" --dropouts_class                      0 " +
  f" --weight_decay                     1e-4 " +
  f" --epochs                            100 " +
  f" --lr                               1e-3 " +
  f" --lr_steps                           10 " +
  f" --lr_alpha                          0.3 " + 
  f" --prefix                             sc " +
  f" --min_samples_class                   1 "
)

# f" --dev              cuda:0 "
# f" --dev              cuda:0 "

### Initializations 

In [3]:
args = initialize(cmd)
def vprint(s=""):
    if args.verbose:
        print(s)


  command line parms : 
------------------------
 data_dir.................  ../MLDatasets/chembl23_mini
 output_dir...............  ../experiments/mini-SparseChem
 x........................  chembl_23mini_x.npy
 y_class..................  chembl_23mini_y.npy
 y_regr...................  None
 y_censor.................  None
 project_name.............  SparseChem-Mini
 exp_id...................  None
 exp_name.................  None
 folder_sfx...............  None
 weights_class............  None
 weights_regr.............  None
 censored_loss............  1
 folding..................  chembl_23mini_folds.npy
 fold_va..................  0
 fold_te..................  None
 batch_ratio..............  0.01
 internal_batch_max.......  None
 normalize_loss...........  None
 normalize_regression.....  0
 normalize_regr_va........  0
 inverse_normalization....  0
 hidden_sizes.............  [600, 600, 600]
 last_hidden_sizes........  None
 weight_decay.............  0.0001
 last_non_linearit

In [4]:
pp.pprint(vars(args))

{   'batch_ratio': 0.01,
    'batch_size': 128,
    'censored_loss': 1,
    'class_feature_size': -1,
    'data_dir': '../MLDatasets/chembl23_mini',
    'dev': 'cpu',
    'dropouts_class': [0.0],
    'dropouts_reg': [],
    'dropouts_trunk': [0.65, 0.65, 0.65],
    'enable_cat_fusion': 0,
    'epochs': 100,
    'eval_frequency': 1,
    'eval_train': 0,
    'exp_id': '575n9nj4',
    'exp_name': '0414_1600',
    'fold_inputs': 32000,
    'fold_te': None,
    'fold_va': 0,
    'folder_sfx': None,
    'folding': '../MLDatasets/chembl23_mini/chembl_23mini_folds.npy',
    'hidden_sizes': [600, 600, 600],
    'input_size_freq': None,
    'input_transform': 'none',
    'internal_batch_max': None,
    'inverse_normalization': 0,
    'last_hidden_sizes': None,
    'last_hidden_sizes_class': None,
    'last_hidden_sizes_reg': None,
    'last_non_linearity': 'relu',
    'lr': 0.001,
    'lr_alpha': 0.3,
    'lr_steps': [10],
    'middle_non_linearity': 'relu',
    'min_samples_auc': None,
    'min

### Assertions

In [5]:
assertions(args)

All assertions passed successfully


### Summary writer

In [6]:
if args.profile == 1:
    assert (args.save_board==1), "Tensorboard should be enabled to be able to profile memory usage."
if args.save_board:
    tb_name = os.path.join(args.output_dir, "", args.name)
    writer  = SummaryWriter(tb_name)
else:
    writer = Nothing()

### Load datasets

In [7]:
ecfp     = sc.load_sparse(args.x)
y_class  = sc.load_sparse(args.y_class)
y_regr   = sc.load_sparse(args.y_regr)
y_censor = sc.load_sparse(args.y_censor)

if (y_regr is None) and (y_censor is not None):
    raise ValueError("y_censor provided please also provide --y_regr.")
if y_class is None:
    y_class = scipy.sparse.csr_matrix((ecfp.shape[0], 0))
if y_regr is None:
    y_regr  = scipy.sparse.csr_matrix((ecfp.shape[0], 0))
if y_censor is None:
    y_censor = scipy.sparse.csr_matrix(y_regr.shape)

folding = np.load(args.folding)
assert ecfp.shape[0] == folding.shape[0], "x and folding must have same number of rows"

## Loading task weights
tasks_class = sc.load_task_weights(args.weights_class, y=y_class, label="y_class")
tasks_regr  = sc.load_task_weights(args.weights_regr, y=y_regr, label="y_regr")

## Input transformation
ecfp = sc.fold_transform_inputs(ecfp, folding_size=args.fold_inputs, transform=args.input_transform)
print(f"count non zero:{ecfp[0].count_nonzero()}")
num_pos    = np.array((y_class == +1).sum(0)).flatten()
num_neg    = np.array((y_class == -1).sum(0)).flatten()
num_class  = np.array((y_class != 0).sum(0)).flatten()
if (num_class != num_pos + num_neg).any():
    raise ValueError("For classification all y values (--y_class/--y) must be 1 or -1.")

num_regr   = np.bincount(y_regr.indices, minlength=y_regr.shape[1])

assert args.min_samples_auc is None, "Parameter 'min_samples_auc' is obsolete. Use '--min_samples_class' that specifies how many samples a task needs per FOLD and per CLASS to be aggregated."

if tasks_class.aggregation_weight is None:
    ## using min_samples rule
    fold_pos, fold_neg = sc.class_fold_counts(y_class, folding)
    n = args.min_samples_class
    tasks_class.aggregation_weight = ((fold_pos >= n).all(0) & (fold_neg >= n)).all(0).astype(np.float64)

if tasks_regr.aggregation_weight is None:
    if y_censor.nnz == 0:
        y_regr2 = y_regr.copy()
        y_regr2.data[:] = 1
    else:
        ## only counting uncensored data
        y_regr2      = y_censor.copy()
        y_regr2.data = (y_regr2.data == 0).astype(np.int32)
    fold_regr, _ = sc.class_fold_counts(y_regr2, folding)
    del y_regr2
    tasks_regr.aggregation_weight = (fold_regr >= args.min_samples_regr).all(0).astype(np.float64)

vprint(f"Input dimension: {ecfp.shape[1]}")
vprint(f"#samples:        {ecfp.shape[0]}")
vprint(f"#classification tasks:  {y_class.shape[1]}")
vprint(f"#regression tasks:      {y_regr.shape[1]}")
vprint(f"Using {(tasks_class.aggregation_weight > 0).sum()} classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).")
vprint(f"Using {(tasks_regr.aggregation_weight > 0).sum()} regression tasks for calculating metrics (RMSE, Rsquared, correlation).")


count non zero:80
Input dimension: 32000
#samples:        18388
#classification tasks:  100
#regression tasks:      0
Using 20 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
Using 0 regression tasks for calculating metrics (RMSE, Rsquared, correlation).


In [8]:
if args.fold_te is not None and args.fold_te >= 0:
    ## removing test data
    assert args.fold_te != args.fold_va, "fold_va and fold_te must not be equal."
    keep    = folding != args.fold_te
    ecfp    = ecfp[keep]
    y_class = y_class[keep]
    y_regr  = y_regr[keep]
    y_censor = y_censor[keep]
    folding = folding[keep]

normalize_inv = None
if args.normalize_regression == 1 and args.normalize_regr_va == 1:
   y_regr, mean_save, var_save = sc.normalize_regr(y_regr)
fold_va = args.fold_va
idx_tr  = np.where(folding != fold_va)[0]
idx_va  = np.where(folding == fold_va)[0]

y_class_tr = y_class[idx_tr]
y_class_va = y_class[idx_va]
y_regr_tr  = y_regr[idx_tr]
y_regr_va  = y_regr[idx_va]
y_censor_tr = y_censor[idx_tr]
y_censor_va = y_censor[idx_va]

if args.normalize_regression == 1 and args.normalize_regr_va == 0:
   y_regr_tr, mean_save, var_save = sc.normalize_regr(y_regr_tr) 
   if args.inverse_normalization == 1:
      normalize_inv = {}
      normalize_inv["mean"] = mean_save
      normalize_inv["var"]  = var_save
num_pos_va  = np.array((y_class_va == +1).sum(0)).flatten()
num_neg_va  = np.array((y_class_va == -1).sum(0)).flatten()
num_regr_va = np.bincount(y_regr_va.indices, minlength=y_regr.shape[1])
pos_rate = num_pos_va/(num_pos_va+num_neg_va)
pos_rate_ref = args.pi_zero
pos_rate = np.clip(pos_rate, 0, 0.99)
cal_fact_aucpr = pos_rate*(1-pos_rate_ref)/(pos_rate_ref*(1-pos_rate))

vprint(f"Input dimension   : {ecfp.shape[1]}")
vprint(f"Input dimension   : {ecfp.shape[1]}")
vprint(f"Training dataset  : {ecfp[idx_tr].shape}")
vprint(f"Validation dataset: {ecfp[idx_va].shape}")
vprint()
vprint(f"#classification tasks:  {y_class.shape[1]}")
vprint(f"#regression tasks    :      {y_regr.shape[1]}")
vprint(f"Using {(tasks_class.aggregation_weight > 0).sum():3d} classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).")
vprint(f"Using {(tasks_regr.aggregation_weight > 0).sum():3d} regression tasks for calculating metrics (RMSE, Rsquared, correlation).")

num_int_batches = 1
if args.batch_size is not None:
    batch_size = args.batch_size
else:
    batch_size = int(np.ceil(args.batch_ratio * idx_tr.shape[0]))

print(f"orig batch size:   {batch_size}")
print(f"orig num int batches:   {num_int_batches}")

if args.internal_batch_max is not None:
    if args.internal_batch_max < batch_size:
        num_int_batches = int(np.ceil(batch_size / args.internal_batch_max))
        batch_size      = int(np.ceil(batch_size / num_int_batches))
print(f"batch size:   {batch_size}")
print(f"num_int_batches:   {num_int_batches}")

Input dimension   : 32000
Input dimension   : 32000
Training dataset  : (14633, 32000)
Validation dataset: (3755, 32000)

#classification tasks:  100
#regression tasks    :      0
Using  20 classification tasks for calculating aggregated metrics (AUCROC, F1_max, etc).
Using   0 regression tasks for calculating metrics (RMSE, Rsquared, correlation).
orig batch size:   128
orig num int batches:   1
batch size:   128
num_int_batches:   1


  pos_rate = num_pos_va/(num_pos_va+num_neg_va)


In [9]:
# #import ipdb; ipdb.set_trace()
# batch_size  = int(np.ceil(args.batch_ratio * idx_tr.shape[0]))
# num_int_batches = 1

# if args.internal_batch_max is not None:
#     if args.internal_batch_max < batch_size:
#         num_int_batches = int(np.ceil(batch_size / args.internal_batch_max))
#         batch_size      = int(np.ceil(batch_size / num_int_batches))
# vprint(f"#internal batch size:   {batch_size}")

In [10]:
tasks_cat_id_list = None
select_cat_ids = None
if tasks_class.cat_id is not None:
    tasks_cat_id_list = [[x,i] for i,x in enumerate(tasks_class.cat_id) if str(x) != 'nan']
    tasks_cat_ids = [i for i,x in enumerate(tasks_class.cat_id) if str(x) != 'nan']
    select_cat_ids = np.array(tasks_cat_ids)
    cat_id_size = len(tasks_cat_id_list)
else:
    cat_id_size = 0

### Dataloaders

In [11]:
dataset_tr = sc.ClassRegrSparseDataset(x=ecfp[idx_tr], y_class=y_class_tr, y_regr=y_regr_tr, y_censor=y_censor_tr, y_cat_columns=select_cat_ids)
dataset_va = sc.ClassRegrSparseDataset(x=ecfp[idx_va], y_class=y_class_va, y_regr=y_regr_va, y_censor=y_censor_va, y_cat_columns=select_cat_ids)

loader_tr = DataLoader(dataset_tr, batch_size=batch_size, num_workers = 8, pin_memory=True, collate_fn=dataset_tr.collate, shuffle=True)
loader_va = DataLoader(dataset_va, batch_size=batch_size, num_workers = 4, pin_memory=True, collate_fn=dataset_va.collate, shuffle=False)

args.input_size  = dataset_tr.input_size
args.output_size = dataset_tr.output_size

args.class_output_size = dataset_tr.class_output_size
args.regr_output_size  = dataset_tr.regr_output_size
args.cat_id_size = cat_id_size



###  WandB setup

In [12]:
#------------------------------------------------------------------
# ### WandB setup
#------------------------------------------------------------------
ns = types.SimpleNamespace()
ns.current_epoch  = 0
ns.current_iter   = 0
ns.best_results   = {}
ns.best_metrics   = None
ns.best_value     = 0 
ns.best_iter      = 0
ns.best_epoch     = 0
ns.p_epoch        = 0
ns.num_prints     = 0

init_wandb(ns, args)
wandb.define_metric("best_accuracy", summary="last")
wandb.define_metric("best_epoch", summary="last")

575n9nj4 0414_1600 SparseChem-Mini


[34m[1mwandb[0m: Currently logged in as: [33mkbardool[0m (use `wandb login --relogin` to force relogin)


 PROJECT NAME: SparseChem-Mini
 RUN ID      : 575n9nj4 
 RUN NAME    : 0414_1600


<wandb.sdk.wandb_metric.Metric at 0x2b9121202250>

### Network

In [13]:
#------------------------------------------------------------------
# ### Network
#------------------------------------------------------------------
dev  = torch.device(args.dev)

net  = sc.SparseFFN(args).to(dev)
loss_class = torch.nn.BCEWithLogitsLoss(reduction="none")
loss_regr  = sc.censored_mse_loss

if not args.censored_loss:
    loss_regr = functools.partial(loss_regr, censored_enabled=False)

tasks_class.training_weight = tasks_class.training_weight.to(dev)
tasks_regr.training_weight  = tasks_regr.training_weight.to(dev)
tasks_regr.censored_weight  = tasks_regr.censored_weight.to(dev)

print("Network:")
print(net)

Network:
SparseFFN(
  (net): Sequential(
    (0): SparseInputNet(
      (net_freq): SparseLinear(in_features=32000, out_features=600, bias=True)
    )
    (1): MiddleNet(
      (net): Sequential(
        (layer_0): Sequential(
          (0): ReLU()
          (1): Dropout(p=0.65, inplace=False)
          (2): Linear(in_features=600, out_features=600, bias=True)
        )
        (layer_1): Sequential(
          (0): ReLU()
          (1): Dropout(p=0.65, inplace=False)
          (2): Linear(in_features=600, out_features=600, bias=True)
        )
      )
    )
  )
  (classLast): LastNet(
    (net): Sequential(
      (initial_layer): Sequential(
        (0): ReLU()
        (1): Dropout(p=0.65, inplace=False)
        (2): Linear(in_features=600, out_features=100, bias=True)
      )
    )
  )
  (regrLast): Sequential(
    (0): LastNet(
      (net): Sequential(
        (initial_layer): Sequential(
          (0): Tanh()
          (1): Dropout(p=0.65, inplace=False)
          (2): Linear(in_fea



###  Optimizer, Scheduler, GradScaler

In [14]:
#------------------------------------------------------------------
# ###  Optimizer, Scheduler, GradScaler
#------------------------------------------------------------------
optimizer = torch.optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
scheduler = MultiStepLR(optimizer, milestones=args.lr_steps, gamma=args.lr_alpha)
scaler = torch.cuda.amp.GradScaler()

wandb.watch(net, log='all', log_freq= 10)     ###  Weights and Biases Initialization 
reporter = None
h = None



### setup memory profiling reporter

In [15]:
if args.profile == 1:
   torch_gpu_id = torch.cuda.current_device()
   if "CUDA_VISIBLE_DEVICES" in os.environ:
      ids = list(map(int, os.environ.get("CUDA_VISIBLE_DEVICES", "").split(",")))
      nvml_gpu_id = ids[torch_gpu_id] # remap
   else:
      nvml_gpu_id = torch_gpu_id
   h = nvmlDeviceGetHandleByIndex(nvml_gpu_id)

if args.profile == 1:
   #####   output saving   #####
   if not os.path.exists(args.output_dir):
       os.makedirs(args.output_dir)

   reporter = MemReporter(net)

   with open(f"{args.output_dir}/memprofile.txt", "w+") as profile_file:
        with redirect_stdout(profile_file):
             profile_file.write(f"\nInitial model detailed report:\n\n")
             reporter.report()

In [16]:
#------------------------------------------------------------------
# ### Display network and other values
#------------------------------------------------------------------
print(optimizer)
# args.eval_train = 0 
# args.epochs     = 5
print(f"dev                  :    {dev}")
print(f"args.lr              :    {args.hidden_sizes}")
print(f"args.lr              :    {args.lr}")
print(f"args.weight_decay    :    {args.weight_decay}")
print(f"args.lr_steps        :    {args.lr_steps}")
print(f"args.lr_steps        :    {args.lr_steps}")
print(f"num_int_batches      :    {num_int_batches}")
print(f"batch_size           :    {batch_size}")
print(f"EPOCHS               :    {args.epochs}")
print(f"scaler               :    {scaler}")
print(f"args.normalize_loss  :    {args.normalize_loss}")
print(f"loss_class           :    {loss_class}")
print(f"mixed precision      :    {args.mixed_precision}")
print(f"args.eval_train      :    {args.eval_train}")
print(dev.type == 'cpu') 


Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    initial_lr: 0.001
    lr: 0.001
    weight_decay: 0.0001
)
dev                  :    cpu
args.lr              :    [600, 600, 600]
args.lr              :    0.001
args.weight_decay    :    0.0001
args.lr_steps        :    [10]
args.lr_steps        :    [10]
num_int_batches      :    1
batch_size           :    128
EPOCHS               :    100
scaler               :    <torch.cuda.amp.grad_scaler.GradScaler object at 0x2b912178bbe0>
args.normalize_loss  :    None
loss_class           :    BCEWithLogitsLoss()
mixed precision      :    0
args.eval_train      :    0
True


##  Training Loop

In [17]:
import warnings
# from torch.serialization import SourceChangeWarning 
warnings.filterwarnings("ignore", category=UserWarning)    

In [18]:
ns.end_epoch = ns.current_epoch + args.epochs

for ns.current_epoch in range(ns.current_epoch, ns.end_epoch, 1):
    t0 = time.time()
    sc.train_class_regr(
        net, optimizer,
        loader          = loader_tr,
        loss_class      = loss_class,
        loss_regr       = loss_regr,
        dev             = dev,
        weights_class   = tasks_class.training_weight * (1-args.regression_weight) * 2,
        weights_regr    = tasks_regr.training_weight * args.regression_weight * 2,
        censored_weight = tasks_regr.censored_weight,
        normalize_loss  = args.normalize_loss,
        num_int_batches = num_int_batches,
        progress        = False,
        writer          = writer,
        epoch           = ns.current_epoch,
        args            = args,
        scaler          = scaler,
        nvml_handle     = h)

    if args.profile == 1:
       with open(f"{args.output_dir}/memprofile.txt", "a+") as profile_file:
            profile_file.write(f"\nAfter epoch {epoch} model detailed report:\n\n")
            with redirect_stdout(profile_file):
                 reporter.report()

    t1 = time.time()
    eval_round = (args.eval_frequency > 0) and ((ns.current_epoch + 1) % args.eval_frequency == 0)
    last_round = ns.current_epoch == args.epochs - 1

    if eval_round or last_round:

        results_va = sc.evaluate_class_regr(net, loader_va, loss_class, loss_regr, 
                                            tasks_class= tasks_class, 
                                            tasks_regr = tasks_regr, 
                                            dev        = dev, 
                                            progress   = False, 
                                            normalize_inv=normalize_inv, 
                                            cal_fact_aucpr=cal_fact_aucpr)
        
        for key, val in results_va["classification_agg"].items():
            writer.add_scalar("val_metrics:aggregated/"+key, val, ns.current_epoch * batch_size)


        if args.eval_train:
            results_tr = sc.evaluate_class_regr(net, loader_tr, loss_class, loss_regr, 
                                                tasks_class = tasks_class, 
                                                tasks_regr  = tasks_regr, 
                                                dev         = dev, 
                                                progress    = args.verbose >= 2)
            for key, val in results_tr["classification_agg"].items():
                writer.add_scalar("trn_metrics:aggregated/"+key, val, ns.current_epoch * batch_size)

        else:
            results_tr = None

        if args.verbose:
            ## printing a new header every 20 lines
            header = ns.num_prints % 20 == 0
            ns.num_prints += 1
            sc.print_metrics_cr(ns.current_epoch, t1 - t0, results_tr, results_va, header)
            
        wandb.log(results_va["classification_agg"].to_dict())

        check_for_improvement(ns, results_va)
    
    scheduler.step()



Epoch  |      logl   bceloss  avg prec   auc roc    auc pr aucpr_cal    f1_max |      rmse  rsquared  corrcoef | tr_time 
0      |   0.12110   0.33791   0.85121   0.82580   0.84244   0.70208   0.86863 |       nan       nan       nan |   11.5 
Previous best_epoch:     0   best iter:     0,   best_value: 0.00000
New      best_epoch:     0   best iter:     0,   best_value: 0.85121
1      |   0.09937   0.27976   0.90203   0.89778   0.89653   0.81097   0.89691 |       nan       nan       nan |   13.1 
Previous best_epoch:     0   best iter:     0,   best_value: 0.85121
New      best_epoch:     1   best iter:     0,   best_value: 0.90203
2      |   0.09358   0.25963   0.93406   0.90278   0.93111   0.83246   0.92854 |       nan       nan       nan |   11.8 
Previous best_epoch:     1   best iter:     0,   best_value: 0.90203
New      best_epoch:     2   best iter:     0,   best_value: 0.93406
3      |   0.08420   0.24055   0.94384   0.90958   0.94219   0.85367   0.94488 |       nan       nan 

56     |   0.11581   0.39291   0.94053   0.88350   0.93810   0.83249   0.94432 |       nan       nan       nan |   20.8 
57     |   0.11451   0.39450   0.93595   0.87821   0.93266   0.81969   0.94025 |       nan       nan       nan |   21.2 
58     |   0.11954   0.42541   0.94006   0.88000   0.93730   0.82085   0.94124 |       nan       nan       nan |   22.7 
59     |   0.11878   0.44885   0.93932   0.87645   0.93711   0.82294   0.94245 |       nan       nan       nan |   21.2 
Epoch  |      logl   bceloss  avg prec   auc roc    auc pr aucpr_cal    f1_max |      rmse  rsquared  corrcoef | tr_time 
60     |   0.12070   0.41997   0.93738   0.87837   0.93457   0.81765   0.93866 |       nan       nan       nan |   21.1 
61     |   0.12292   0.43225   0.94017   0.88359   0.93792   0.82866   0.93875 |       nan       nan       nan |   21.9 
62     |   0.12901   0.46336   0.93782   0.88185   0.93548   0.82303   0.93931 |       nan       nan       nan |   22.0 
63     |   0.12029   0.40952   

wandb: Network error (ReadTimeout), entering retry loop.


74     |   0.11376   0.41420   0.93751   0.88444   0.93486   0.82702   0.94046 |       nan       nan       nan |   22.4 
75     |   0.11521   0.40900   0.91412   0.88579   0.89899   0.80038   0.92706 |       nan       nan       nan |   22.8 
76     |   0.11651   0.39107   0.94958   0.90976   0.94781   0.84980   0.94627 |       nan       nan       nan |   24.0 
77     |   0.11822   0.38818   0.94944   0.91101   0.94781   0.85226   0.94132 |       nan       nan       nan |   23.1 
78     |   0.11836   0.37622   0.94912   0.90968   0.94728   0.84470   0.94232 |       nan       nan       nan |   22.9 
79     |   0.12005   0.37983   0.94952   0.91610   0.94744   0.84840   0.94615 |       nan       nan       nan |   23.4 
Epoch  |      logl   bceloss  avg prec   auc roc    auc pr aucpr_cal    f1_max |      rmse  rsquared  corrcoef | tr_time 
80     |   0.12450   0.43207   0.93301   0.88633   0.92901   0.82380   0.94112 |       nan       nan       nan |   22.7 
81     |   0.12400   0.40750   

In [25]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best Precision :   {ns.best_value:.5f}\n")

Best Epoch :       44
Best Iteration :   0 
Best Precision :   0.97445



In [22]:
print(f"Best Epoch :       {ns.best_epoch}\n"
      f"Best Iteration :   {ns.best_iter} \n"
      f"Best Precision :   {ns.best_value:.5f}\n")
print()
pp.pprint(results_va['classification_agg'].to_dict())

Best Epoch :       60
Best Iteration :   0 
Best Precision :   0.94426


{   'auc_pr': 0.9345455193395412,
    'auc_pr_cal': 0.8043372361813058,
    'avg_prec_score': 0.9380538265076385,
    'bceloss': 0.27802124503068626,
    'f1_max': 0.9326228087525288,
    'kappa': 0.6152092853635318,
    'kappa_max': 0.8013483423275729,
    'logloss': 0.08353786508343729,
    'num_tasks_agg': 20.0,
    'num_tasks_total': 100.0,
    'p_f1_max': 0.6366572086059022,
    'p_kappa_max': 0.7515333876013757,
    'roc_auc_score': 0.8945680896661536}


## Post Training 

In [22]:
#print("DEBUG data for hidden spliting")
#print (f"Classification mask: Sum = {net.classmask.sum()}\t Uniques: {np.unique(net.classmask)}")
#print (f"Regression mask:     Sum = {net.regmask.sum()}\t Uniques: {np.unique(net.regmask)}")
#print (f"overlap: {(net.regmask * net.classmask).sum()}")

writer.close()
vprint()
if args.profile == 1:
   multiplexer = sc.create_multiplexer(tb_name)
#   sc.export_scalars(multiplexer, '.', "GPUmem", "testcsv.csv")
   data = sc.extract_scalars(multiplexer, '.', "GPUmem")
   vprint(f"Peak GPU memory used: {sc.return_max_val(data)}MB")
vprint("Saving performance metrics (AUCs) and model.")

#####   model saving   #####
if not os.path.exists(args.output_dir):
   os.makedirs(args.output_dir)

model_file = f"{args.output_dir}/{args.name}.pt"
out_file   = f"{args.output_dir}/{args.name}.json"

if args.save_model:
   torch.save(net.state_dict(), model_file)
   vprint(f"Saved model weights into '{model_file}'.")

results_va["classification"]["num_pos"] = num_pos_va
results_va["classification"]["num_neg"] = num_neg_va
results_va["regression"]["num_samples"] = num_regr_va

if results_tr is not None:
    results_tr["classification"]["num_pos"] = num_pos - num_pos_va
    results_tr["classification"]["num_neg"] = num_neg - num_neg_va
    results_tr["regression"]["num_samples"] = num_regr - num_regr_va

stats=None
if args.normalize_regression == 1 :
   stats={}
   stats["mean"] = mean_save
   stats["var"]  = np.array(var_save)[0]
sc.save_results(out_file, args, validation=results_va, training=results_tr, stats=stats)

vprint(f"Saved config and results into '{out_file}'.\nYou can load the results by:\n  import sparsechem as sc\n  res = sc.load_results('{out_file}')")


Saving performance metrics (AUCs) and model.
Saved model weights into '../experiments/mini-SparseChem/0414_1250/sc_50_lr0.001_do0.0.pt'.
Saved config and results into '../experiments/mini-SparseChem/0414_1250/sc_50_lr0.001_do0.0.json'.
You can load the results by:
  import sparsechem as sc
  res = sc.load_results('../experiments/mini-SparseChem/0414_1250/sc_50_lr0.001_do0.0.json')


In [23]:
ns.wandb_run.finish()




VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

You should always run with libnvidia-ml.so that is installed with your
NVIDIA Display Driver. By default it's installed in /usr/lib and /usr/lib64.
libnvidia-ml.so in GDK package is a stub library that is attached only for
build purposes (e.g. machine that you build your application doesn't have
to have Display Driver installed).
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Linked to libnvidia-ml library at wrong path : /usr/lib64/libnvidia-ml.so.1


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

You should always run with libnvidia-ml.so that is installed with your
NVIDIA Display Driver. By default it's installed in /usr/lib and /usr/lib64.
libnvidia-ml.so in GDK package is a stub library that is attached only for
build purposes (e.g. machine that you build your application doesn't have
to have Display Driver installed).
!!!!!!!!!!!!!!!

0,1
auc_pr,▁▇▆▇▇▇█▇▇█▇███▇██▇██████████████████████
auc_pr_cal,▁▆▅▆▇▇▇▆▆▇▆█▇▇▇▆▆▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
avg_prec_score,▁▇▆▇▇▇▇▇▇█▇███▇▇█▇██████████████████████
bceloss,▆▁▂▃▄▄▄▄▄▅▅▅▅▄▆▅▅▅▇▆▆▆▆▇▇▆▇▇█▇█▇▇███▇▇▇█
best_accuracy,▁▂▇▇▇████
best_epoch,▁▁▁▁▂▃▄▇█
f1_max,▁▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
kappa,▁▆▆▇▇▇▆▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇█
kappa_max,▁▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇▇▇▇▇▇█████████████████
logloss,█▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▃▄▃▃▄▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▄

0,1
auc_pr,0.93237
auc_pr_cal,0.79555
avg_prec_score,0.93618
bceloss,0.29972
f1_max,0.94027
kappa,0.60585
kappa_max,0.79729
logloss,0.0851
num_tasks_agg,20.0
num_tasks_total,100.0


## Results 

In [59]:
results_va['classification'][0:20]

Unnamed: 0_level_0,roc_auc_score,auc_pr,avg_prec_score,f1_max,p_f1_max,kappa,kappa_max,p_kappa_max,bceloss,auc_pr_cal,num_pos,num_neg
task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,0.98798,0.998324,0.9983,0.982301,0.984143,0.786753,0.877266,0.984143,0.116708,0.975328,115,17
1,,,,,,,,,,,31,0
2,,,,,,,,,,,17,0
3,,,,,,,,,,,54,0
4,,,,,,,,,,,7,0
5,0.944444,0.996995,0.997076,0.972973,0.422583,-0.055556,0.641509,0.997202,0.120043,0.94986,18,1
6,1.0,1.0,1.0,1.0,0.989261,0.797642,1.0,0.989261,0.016063,1.0,203,3
7,1.0,1.0,1.0,1.0,0.943416,0.0,1.0,0.943416,1.419727,1.0,3,4
8,0.990909,0.999672,0.999675,0.990991,0.968706,0.0,0.791209,0.983577,0.10724,0.985099,55,2
9,1.0,1.0,1.0,1.0,0.999539,0.66087,1.0,0.999539,0.017546,1.0,76,2


In [60]:
pp.pprint(results_va)

{   'classification':       roc_auc_score    auc_pr  avg_prec_score    f1_max  p_f1_max     kappa  \
task                                                                          
0           0.98798  0.998324          0.9983  0.982301  0.984143  0.786753   
1               NaN       NaN             NaN       NaN       NaN       NaN   
2               NaN       NaN             NaN       NaN       NaN       NaN   
3               NaN       NaN             NaN       NaN       NaN       NaN   
4               NaN       NaN             NaN       NaN       NaN       NaN   
...             ...       ...             ...       ...       ...       ...   
95              NaN       NaN             NaN       NaN       NaN       NaN   
96              NaN       NaN             NaN       NaN       NaN       NaN   
97              NaN       NaN             NaN       NaN       NaN       NaN   
98          1.00000  1.000000          1.0000  1.000000  0.834234  1.000000   
99              NaN       NaN 

In [98]:
print(rstr)

0412_1319


## Misc

In [33]:
import wandb
def restart_wandb(exp_id, exp_name, project_name, resume = "allow" ):
    print(exp_id, exp_name, project_name) 
    wandb_run = wandb.init(project = project_name, 
                                     entity  = "kbardool", 
                                     id      = exp_id, 
                                     name    = exp_name,
                                     resume=resume )
    
    print(f" PROJECT NAME: {wandb_run.project}\n"
          f" RUN ID      : {wandb_run.id} \n"
          f" RUN NAME    : {wandb_run.name}")     
 
    return wandb_run 


In [34]:
run = restart_wandb("d2rw3bdq","0413_0509","SparseChem-Mini")

d2rw3bdq 0413_0509 SparseChem-Mini





VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

You should always run with libnvidia-ml.so that is installed with your
NVIDIA Display Driver. By default it's installed in /usr/lib and /usr/lib64.
libnvidia-ml.so in GDK package is a stub library that is attached only for
build purposes (e.g. machine that you build your application doesn't have
to have Display Driver installed).
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Linked to libnvidia-ml library at wrong path : /usr/lib64/libnvidia-ml.so.1



 PROJECT NAME: SparseChem-Mini
 RUN ID      : d2rw3bdq 
 RUN NAME    : 0413_0509


In [43]:
print(run)

NameError: name 'run' is not defined

In [41]:
run.finish()

In [42]:
del run

In [7]:
# cmd = (
#   f" --x       /home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic/chembl_23mini_x.npy " +
#   f" --y_class /home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic/chembl_23mini_adashare_y_all_bin_sparse.npy " +
#   f" --folding /home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic/chembl_23mini_folds.npy " +
#   f" --output_dir {output_dir}" +    
#   f" --fold_va           0 " +
#   f" --batch_ratio    0.02 " +
#   f" --hidden_sizes   25 25 25 25 25 25 " +
#   f" --dropouts_trunk  0  0  0  0  0  0 " +
#   f" --weight_decay   1e-4 " 
#   f" --epochs           40 " +
#   f" --lr             1e-3 " +
#   f" --lr_steps         10 " +
#   f" --lr_alpha        0.3" 
# )

# cmd = (
#   f" --x       {data_dir}/chembl_23mini_x.npy " +
#   f" --y_class {data_dir}/chembl_23mini_adashare_y_all_bin_sparse.npy " +
#   f" --folding {data_dir}/chembl_23mini_folds.npy " +
#   f" --output_dir {output_dir}" +    
#   f" --fold_va            0 " +
#   f" --batch_ratio     0.02 " +
#   f" --hidden_sizes   40 40 " +
#   f" --dropouts_trunk  0  0 " +
#   f" --weight_decay   1e-4 " +
#   f" --epochs           20 " +
#   f" --lr             1e-3 " +
#   f" --lr_steps         10 " +
#   f" --lr_alpha        0.3 " 
# )

#   f" --hidden_sizes   400 400 " +
#   f" --last_dropout   0.2 " +
#   f" --middle_dropout 0.2 " +
#   f" --x       ./{data_dir}/chembl_23_x.mtx " +
#   f" --y_class ./{data_dir}/chembl_23_y.mtx " +
#   f" --folding ./{data_dir}/folding_hier_0.6.npy " +

#### copied from SparseChemDev 

# cmd = (
#         f" --x       ./{data_dir}/chembl_23mini_x.npy" +
#         f" --y_class ./{data_dir}/chembl_23mini_y.npy" +
#         f" --folding ./{data_dir}/chembl_23mini_folds.npy" +
#         f" --hidden_sizes 20 30 40 " +  
#         f" --output_dir {output_dir}" +
#         f" --batch_ratio 0.1" +
#         f" --epochs 2" +
#         f" --lr 1e-3" +
#         f" --lr_steps 1" +
#         f" --dev {dev}" +
#         f" --verbose 1")
#         f" --input_size_freq  40"
#         f" --tail_hidden_size  10"

In [None]:
# data_dir="chembl23_data"
# data_dir="chembl23_run_01152022"
# rstr = "synthetic_data_model" ##random_str(12)
# rstr = "synthetic_data_model_03042022" ##random_str(12)
# output_dir = f"./models-{rstr}/"
# output_dir = f"./{data_dir}/models-{rstr}/"
# output dir kbardool/kusanagi/experiments/SparseChem/0116_0843


In [4]:
# dev = "gpu" 
# data_dir="chembl23_data"
# data_dir="chembl23_run_01152022"
# data_dir = "/home/kbardool/kusanagi/MLDatasets/chembl_23mini_synthetic"

# rm_output=False

# rstr = datetime.now().strftime("%m%d_%H%M")
# rstr = "synthetic_data_model" ##random_str(12)
# rstr = "synthetic_data_model_03042022" ##random_str(12)

# output_dir = f"./models-{rstr}/"
# output_dir = f"./{data_dir}/models-{rstr}/"

# output dir kbardool/kusanagi/experiments/SparseChem/0116_0843
# output_dir = f"/home/kbardool/kusanagi/experiments/SparseChem/{rstr}"
# print(output_dir)