In [1]:
import random, os

import numpy as np
import torch
from rdkit import RDLogger

from grover.util.parsing import parse_args, get_newest_train_args
from grover.util.utils import create_logger
from task.cross_validate import cross_validate, randomsearch, gridsearch, make_confusion_matrix
from task.fingerprint import generate_fingerprints, generate_embvec
from task.predict import make_predictions, write_prediction
from task.pretrain import pretrain_model, subset_learning
from grover.data.torchvocab import MolVocab

from grover.topology.mol_tree import *

#add for gridsearch
from argparse import ArgumentParser, Namespace
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

using Horovod for multi-GPU training


In [2]:
import csv
import logging
import os
import pickle
import time
from argparse import Namespace
from logging import Logger
from typing import List

import numpy as np
import pandas as pd
import torch
from torch.optim.lr_scheduler import ExponentialLR
from torch.utils.data import DataLoader

from grover.data import MolCollator
from grover.data import StandardScaler
from grover.util.metrics import get_metric_func
from grover.util.nn_utils import initialize_weights, param_count
from grover.util.scheduler import NoamLR
from grover.util.utils import build_optimizer, build_lr_scheduler, makedirs, load_checkpoint, get_loss_func, \
    save_checkpoint, build_model
from grover.util.utils import get_class_sizes, get_data, split_data, get_task_names
from task.predict import predict, evaluate, evaluate_predictions

In [3]:
def setup(seed):
    # frozen random seed
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True

In [9]:
from grover.util.parsing import *
def parse_args(argv=None) -> Namespace:
    """
    Parses arguments for training and testing (includes modifying/validating arguments).

    This notebook-local definition replaces the ``parse_args`` imported from
    ``grover.util.parsing``: the argument vector is passed in explicitly instead
    of being hard-coded in the body, so one definition can drive every section
    of the notebook.

    :param argv: List of command-line tokens starting with the subcommand name
                 (``finetune``, ``eval``, ``predict``, ``fingerprint`` or ``pretrain``).
                 When omitted, the notebook's default ``eval`` run on ``data/tg423.csv``
                 is used.
    :return: A Namespace containing the parsed, modified, and validated args.
    """
    if argv is None:
        # Default run of this notebook. An earlier configuration, kept for reference:
        # ['eval','--data_path','data/tg407.csv','--features_path','data/tg407.npz','--save_dir','model/test','--checkpoint_path','grover_large.pt','--self_attention','--no_features_scaling','--split_type','scaffold_balanced','--epochs','3','--ffn_hidden_size','900','--num_folds','3','--batch_size','32']
        argv = ['eval', '--data_path', 'data/tg423.csv', '--features_path', 'data/tg423.npz',
                '--dataset_type', 'classification', '--split_type', 'scaffold_balanced',
                '--no_features_scaling', '--num_folds', '3',
                '--checkpoint_dir', 'model/tg423_RS1/iter_16', '--confusionmatrix']

    parser = ArgumentParser()
    subparser = parser.add_subparsers(title="subcommands",
                                      dest="parser_name",
                                      help="Subcommands for fintune, prediction, and fingerprint.")
    parser_finetune = subparser.add_parser('finetune', help="Fine tune the pre-trained model.")
    add_finetune_args(parser_finetune)
    # 'eval' accepts exactly the same options as 'finetune'.
    parser_eval = subparser.add_parser('eval', help="Evaluate the results of the pre-trained model.")
    add_finetune_args(parser_eval)
    parser_predict = subparser.add_parser('predict', help="Predict results from fine tuned model.")
    add_predict_args(parser_predict)
    parser_fp = subparser.add_parser('fingerprint', help="Get the fingerprints of SMILES.")
    add_fingerprint_args(parser_fp)
    parser_pretrain = subparser.add_parser('pretrain', help="Pretrain with unlabelled SMILES.")
    add_pretrain_args(parser_pretrain)

    args = parser.parse_args(argv)

    # Post-process / validate the namespace with the hook matching the subcommand.
    if args.parser_name in ('finetune', 'eval'):
        modify_train_args(args)
    elif args.parser_name == "pretrain":
        modify_pretrain_args(args)
    elif args.parser_name == 'predict':
        modify_predict_args(args)
    elif args.parser_name == 'fingerprint':
        modify_fingerprint_args(args)

    return args

# Evaluation (`eval` subcommand)

In [6]:
from argparse import Namespace
from logging import Logger
from typing import List

import numpy as np
import torch
import torch.utils.data.distributed

from grover.data.scaler import StandardScaler
from grover.util.utils import get_class_sizes, get_data, split_data, get_task_names, get_loss_func
from grover.util.utils import load_checkpoint
from task.predict import evaluate_predictions, evaluate_predictions_cfm
from grover.util.metrics import get_metric_func
from grover.util.nn_utils import param_count
from task.predict import predict

In [23]:
# Seed all random number generators before anything else runs.
setup(seed=42)
# Reference MolVocab so the import is not reported as unused by pylint.
a = MolVocab
# Suppress RDKit log messages below CRITICAL.
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)

# Bind the MolVocab class to a second name (no instance is created here).
mol_vocab = MolVocab

# Build the argument namespace from the notebook-local parse_args().
args = parse_args()

# Logger named 'eval'; save_dir comes from the parsed arguments.
logger = create_logger(name='eval', save_dir=args.save_dir, quiet=False)

In [24]:
# Evaluation set-up: load the dataset, split it, optionally normalise features,
# fit the target scaler (regression only) and resolve the metric function.

# Route messages through the logger when one exists, otherwise fall back to print.
if logger is not None:
    debug, info = logger.debug, logger.info
else:
    debug = info = print

# Always device 0 here (the first cell also sets CUDA_VISIBLE_DEVICES to "0").
torch.cuda.set_device(0)

# Get data
debug('Loading data')
args.task_names = get_task_names(args.data_path)
data = get_data(path=args.data_path, args=args, logger=logger)
# Record dataset-derived sizes on args so later helpers can read them.
args.num_tasks = data.num_tasks()
args.features_size = data.features_size()
debug(f'Number of tasks = {args.num_tasks}')

# Split data
debug(f'Splitting data with seed {args.seed}')

# Fixed 80/10/10 train/val/test split; the strategy is taken from args.split_type.
train_data, val_data, test_data = split_data(data=data,
                                             split_type=args.split_type,
                                             sizes=[0.8, 0.1, 0.1],
                                             seed=args.seed,
                                             args=args,
                                             logger=logger)

# Classification only: log the class proportions returned by get_class_sizes().
if args.dataset_type == 'classification':
    class_sizes = get_class_sizes(data, args)
    debug('Class sizes')
    if args.multi_class : 
        # Multi-class: rows are labelled by their position.
        for i, task_class_sizes in enumerate(class_sizes):
            debug(f'{i}th class size '
                  f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}')

    else : 
        # Otherwise: rows are labelled with the task name at the same index.
        for i, task_class_sizes in enumerate(class_sizes):
            debug(f'{args.task_names[i]} '
                  f'{", ".join(f"{cls}: {size * 100:.2f}%" for cls, size in enumerate(task_class_sizes))}')

# The scaler returned for the training split is re-applied to the val and test splits.
if args.features_scaling:
    features_scaler = train_data.normalize_features(replace_nan_token=0)
    val_data.normalize_features(features_scaler)
    test_data.normalize_features(features_scaler)
else:
    features_scaler = None

args.train_data_size = len(train_data)

debug(f'Total size = {len(data):,} | '
      f'train size = {len(train_data):,} | val size = {len(val_data):,} | test size = {len(test_data):,}')

# Initialize the target scaler (regression only).
scaler = None
if args.dataset_type == 'regression':
    debug('Fitting scaler')
    # The SMILES half of this pair is discarded; only the targets are used.
    _, train_targets = train_data.smiles(), train_data.targets()
    scaler = StandardScaler().fit(train_targets)
    scaled_targets = scaler.transform(train_targets).tolist()
    train_data.set_targets(scaled_targets)

    # Validation targets are transformed with the scaler fitted on the training
    # targets; the test targets are not modified in this cell.
    val_targets = val_data.targets()
    scaled_val_targets = scaler.transform(val_targets).tolist()
    val_data.set_targets(scaled_val_targets)

metric_func = get_metric_func(metric=args.metric)

# Set up test set evaluation
test_smiles, test_targets = test_data.smiles(), test_data.targets()
sum_test_preds = np.zeros((len(test_smiles), args.num_tasks))

# Load model: evaluation cannot proceed without a trained checkpoint, so fail
# here with a clear message instead of hitting an undefined `model` further down.
if args.checkpoint_paths is None:
    raise ValueError('args.checkpoint_paths is None: evaluation needs a trained checkpoint.')

# The checkpoint is selected by looking for "fold_<seed>" in its path.
# NOTE(review): this is a substring match, so "fold_1" also matches "fold_10";
# as before, the last matching path wins.
cur_model = args.seed
fold_tag = "fold_%d" % cur_model
matching_paths = [path for path in args.checkpoint_paths if fold_tag in path]
if not matching_paths:
    raise ValueError(f'No checkpoint path contains "{fold_tag}"; '
                     f'candidates were: {list(args.checkpoint_paths)}')
target_path = matching_paths[-1]

debug(f'Loading model {args.seed} from {target_path}')
model = load_checkpoint(target_path, current_args=args, cuda=args.cuda, logger=logger)
# Get loss and metric functions
loss_func = get_loss_func(args, model)

debug(f'Number of parameters = {param_count(model):,}')

Loading data
Loading data
Number of tasks = 1
Number of tasks = 1
Splitting data with seed 0
Splitting data with seed 0
100%|##########| 1577/1577 [00:00<00:00, 6148.33it/s]
Total scaffolds = 418 | train scaffolds = 216 | val scaffolds = 102 | test scaffolds = 100
Total scaffolds = 418 | train scaffolds = 216 | val scaffolds = 102 | test scaffolds = 100
Label averages per scaffold, in decreasing order of scaffold frequency,capped at 10 scaffolds and 20 labels: [(array([0.25888325]), array([197])), (array([0.18830243]), array([701])), (array([0.]), array([1])), (array([0.]), array([1])), (array([0.]), array([1])), (array([0.]), array([1])), (array([0.]), array([1])), (array([0.]), array([2])), (array([0.]), array([1])), (array([0.]), array([1]))]
Label averages per scaffold, in decreasing order of scaffold frequency,capped at 10 scaffolds and 20 labels: [(array([0.25888325]), array([197])), (array([0.18830243]), array([701])), (array([0.]), array([1])), (array([0.]), array([1])), (array

In [25]:
test_preds, _ = predict(
        model=model,
        data=test_data,
        batch_size=args.batch_size,
        loss_func=loss_func,
        logger=logger,
        shared_dict={},
        scaler=scaler,
        args=args
    )

In [26]:
preds=test_preds
targets=test_targets
num_tasks=args.num_tasks
dataset_type=args.dataset_type

In [53]:
roc_auc_score(test_targets, test_preds, multi_class='ovr')

0.8670040612295921

In [57]:
threshold=0.5

In [75]:
torch.set_printoptions(precision=16)

In [85]:
preds2 = np.array(preds)

In [84]:
preds2.shape

torch.Size([159, 3])

In [102]:
from sklearn.metrics import accuracy_score, mean_squared_error, roc_auc_score, mean_absolute_error, r2_score, \
    precision_recall_curve, auc, recall_score, confusion_matrix, f1_score, precision_score, classification_report, multilabel_confusion_matrix

In [108]:
torch.tensor(targets).shape

torch.Size([159, 1])

In [122]:
hard_preds = [np.argmax(x) for x in preds]

In [126]:
result = multilabel_confusion_matrix(targets, hard_preds).ravel()

In [131]:
# `result` is the flattened stack of per-class 2x2 matrices, so every class
# owns four consecutive entries in the order tn, fp, fn, tp.
tn, fp, fn, tp = [], [], [], []
for i in range(args.multi_class_num):
    offset = 4 * i
    tn.append(result[offset])
    fp.append(result[offset + 1])
    fn.append(result[offset + 2])
    tp.append(result[offset + 3])

In [141]:
acc = accuracy_score(targets, hard_preds)
rec = recall_score(targets, hard_preds, average='macro')
prec = precision_score(targets, hard_preds, average='macro')
f1s = f1_score(targets, hard_preds, average='macro')

In [143]:
def confusion_mat_multi(targets: List[int], preds: List[float], threshold: float = 0.5,
                        num_classes: int = None) -> tuple:
    """
    Computes macro-averaged metrics and per-class confusion counts for a multi-class task.

    Hard predictions are the argmax over each row of class scores.

    :param targets: True class indices, either flat or as a single-column list of lists.
    :param preds: One row of per-class scores for every sample.
    :param threshold: Unused; hard predictions come from the argmax. Kept so existing
                      call sites that pass it keep working.
    :param num_classes: Number of classes. Defaults to the width of the first row of
                        ``preds``. Passing it to scikit-learn guarantees one confusion
                        matrix per class even when a class is absent from both the
                        targets and the predictions.
    :return: A tuple ``(acc, rec, prec, f1s, tp, fp, tn, fn)``: accuracy, macro recall,
             macro precision and macro F1, followed by four lists of per-class counts
             indexed by class.
    :raises ValueError: If ``preds`` is empty.
    """
    if len(preds) == 0:
        raise ValueError('confusion_mat_multi needs at least one prediction.')
    if num_classes is None:
        num_classes = len(preds[0])

    hard_preds = [np.argmax(x) for x in preds]

    # Flatten a single-column target array so every metric sees a 1-D label vector.
    y_true = np.asarray(targets)
    if y_true.ndim == 2 and y_true.shape[1] == 1:
        y_true = y_true.ravel()

    # One 2x2 matrix per class, laid out as [[tn, fp], [fn, tp]].
    per_class = multilabel_confusion_matrix(y_true, hard_preds, labels=list(range(num_classes)))
    tn = list(per_class[:, 0, 0])
    fp = list(per_class[:, 0, 1])
    fn = list(per_class[:, 1, 0])
    tp = list(per_class[:, 1, 1])

    # The macro averages are left at scikit-learn's default label set.
    acc = accuracy_score(y_true, hard_preds)
    rec = recall_score(y_true, hard_preds, average='macro')
    prec = precision_score(y_true, hard_preds, average='macro')
    f1s = f1_score(y_true, hard_preds, average='macro')
    return acc, rec, prec, f1s, tp, fp, tn, fn

In [46]:
# Compute metric
result_AUC = []
result_ACC = []
result_REC = []
result_PREC = []
result_SPEC = []
result_F1 = []
result_BA = []
result_TP = []
result_FP = []
result_TN = []
result_FN = []
for i in range(num_tasks):
    # # Skip if all targets or preds are identical, otherwise we'll crash during classification
    if dataset_type == 'classification':
        nan = False
        if all(target == 0 for target in valid_targets[i]) or all(target == 1 for target in valid_targets[i]):
            nan = True
            # info('Warning: Found a task with targets all 0s or all 1s')
        if all(pred == 0 for pred in valid_preds[i]) or all(pred == 1 for pred in valid_preds[i]):
            nan = True
            # info('Warning: Found a task with predictions all 0s or all 1s')

        if nan:
            result_AUC.append(float('nan'))
            result_ACC.append(float('nan'))
            result_REC.append(float('nan'))
            result_PREC.append(float('nan'))
            result_SPEC.append(float('nan'))
            result_F1.append(float('nan'))
            result_BA.append(float('nan'))
            result_TP.append(float('nan'))
            result_FP.append(float('nan'))
            result_TN.append(float('nan'))
            result_FN.append(float('nan'))
            continue

    if len(valid_targets[i]) == 0:
        continue

    result_AUC.append(metric_func(valid_targets[i], valid_preds[i]))
    acc, rec, prec, spe, f1s, BA, tp, fp, tn, fn = confusion_mat(valid_targets[i], valid_preds[i])
    result_ACC.append(acc)
    result_REC.append(rec)
    result_PREC.append(prec)
    result_SPEC.append(spe)
    result_F1.append(f1s)
    result_BA.append(BA)
    result_TP.append(tp)
    result_FP.append(fp)
    result_TN.append(tn)
    result_FN.append(fn)

ValueError: multiclass-multioutput format is not supported

# Fine-tuning (`finetune` subcommand)

In [5]:
from grover.data import MoleculeDatapoint, MoleculeDataset, StandardScaler
import csv
import logging
import os
import pickle
import random
from argparse import Namespace
from collections import defaultdict
from logging import Logger
from typing import List, Set, Tuple, Union, Dict

import numpy as np
import torch
from rdkit import Chem
from rdkit.Chem.Scaffolds import MurckoScaffold
from torch import nn as nn
from tqdm import tqdm as core_tqdm

from grover.data import MoleculeDatapoint, MoleculeDataset, StandardScaler
from grover.model.models import GroverFpGeneration, GroverFinetuneTask, GroverEmbvecGeneration
from grover.util.nn_utils import initialize_weights
from grover.util.scheduler import NoamLR

from grover.topology.mol_tree import *
from grover.util.utils import *
from task.train import *

In [13]:
# Seed all random number generators before anything else runs.
setup(seed=42)
# Reference MolVocab so the import is not reported as unused by pylint.
a = MolVocab
# Suppress RDKit log messages below CRITICAL.
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)
# Bind the MolVocab class to a second name (no instance is created here).
mol_vocab = MolVocab
# Build the argument namespace from the notebook-local parse_args().
args = parse_args()
# Logger named 'train'; save_dir comes from the parsed arguments.
logger = create_logger(name='train', save_dir=args.save_dir, quiet=False)

In [14]:

# Route messages through the logger when one exists, otherwise fall back to print.
if logger is None:
    debug = info = print
else:
    debug = logger.debug
    info = logger.info

# Select the CUDA device stored in args.gpu, if one was given.
idx = args.gpu
if idx is not None:
    torch.cuda.set_device(idx)

# load_data returns the scalers, the shared dict and the three data splits.
(features_scaler, scaler, shared_dict,
 test_data, train_data, val_data) = load_data(args, debug, logger)

metric_func = get_metric_func(metric=args.metric)

# Test-set bookkeeping: one accumulator row per test molecule, one column per
# class (multi-class) or per task (otherwise).
test_smiles = test_data.smiles()
test_targets = test_data.targets()
pred_width = args.multi_class_num if args.multi_class else args.num_tasks
sum_test_preds = np.zeros((len(test_smiles), pred_width))
    
# Prepare each ensemble member: output directory, model, loss, optimizer,
# LR scheduler and the training DataLoader. Training itself happens in later cells,
# which use whatever this loop left bound after its final iteration.
for model_idx in range(args.ensemble_size):
    # Per-model output directory; checkpoints for this member are written here.
    save_dir = os.path.join(args.save_dir, f'model_{model_idx}')
    makedirs(save_dir)

    # Load/build model
    if args.checkpoint_paths is not None:
        # A single checkpoint is shared by every ensemble member.
        if len(args.checkpoint_paths) == 1:
            cur_model = 0
        else:
            cur_model = model_idx
        debug(f'Loading model {cur_model} from {args.checkpoint_paths[cur_model]}')
        model = load_checkpoint(args.checkpoint_paths[cur_model], current_args=args, logger=logger)
    else:
        debug(f'Building model {model_idx}')
        model = build_model(model_idx=model_idx, args=args)

    # Re-initialise the feed-forward head when it is fine-tuned with its own coefficient.
    if args.fine_tune_coff != 1 and args.checkpoint_paths is not None:
        debug("Fine tune fc layer with different lr")
        initialize_weights(model_idx=model_idx, model=model.ffn, distinct_init=args.distinct_init)

    # Get loss and metric functions
    loss_func = get_loss_func(args, model)

    optimizer = build_optimizer(model, args)

    if args.cuda:
        debug('Moving model to cuda')
        model = model.cuda()
    # Ensure that model is saved in correct location for evaluation if 0 epochs
    save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

    # Learning rate schedulers
    scheduler = build_lr_scheduler(optimizer, args)

    # Build data_loader
    shuffle = True
    mol_collator = MolCollator(shared_dict={}, args=args)
    # Wrap the training set only once: without this guard a second loop iteration
    # (ensemble_size > 1) or a re-run of this cell would wrap the DataLoader in
    # another DataLoader.
    if not isinstance(train_data, DataLoader):
        train_data = DataLoader(train_data,
                                batch_size=args.batch_size,
                                shuffle=shuffle,
                                num_workers=10,
                                collate_fn=mol_collator)

Loading data
Number of tasks = 1
Splitting data with seed 0
100%|##########| 1019/1019 [00:00<00:00, 4268.18it/s]
Total scaffolds = 281 | train scaffolds = 137 | val scaffolds = 69 | test scaffolds = 75
Label averages per scaffold, in decreasing order of scaffold frequency,capped at 10 scaffolds and 20 labels: [(array([1.05369128]), array([149])), (array([0.73318386]), array([446])), (array([0.]), array([1])), (array([0.]), array([1])), (array([0.]), array([1])), (array([1.]), array([1])), (array([2.]), array([1])), (array([0.]), array([1])), (array([0.]), array([1])), (array([0.]), array([3]))]
Class sizes
Total size = 1,019 | train size = 815 | val size = 101 | test size = 103
Loading model 0 from grover_large.pt


0 : 0.44
1 : 0.33
2 : 0.19
3 : 0.05


Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_q.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_q.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_k.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_k.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_v.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_v.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_q.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_q.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_k.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_k.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_v.act_func.weight".
Loading pretr

In [15]:
# Manual forward pass over the training loader, used to inspect the raw model
# output. No loss is back-propagated and no optimizer step is taken; once the loop
# finishes, `preds` and `targets` hold the values from the final batch.
model.train()

# data.shuffle()

loss_sum, iter_count = 0, 0
cum_loss_sum, cum_iter_count = 0, 0


mol_collator = MolCollator(shared_dict=shared_dict, args=args)

num_workers = 4
# Reuse an existing DataLoader; otherwise build one around the dataset.
if isinstance(train_data, DataLoader):
    mol_loader = train_data
else:
    mol_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                            num_workers=num_workers, collate_fn=mol_collator)

# The model does not change device inside the loop, so check it once.
model_on_cuda = next(model.parameters()).is_cuda

for _, batch, features_batch, mask, targets in mol_loader:
    if model_on_cuda:
        mask, targets = mask.cuda(), targets.cuda()
    class_weights = torch.ones(targets.shape)

    if args.cuda:
        class_weights = class_weights.cuda()

    # Run model
    model.zero_grad()
    preds = model(batch, features_batch)

In [16]:
preds[0]

tensor([[-0.3956,  1.0293, -2.0984,  1.2230],
        [ 0.3215,  0.4533, -1.3846,  1.3530],
        [-0.1450,  0.6166, -2.1879,  1.9209],
        [-1.4344,  0.9410, -2.3566,  1.5307],
        [ 0.2092,  0.8772, -1.3095,  1.3412],
        [ 1.2265,  0.4093, -0.9116,  1.1034],
        [-0.2924,  0.8900, -1.0518,  0.4056],
        [-0.5966,  0.6896,  0.3310,  1.6052],
        [ 0.5463,  0.2386, -1.1186,  0.9212],
        [-0.0789,  1.7566, -1.0784,  0.5090],
        [-0.1155,  0.1876, -2.4077,  0.7775],
        [-0.3261,  0.3426, -2.3022,  0.4938],
        [-0.8970,  1.0712, -2.8677,  1.6920],
        [ 0.4045,  0.0566, -1.3239,  1.3198],
        [-0.0325,  0.9464, -1.9318,  1.3852]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [26]:
targets.shape

torch.Size([15, 1])

In [18]:
pred_loss = nn.CrossEntropyLoss(reduction='none')

In [21]:
# CrossEntropyLoss expects class indices of shape (batch,). `targets` is (batch, 1),
# so drop only the trailing axis: a bare squeeze() would also collapse a batch of
# size 1 to a 0-d tensor and break the loss call.
class_targets = targets.squeeze(-1).to(torch.long)
# The model returns two outputs; each one is scored against the same targets.
pred_loss1 = pred_loss(preds[0], class_targets)
pred_loss2 = pred_loss(preds[1], class_targets)

In [44]:
pred_loss1

tensor([2.3404, 1.5029, 4.4555, 3.4516, 1.8360, 0.8935, 1.9080, 1.4981, 1.5254,
        3.2441, 1.2857, 3.6551, 3.0733, 1.8252, 1.0929], device='cuda:0',
       grad_fn=<NllLossBackward0>)

In [22]:
dist_loss = nn.MSELoss(reduction='none')
dist = dist_loss(preds[0], preds[1])

In [24]:
loss = pred_loss1 + pred_loss2 + dist.mean(dim=1)

In [27]:
loss.shape

torch.Size([15])

In [29]:
loss = loss.mean()

In [30]:
loss.dtype

torch.float32

In [32]:
# CrossEntropyLoss with class-index targets: three samples, five classes.
# Note that `loss` is bound to the loss module here, not to a loss value.
loss = nn.CrossEntropyLoss()
input1 = torch.randn((3, 5), requires_grad=True)
# One integer class index in [0, 5) per sample.
target1 = torch.empty((3,), dtype=torch.long).random_(0, 5)
output1 = loss(input1, target1)
# Populates input1.grad.
output1.backward()

In [50]:
target1

tensor([1, 3, 4])

In [48]:
# Example of target with class probabilities
input2 = torch.randn(3, 5, requires_grad=True)
target2 = torch.randn(3, 5).softmax(dim=1)
output2 = loss(input2, target2)
output2.backward()

In [58]:
dist.mean(dim=0).shape

torch.Size([4])

In [59]:
dist.mean(dim=1).shape

torch.Size([15])

In [9]:
# Run training: per epoch, train once, evaluate on the validation split, log,
# then checkpoint whenever the selection criterion improves.
best_score = float('inf') if args.minimize_score else -float('inf')
best_epoch, n_iter = 0, 0
min_val_loss = float('inf')
# The epoch count is hard-coded to 2 in this cell.
for epoch in range(2):
    s_time = time.time()
    n_iter, train_loss = train(
        epoch=epoch,
        model=model,
        data=train_data,
        loss_func=loss_func,
        optimizer=optimizer,
        scheduler=scheduler,
        args=args,
        n_iter=n_iter,
        shared_dict=shared_dict,
        logger=logger
    )
    # Wall-clock time of the training pass; the timer is then reset for validation.
    t_time = time.time() - s_time
    s_time = time.time()
    val_scores, val_loss = evaluate(
        model=model,
        data=val_data,
        loss_func=loss_func,
        num_tasks=args.num_tasks,
        metric_func=metric_func,
        batch_size=args.batch_size,
        dataset_type=args.dataset_type,
        scaler=scaler,
        shared_dict=shared_dict,
        logger=logger,
        args=args
    )
    v_time = time.time() - s_time


    # Average validation score (NaN entries are ignored by nanmean).
    avg_val_score = np.nanmean(val_scores)


    # Only an ExponentialLR scheduler is stepped here, once per epoch.
    if isinstance(scheduler, ExponentialLR):
        scheduler.step()

    if args.show_individual_scores:
        # Individual validation scores
        for task_name, val_score in zip(args.task_names, val_scores):
            debug(f'Validation {task_name} {args.metric} = {val_score:.6f}')
    print('Epoch: {:04d}'.format(epoch),
          'loss_train: {:.6f}'.format(train_loss),
          'loss_val: {:.6f}'.format(val_loss),
          f'{args.metric}_val: {avg_val_score:.4f}',
          # 'auc_val: {:.4f}'.format(avg_val_score),
          'cur_lr: {:.5f}'.format(scheduler.get_lr()[-1]),
          't_time: {:.4f}s'.format(t_time),
          'v_time: {:.4f}s'.format(v_time))

    # NOTE(review): `writer` is not created in any visible cell — confirm it exists
    # before enabling args.tensorboard.
    if args.tensorboard:
        writer.add_scalar('loss/train', train_loss, epoch)
        writer.add_scalar('loss/val', val_loss, epoch)
        writer.add_scalar(f'{args.metric}_val', avg_val_score, epoch)

    # NOTE(review): `wandb` is not imported in any visible cell — confirm it is in
    # scope before enabling args.wandb.
    if args.wandb :         
        wandb.log({"val_loss" : val_loss, "val_metrics" : val_scores})


    # Save model checkpoint if improved validation score
    if args.select_by_loss:
        # Selection by validation loss: lower is better.
        if val_loss < min_val_loss:
            min_val_loss, best_epoch = val_loss, epoch
            save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)
    else:
        # Selection by metric: the direction depends on args.minimize_score
        # (`and` binds tighter than `or`, so this reads as two alternatives).
        if args.minimize_score and avg_val_score < best_score or \
                not args.minimize_score and avg_val_score > best_score:
            best_score, best_epoch = avg_val_score, epoch
            save_checkpoint(os.path.join(save_dir, 'model.pt'), model, scaler, features_scaler, args)

    # Early stopping: stop once more than args.early_stop_epoch epochs have
    # passed since the best one.
    if epoch - best_epoch > args.early_stop_epoch:
        break

Epoch: 0000 loss_train: 69.688618 loss_val: 26.584674 auc_val: 0.5686 cur_lr: 0.00059 t_time: 4.8184s v_time: 0.4729s
Epoch: 0001 loss_train: 65.542918 loss_val: 26.671691 auc_val: 0.5558 cur_lr: 0.00076 t_time: 4.2996s v_time: 0.4478s


In [10]:
# Placeholder value; reassigned once the ensemble is scored in a later cell.
ensemble_scores = 0.0

# Evaluate on test set using model with best validation score
if args.select_by_loss:
    info(f'Model {model_idx} best val loss = {min_val_loss:.6f} on epoch {best_epoch}')
else:
    info(f'Model {model_idx} best validation {args.metric} = {best_score:.6f} on epoch {best_epoch}')
# Reload the checkpoint saved for this model; this replaces the in-memory model.
model = load_checkpoint(os.path.join(save_dir, 'model.pt'), cuda=args.cuda, logger=logger)

# The second return value of predict() is discarded.
test_preds, _ = predict(
    model=model,
    data=test_data,
    loss_func=loss_func,
    batch_size=args.batch_size,
    logger=logger,
    shared_dict=shared_dict,
    scaler=scaler,
    args=args
)

# NOTE(review): the keyword is spelled `arg` here, unlike `args=` in the calls
# above; presumably it matches the project's signature — confirm.
test_scores = evaluate_predictions(
    preds=test_preds,
    targets=test_targets,
    num_tasks=args.num_tasks,
    metric_func=metric_func,
    dataset_type=args.dataset_type,
    arg=args,
    logger=logger
)


Model 0 best val loss = 26.584674 on epoch 0
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_q.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_q.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_k.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_k.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_v.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.0.mpn_v.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_q.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_q.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_k.act_func.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.heads.1.mpn_k.W_h.weight".
Loading pretrained parameter "grover.encoders.edge_blocks.0.

In [11]:
# Add this model's test predictions to the running ensemble sum.
# The update is in place, so re-running this cell adds the same predictions again.
if len(test_preds) != 0:
    sum_test_preds += np.array(test_preds, dtype=float)

# Average test score (NaN entries are ignored by nanmean).
avg_test_score = np.nanmean(test_scores)
info(f'Model {model_idx} test {args.metric} = {avg_test_score:.6f}')

if args.show_individual_scores:
    # Individual test scores
    for task_name, test_score in zip(args.task_names, test_scores):
        info(f'Model {model_idx} test {task_name} {args.metric} = {test_score:.6f}')

# Evaluate ensemble on test set: the summed predictions are divided by
# args.ensemble_size, whatever number of models was actually added above.
avg_test_preds = (sum_test_preds / args.ensemble_size).tolist()

ensemble_scores = evaluate_predictions(
    preds=avg_test_preds,
    targets=test_targets,
    num_tasks=args.num_tasks,
    metric_func=metric_func,
    dataset_type=args.dataset_type,
    arg=args,
    logger=logger
)


Model 0 test auc = 0.600428


In [31]:
# Two-level column index: ('preds', task) for every task, then ('targets', task).
ind = [['preds'] * args.num_tasks + ['targets'] * args.num_tasks, args.task_names * 2]
ind = pd.MultiIndex.from_tuples(list(zip(*ind)))
# NOTE: this rebinds `data`, which earlier cells used for the dataset, to a NumPy array.
if args.multi_class:
    # Multi-class: reduce each row of averaged scores to its argmax class (a single
    # column), then append the target columns.
    data = np.concatenate([np.array([np.argmax(x) for x in avg_test_preds]).reshape(-1,1), np.array(test_targets)], 1)
else:
    # Otherwise: averaged predictions and targets side by side.
    data = np.concatenate([np.array(avg_test_preds), np.array(test_targets)], 1)

In [32]:
data

array([[2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 1.],
       [2., 1.],
       [2., 2.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [2., 1.],
       [2., 1.],
       [2., 1.],
       [2., 2.],
       [2., 1.],
       [2., 0.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 1.],
       [2., 1.],
       [2., 1.],
       [2., 2.],
       [1., 0.],
       [2., 1.],
       [1., 0.],
       [2., 1.],
       [2., 1.],
       [1., 0.],
       [1., 2.],
       [2., 1.],
       [2., 1.],
       [2., 1.],
       [2., 2.],
       [2., 2.],
       [2., 2.],
       [2., 1.],
       [2., 1.],
       [1., 1.],
       [2., 0.],
       [2., 2.],
       [2., 1.],
       [2., 1.],
       [2., 2.],
       [1., 0.],
       [2., 0.],
       [2., 2.],
       [2., 2.],
       [2., 0.],
       [2., 2.],
       [2., 1.

In [29]:
np.array(test_targets).shape

(104, 1)

In [33]:
test_result = pd.DataFrame(data, index=test_smiles, columns=ind)
test_result.to_csv(os.path.join(args.save_dir, 'test_result.csv'))

In [44]:
import torch.nn.functional as F

In [49]:
pred_loss = torch.nn.CrossEntropyLoss()

In [65]:
targets.squeeze().to(torch.double)

tensor([2., 2., 1., 2., 0., 1., 2., 1., 1., 2., 1., 2., 1., 2., 1., 2., 0., 1.,
        1., 1., 2., 1., 2., 2., 2., 1., 2., 2., 2., 2., 0., 1.],
       device='cuda:0', dtype=torch.float64)

In [69]:
pred_loss(preds[0], targets.squeeze().to(torch.long))

tensor([0.3708, 2.2572, 0.8642, 2.5354, 1.3634, 1.9989, 0.2592, 0.3828, 0.8414,
        1.5310, 1.2287, 2.2028, 0.2887, 0.4733, 1.0140, 2.8086, 1.5943, 0.8936,
        0.9096, 0.7167, 1.6020, 1.5150, 0.4911, 1.2378, 2.2452, 0.7885, 1.2347,
        2.8424, 3.4238, 0.9983, 1.2706, 0.4381], device='cuda:0',
       grad_fn=<NllLossBackward0>)

In [36]:
torch.(torch.softmax(torch.tensor(preds[0]),dim=1))

  """Entry point for launching an IPython kernel.


tensor(82, device='cuda:0')