In [None]:
#!/usr/bin/env python3
"""
This script loads the following:
-- data/<data_name>/<data_name>.json,
-- data/<data_name>/rf_results/selected_feats.txt,
-- <data_name>_raw_<use_rdkit>.csv,
-- and <data_name>_train_test_idxs.pickle
to split the data and train the GNN model on a 
data split. It outputs the R2 score and logs to 
-- output/<model_name>/
"""



In [None]:
%cd /afs/crc.nd.edu/user/m/msaebi/Public/chemistry/yield_rxn

In [1]:
import os
import sys
import json
import warnings
import argparse
import logging
import pickle
import pandas as pd
from collections import defaultdict

import rdkit
import rdkit.Chem as Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw

import torch
from torch import randperm
from torch.utils.data import DataLoader
from torch._utils import _accumulate
from torch.utils.data import Dataset,Subset

from rxntorch.containers.reaction import Rxn
from rxntorch.containers.dataset import RxnGraphDataset as RxnGD
from rxntorch.utils import collate_fn
from rxntorch.models.yield_network import YieldNet, YieldTrainer
from sklearn.preprocessing import StandardScaler

#warnings.filterwarnings("ignore")
parser = argparse.ArgumentParser()

parser.add_argument("-p", "--dataset_path", type=str, default='./data/', help="train dataset")
parser.add_argument("-dn", "--dataset_name", required=True, type=str, help="dataset name. Options: az (AstraZeneca),dy (Doyle),su (Suzuki)")
parser.add_argument("-op", "--output_path", type=str, default='./output/', help="saved model path")
parser.add_argument("-o", "--output_name", required=True, type=str, help="e.g. rxntorch.model")
#parser.add_argument("-vr", "--valid_ratio", type=float, default=0.7, help="Ratio of samples to reserve for validation")
parser.add_argument("-sn", "--split_set_num", type=int, default=1, help="Choose one split set for train and test. Options: 1-10")

parser.add_argument("-dr", "--dropout_rate", type=float, default=0.04, help="Ratio of samples to reserve for valid data")
parser.add_argument("-b", "--batch_size", type=int, default=40, help="number of batch_size")
parser.add_argument("-tb", "--test_batch_size", type=int, default=None, help="batch size for evaluation")
parser.add_argument("-e", "--epochs", type=int, default=200, help="number of epochs")
parser.add_argument("-hs", "--hidden", type=int, default=200, help="hidden size of model layers")
parser.add_argument("-l", "--layers", type=int, default=2, help="number of layers")

parser.add_argument("--lr", type=float, default=1e-2, help="learning rate of the optimizer")
parser.add_argument("-lrd", "--lr_decay", type=float, default=0.5, help="Decay factor for reducing the learning rate")
parser.add_argument("-lrs", "--lr_steps", type=int, default=10000,help="Number of steps between learning rate decay")

parser.add_argument("-awd","--adam_weight_decay", type=float, default=0.0, help="weight_decay of adam")
parser.add_argument("--adam_beta1", type=float, default=0.9, help="adam first beta value")
parser.add_argument("--adam_beta2", type=float, default=0.999, help="adam second beta value")

parser.add_argument("-gc", "--grad_clip", type=float, default=None, help="value for gradient clipping")
parser.add_argument("-pw", "--pos_weight", type=float, default=None, help="Weights positive samples for imbalance")

parser.add_argument("-w", "--num_workers", type=int, default=4, help="dataloader worker size")
parser.add_argument("--with_cuda", type=bool, default=True, help="training with CUDA: true, or false")
parser.add_argument("--cuda_devices", type=int, nargs='*', default=None, help="CUDA device ids")

parser.add_argument("--log_freq", type=int, default=100, help="printing loss every n iter: setting n")
parser.add_argument("--seed", type=int, default=0, help="random seed")
parser.add_argument("-ud","--use_domain", type=str, required=True, help="use domain features or not. options: rdkit: combination od rdkit feature and bozhao features. no_rdkit: only bozhao features. no_domain: neither.")
parser.add_argument("-mb","--max_nbonds", type=int, default=15, help="maximum number of bonds for binary features")
parser.add_argument("-ma","--max_natoms", type=int, default=15, help="maximum number of atoms for binary features")
parser.add_argument("--abs", type=int, default=1, help="Take the average over aboslute value of predicted yield")





_StoreAction(option_strings=['--abs'], dest='abs', nargs=None, const=None, default=1, type=<class 'int'>, choices=None, help='Take the average over aboslute value of predicted yield', metavar=None)

In [2]:
args = parser.parse_args(args=["-dn", "az",
                              "-o" ,"az_model_test",
                              "--use_domain","no_domain",
                              "-gc","0.8",
                               "-sn" ,"2"])

In [19]:

#torch.cuda.set_device(args.gpu)
torch.manual_seed(args.seed )
torch.cuda.manual_seed_all(args.seed )


#input specs
data_type=args.dataset_name

#if use_domain=no_domain, just load either rdkit or no_rdkit .csv file and the
#set domain features to 0.
ext= '_'+args.use_domain if 'rdkit' in args.use_domain else '_no_rdkit' 
data_path = os.path.join(args.dataset_path,data_type)
processed_path = os.path.join(data_path,'processed')

input_split_idx_file = os.path.join(processed_path,'train_test_idxs.pickle')
processed_data_file = os.path.join(processed_path,''.join([data_type, ext,'.csv']))
selected_features_fn = os.path.join(data_path,'rf_results','selected_feats.txt')


#output specs
# Saves model scores and the model itself in output_path
gc= 'gc' if args.grad_clip else ''

model_name = '-'.join(map(str,[args.output_name, gc, args.use_domain, args.seed, args.hidden, args.layers, args.epochs, args.lr, args.lr_decay, args.lr_steps,args.batch_size]))

output_path= os.path.join(args.output_path ,model_name)
if not os.path.exists(output_path):
    os.mkdir(output_path)

logfile = '.'.join((args.output_name, "log"))
logpath = os.path.join(output_path, logfile)
logging.basicConfig(level=logging.INFO, style='{', format="{asctime:s}: {message:s}",
                    datefmt="%m/%d/%y %H:%M:%S", handlers=(
                    logging.FileHandler(logpath), logging.StreamHandler()))

################################################################
#load_train_test sets
#################################################################
split_set_num= args.split_set_num

with open(input_split_idx_file, 'rb') as handle:
    idx_dict = pickle.load(handle)
    
selected_features = open(selected_features_fn,'r').readlines()[0].split(',')

logging.info("Loading Dataset in {dataset}".format( dataset=processed_data_file))
logging.info("Using the split set number {split}".format( split=split_set_num))

#################################
#modified
#################################

df=pd.read_csv(processed_data_file,index_col=0)
train_set= df.iloc[idx_dict['train_idx'][split_set_num]]
test_set = df.iloc[idx_dict['test_idx'][split_set_num]]

smiles_feature_names = ["id","yield","reactant_smiles","solvent_smiles","base_smiles","product_smiles"]
#domain_feature_names = ["yield"]+[f for f in df.columns if f not in smiles_feature_names]
domain_feature_names =[f for f in df.columns if f not in smiles_feature_names]


#apply feature selection
logging.info("Number of all available features: {num}".format(num=len(domain_feature_names)))
if args.use_domain=='rdkit':
    domain_feature_names = [f for f in domain_feature_names if f in selected_features]
    #domain_feature_names = ["yield"]+[f for f in domain_feature_names if f in selected_features]
    logging.info("Selecting features...")
    logging.info("Number of features after feature selection: {num}".format(num=len(domain_feature_names)))
else:
    logging.info("Not running feature selection!")



train_set_domain = train_set[domain_feature_names]
test_set_domain = test_set[domain_feature_names]

train_set_smiles = train_set[smiles_feature_names]
test_set_smiles = test_set[smiles_feature_names]

scaler = StandardScaler()

train_set_domain_scaled = pd.DataFrame(scaler.fit_transform(train_set_domain),columns = domain_feature_names)
test_set_domain_scaled = pd.DataFrame(scaler.transform(test_set_domain),columns = domain_feature_names)

assert train_set_domain.shape[0]  == train_set_smiles.shape[0] == train_set_domain_scaled.shape[0]
assert test_set_domain.shape[0]  == test_set_smiles.shape[0] == test_set_domain_scaled.shape[0]




06/12/21 10:47:31: Loading Dataset in ./data/az/processed/az_no_rdkit.csv
06/12/21 10:47:31: Using the split set number 2
06/12/21 10:47:31: Number of all available features: 361
06/12/21 10:47:31: Not running feature selection!


In [6]:
print(test_set_domain.shape,test_set_smiles.shape,test_set_domain_scaled.shape)

(300, 360) (300, 6) (300, 360)


In [22]:
logging.info("{:-^80}".format("Dataset"))
#feeding train smiles to test and vice versa to make sure our encoding is consistent.
# no label information is used on test set here.
train_dataset = RxnGD(train_set_domain_scaled,train_set_smiles, test_set_smiles, args.max_nbonds, args.max_natoms, args.use_domain)
test_dataset = RxnGD(test_set_domain_scaled, test_set_smiles, train_set_smiles, args.max_nbonds, args.max_natoms, args.use_domain)

                   
sample = train_dataset[3]
afeats_size, bfeats_size, binary_size, dmfeats_size = (sample["atom_feats"].shape[-1], sample["bond_feats"].shape[-1],
                                        sample["binary_feats"].shape[-1], sample['domain_feats'].shape[-1])


d1,d2,d3 = sample["binary_feats"].shape
binary_size= d3*d2


logging.info("{:d} samples for training ,{:d} samples for testing".format(train_set.shape[0], test_set.shape[0]))
logging.info("{:-^80}".format("Data loaders"))
logging.info("Batch size: {:d}  Workers: {:d}  Shuffle per epoch: {}".format(args.batch_size, args.num_workers, True))
logging.info("Drop incomplete batches: {}".format(True))

train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True,
                              collate_fn=collate_fn, drop_last=True)

test_batch_size = args.test_batch_size if args.test_batch_size is not None else args.batch_size
test_dataloader = DataLoader(test_dataset, batch_size=test_batch_size, num_workers=args.num_workers, 
                             collate_fn=collate_fn,drop_last=True)


logging.info("{:-^80}".format("Model"))
logging.info("Graph convolution layers: {}  Hidden size: {}".format(
    args.layers, args.hidden, args.batch_size, args.epochs))



06/12/21 12:26:19: ------------------------------------Dataset-------------------------------------
06/12/21 12:26:22: Loading the other dataset (not used in training):

06/12/21 12:26:22: Dataset contains 450 total samples
06/12/21 12:26:22: 110 max number of atoms 
06/12/21 12:26:24: Loading the other dataset (not used in training):

06/12/21 12:26:25: Dataset contains 300 total samples
06/12/21 12:26:25: 101 max number of atoms 
06/12/21 12:26:25: 450 samples for training ,300 samples for testing
06/12/21 12:26:25: ----------------------------------Data loaders----------------------------------
06/12/21 12:26:25: Batch size: 40  Workers: 4  Shuffle per epoch: True
06/12/21 12:26:25: Drop incomplete batches: True
06/12/21 12:26:25: -------------------------------------Model--------------------------------------
06/12/21 12:26:25: Graph convolution layers: 2  Hidden size: 200


In [14]:


################################################################
#Build RxnNet and RxnTrainer
################################################################

net = YieldNet(depth=args.layers, dropout= args.dropout_rate, afeats_size=afeats_size, bfeats_size=bfeats_size,
             hidden_size=args.hidden, binary_size=binary_size,dmfeats_size=dmfeats_size, max_nbonds=args.max_nbonds,use_domain=args.use_domain, abs_score=args.abs)
logging.info("Total Parameters: {:,d}".format(sum([p.nelement() for p in net.parameters()])))

logging.info("{:-^80}".format("Trainer"))
logging.info("Optimizer: {}  Beta1: {}  Beta2: {}".format("Adam", args.adam_beta1, args.adam_beta2))
logging.info("Learning rate: {}  Learning rate decay: {}  Steps between updates: {}".format(
    args.lr, args.lr_decay, args.lr_steps))
logging.info("Weight decay: {} , Dropout Rate: {}, Gradient clipping: {}  Positive sample weighting: {}".format(
    args.adam_weight_decay, args.dropout_rate, args.grad_clip, args.pos_weight))
trainer = YieldTrainer(net, lr=args.lr, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay,
                     with_cuda=args.with_cuda, cuda_devices=args.cuda_devices, log_freq=args.log_freq,
                     grad_clip=args.grad_clip, pos_weight=args.pos_weight, lr_decay=args.lr_decay,
                     lr_steps=args.lr_steps, max_nbonds=args.max_nbonds)

#
################################################################
#Train
################################################################

train_r2 = open(output_path+'train_scores.txt', 'a')
train_l = open(output_path+'train_loss.txt', 'a')

test_r2 = open(output_path+'test_scores.txt', 'a')
test_l = open(output_path+'test_loss.txt', 'a')

#valid_r2 = open(output_path+'valid_scores.txt', 'a')
#valid_l = open(output_path+'valid_loss.txt', 'a')


w1_fn = open(output_path+'weights_1.txt', 'a')
w2_fn = open(output_path+'weights_2.txt', 'a')

max_score=0
for epoch in range(args.epochs):
    r2_train, train_loss,w1,w2 = trainer.train_epoch(epoch, train_dataloader)
    r2_test ,test_loss = trainer.test_epoch(epoch, test_dataloader)
    #r2_valid ,valid_loss = trainer.valid_epoch(epoch, valid_dataloader)
    
    train_r2.write(str(float(r2_train))+',')
    test_r2.write(str(float(r2_test))+',')
    #valid_r2.write(str(float(r2_valid))+',')
    
    train_l.write(str(float(train_loss))+',')
    test_l.write(str(float(test_loss))+',')
    #valid_l.write(str(float(valid_loss))+',')
    
    if r2_test>max_score:
        trainer.save(epoch, model_name, args.output_path)
        max_score=r2_test
    
                
train_r2.close();test_r2.close();#valid_r2.close()
train_l.close();test_l.close();#valid_l.close()
w1_fn.close();w2_fn.close()




06/06/21 21:52:11: 450 samples for training ,300 samples for testing
06/06/21 21:52:11: ----------------------------------Data loaders----------------------------------
06/06/21 21:52:11: Batch size: 40  Workers: 4  Shuffle per epoch: True
06/06/21 21:52:11: Drop incomplete batches: True
06/06/21 21:52:11: -------------------------------------Model--------------------------------------
06/06/21 21:52:11: Graph convolution layers: 2  Hidden size: 200
06/06/21 21:52:11: Total Parameters: 378,166
06/06/21 21:52:11: ------------------------------------Trainer-------------------------------------
06/06/21 21:52:11: Optimizer: Adam  Beta1: 0.9  Beta2: 0.999
06/06/21 21:52:11: Learning rate: 0.01  Learning rate decay: 0.5  Steps between updates: 10000
06/06/21 21:52:11: Weight decay: 0.0 , Dropout Rate: 0.04, Gradient clipping: 0.8  Positive sample weighting: None
06/06/21 21:52:11: ********************************************************************************

06/06/21 21:52:11: ----------

RuntimeError: size mismatch, m1: [4040 x 29], m2: [31 x 200] at /opt/conda/conda-bld/pytorch_1587428091666/work/aten/src/TH/generic/THTensorMath.cpp:41

In [15]:
%debug

> [0;32m/afs/crc.nd.edu/user/m/msaebi/anaconda3/envs/yieldrxn/lib/python3.6/site-packages/torch/nn/functional.py[0m(1612)[0;36mlinear[0;34m()[0m
[0;32m   1610 [0;31m        [0mret[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0maddmm[0m[0;34m([0m[0mbias[0m[0;34m,[0m [0minput[0m[0;34m,[0m [0mweight[0m[0;34m.[0m[0mt[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1611 [0;31m    [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m-> 1612 [0;31m        [0moutput[0m [0;34m=[0m [0minput[0m[0;34m.[0m[0mmatmul[0m[0;34m([0m[0mweight[0m[0;34m.[0m[0mt[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1613 [0;31m        [0;32mif[0m [0mbias[0m [0;32mis[0m [0;32mnot[0m [0;32mNone[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m   1614 [0;31m            [0moutput[0m [0;34m+=[0m [0mbias[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  up


> [0;32m/afs/crc.nd.edu/user/m/msaebi/anaconda3/envs/yieldrxn/lib/python3.6/site-packages/torch/nn/modules/linear.py[0m(87)[0;36mforward[0;34m()[0m
[0;32m     85 [0;31m[0;34m[0m[0m
[0m[0;32m     86 [0;31m    [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0minput[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 87 [0;31m        [0;32mreturn[0m [0mF[0m[0;34m.[0m[0mlinear[0m[0;34m([0m[0minput[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mweight[0m[0;34m,[0m [0mself[0m[0;34m.[0m[0mbias[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     88 [0;31m[0;34m[0m[0m
[0m[0;32m     89 [0;31m    [0;32mdef[0m [0mextra_repr[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  up


> [0;32m/afs/crc.nd.edu/user/m/msaebi/anaconda3/envs/yieldrxn/lib/python3.6/site-packages/torch/nn/modules/module.py[0m(550)[0;36m__call__[0;34m()[0m
[0;32m    548 [0;31m            [0mresult[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0m_slow_forward[0m[0;34m([0m[0;34m*[0m[0minput[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    549 [0;31m        [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 550 [0;31m            [0mresult[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mforward[0m[0;34m([0m[0;34m*[0m[0minput[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    551 [0;31m        [0;32mfor[0m [0mhook[0m [0;32min[0m [0mself[0m[0;34m.[0m[0m_forward_hooks[0m[0;34m.[0m[0mvalues[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    552 [0;31m            [0mhook_result[0m [0;34m=[0m [0mhook[0m[0;34m([0m[0msel

ipdb>  up


> [0;32m/afs/crc.nd.edu/user/m/msaebi/Public/chemistry/yield-rxn/rxntorch/models/wln.py[0m(22)[0;36mforward[0;34m()[0m
[0;32m     20 [0;31m    [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0matom_feats_1[0m[0;34m,[0m [0mbond_feats[0m[0;34m,[0m [0matom_graph[0m[0;34m,[0m [0mbond_graph[0m[0;34m,[0m [0mnum_nbs[0m[0;34m,[0m [0mn_atoms[0m[0;34m,[0m [0mmask_neis[0m[0;34m,[0m [0mmask_atoms[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     21 [0;31m        [0;31m#print("atom_feats_1.shape: ",atom_feats_1.shape)[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 22 [0;31m        [0matom_feats[0m [0;34m=[0m [0mF[0m[0;34m.[0m[0mrelu[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mfc1[0m[0;34m([0m[0matom_feats_1[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     23 [0;31m        [0mbondnei_feats[0m [0;34m=[0m [0mtorch[0m[0;34m.[0m[0mstack[0m[0;34m([0m[0;34m[[0m[0mbond_f

ipdb>  atom_feats_1.shape


torch.Size([40, 101, 29])


ipdb>  self.fc1


Linear(in_features=31, out_features=200, bias=False)


ipdb>  atom_feats_1.shape


torch.Size([40, 101, 29])


ipdb>  atom_feats_1


tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 1.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 1., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0., 

ipdb>  up


> [0;32m/afs/crc.nd.edu/user/m/msaebi/anaconda3/envs/yieldrxn/lib/python3.6/site-packages/torch/nn/modules/module.py[0m(550)[0;36m__call__[0;34m()[0m
[0;32m    548 [0;31m            [0mresult[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0m_slow_forward[0m[0;34m([0m[0;34m*[0m[0minput[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    549 [0;31m        [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 550 [0;31m            [0mresult[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mforward[0m[0;34m([0m[0;34m*[0m[0minput[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    551 [0;31m        [0;32mfor[0m [0mhook[0m [0;32min[0m [0mself[0m[0;34m.[0m[0m_forward_hooks[0m[0;34m.[0m[0mvalues[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    552 [0;31m            [0mhook_result[0m [0;34m=[0m [0mhook[0m[0;34m([0m[0msel

ipdb>  up


> [0;32m/afs/crc.nd.edu/user/m/msaebi/Public/chemistry/yield-rxn/rxntorch/models/yield_network.py[0m(25)[0;36mforward[0;34m()[0m
[0;32m     23 [0;31m[0;34m[0m[0m
[0m[0;32m     24 [0;31m    [0;32mdef[0m [0mforward[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mfatoms[0m[0;34m,[0m [0mfbonds[0m[0;34m,[0m [0matom_nb[0m[0;34m,[0m [0mbond_nb[0m[0;34m,[0m [0mnum_nbs[0m[0;34m,[0m [0mn_atoms[0m[0;34m,[0m [0mbinary_feats[0m[0;34m,[0m [0mmask_neis[0m[0;34m,[0m [0mmask_atoms[0m[0;34m,[0m [0msparse_idx[0m[0;34m,[0m[0mdomain_feats[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 25 [0;31m        [0mlocal_features[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mwln[0m[0;34m([0m[0mfatoms[0m[0;34m,[0m [0mfbonds[0m[0;34m,[0m [0matom_nb[0m[0;34m,[0m [0mbond_nb[0m[0;34m,[0m [0mnum_nbs[0m[0;34m,[0m [0mn_atoms[0m[0;34m,[0m [0mmask_neis[0m[0;34m,[0m [0mmask_atoms[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m


ipdb>  up


> [0;32m/afs/crc.nd.edu/user/m/msaebi/Public/chemistry/yield-rxn/rxntorch/models/yield_network.py[0m(101)[0;36miterate[0;34m()[0m
[0;32m     99 [0;31m            yield_scores = self.model.forward(data['atom_feats'], data['bond_feats'],data['atom_graph'], 
[0m[0;32m    100 [0;31m                                                [0mdata[0m[0;34m[[0m[0;34m'bond_graph'[0m[0;34m][0m[0;34m,[0m [0mdata[0m[0;34m[[0m[0;34m'n_bonds'[0m[0;34m][0m[0;34m,[0m[0mdata[0m[0;34m[[0m[0;34m'n_atoms'[0m[0;34m][0m[0;34m,[0m [0mdata[0m[0;34m[[0m[0;34m'binary_feats'[0m[0;34m][0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 101 [0;31m                                                mask_neis, mask_atoms,data['sparse_idx'],data['domain_feats'])
[0m[0;32m    102 [0;31m            [0mcriteria[0m[0;34m=[0m[0mnn[0m[0;34m.[0m[0mMSELoss[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    103 [0;31m[0;34m[0m[0m
[0m


ipdb>  i


0


ipdb>  up


> [0;32m/afs/crc.nd.edu/user/m/msaebi/Public/chemistry/yield-rxn/rxntorch/models/yield_network.py[0m(67)[0;36mtest_epoch[0;34m()[0m
[0;32m     65 [0;31m        [0;32mwith[0m [0mtorch[0m[0;34m.[0m[0mno_grad[0m[0;34m([0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     66 [0;31m            [0mself[0m[0;34m.[0m[0mmodel[0m[0;34m.[0m[0meval[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 67 [0;31m            [0mr2[0m [0;34m,[0m[0mloss[0m[0;34m=[0m [0mself[0m[0;34m.[0m[0miterate[0m[0;34m([0m[0mepoch[0m[0;34m,[0m [0mdata_loader[0m[0;34m,[0m [0mtrain[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0mvalid[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     68 [0;31m        [0;32mreturn[0m [0mr2[0m[0;34m,[0m[0mloss[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     69 [0;31m[0;34m[0m[0m
[0m


ipdb>  down


> [0;32m/afs/crc.nd.edu/user/m/msaebi/Public/chemistry/yield-rxn/rxntorch/models/yield_network.py[0m(101)[0;36miterate[0;34m()[0m
[0;32m     99 [0;31m            yield_scores = self.model.forward(data['atom_feats'], data['bond_feats'],data['atom_graph'], 
[0m[0;32m    100 [0;31m                                                [0mdata[0m[0;34m[[0m[0;34m'bond_graph'[0m[0;34m][0m[0;34m,[0m [0mdata[0m[0;34m[[0m[0;34m'n_bonds'[0m[0;34m][0m[0;34m,[0m[0mdata[0m[0;34m[[0m[0;34m'n_atoms'[0m[0;34m][0m[0;34m,[0m [0mdata[0m[0;34m[[0m[0;34m'binary_feats'[0m[0;34m][0m[0;34m,[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 101 [0;31m                                                mask_neis, mask_atoms,data['sparse_idx'],data['domain_feats'])
[0m[0;32m    102 [0;31m            [0mcriteria[0m[0;34m=[0m[0mnn[0m[0;34m.[0m[0mMSELoss[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    103 [0;31m[0;34m[0m[0m
[0m


ipdb>  i


0


ipdb>  data['atom_feats'].shape


torch.Size([40, 101, 29])


ipdb>  q


In [None]:
dataset

In [5]:
self.df_domain.iloc[i]

NameError: name 'self' is not defined