# This notebook contains a short validation of the NLL calculation performed in REINVENT. The current REINVENT version contains a small bug: the NLL of a SMILES string may change depending on whether or not it is the longest SMILES in a batch. The locally provided version of REINVENT fixes this behavior. If you want to reproduce the results from the manuscript, please make sure that your version of REINVENT always calculates the same NLL for the SMILES "C" or "CC", regardless of whether longer SMILES are present in the same batch.

In [1]:
import sys
# Add the local "reinvent/" directory to the module search path. The path is
# relative, so it is resolved against the current working directory.
# NOTE(review): presumably this is what makes the `models` and `utils`
# imports further down resolve to the locally provided REINVENT - confirm.
sys.path.append("reinvent/")

In [2]:
# Optional debugging switch, left disabled. Setting CUDA_LAUNCH_BLOCKING=1
# makes CUDA kernel launches synchronous, so a failing kernel is reported at
# the call that caused it. If enabled, run this before any CUDA work starts.
#import os
#os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [20]:
import pandas as pd
import numpy as np 
import scipy.stats as sps
from tqdm.auto import tqdm, trange
from models import dataset
from models.model import Model
from utils.smiles import standardize_smiles
from pandarallel import pandarallel
import utils.general

# Report the device choice: a truthy result from set_default_device_cuda()
# is announced as GPU usage, anything else as CPU usage.
print("GPU is used" if utils.general.set_default_device_cuda() else "Using CPU")

Using CPU


In [4]:
# Set up pandarallel with its progress bars switched off.
# NOTE(review): verbose=1 presumably limits logging to warnings - confirm
# against the installed pandarallel version.
pandarallel.initialize(progress_bar=False, verbose=1)

In [5]:
# Load the prior model shipped in the local REINVENT data directory; every
# likelihood check below is evaluated with this model.
# NOTE(review): the variable name suggests a ChEMBL-trained prior - confirm.
chembl_prior = Model.load_from_file("reinvent/data/augmented.prior")

In [13]:
# Disabled on purpose. NOTE(review): presumably this would move the prior's
# network to the CPU (assuming `network` is a torch module) - confirm before
# re-enabling.
#chembl_prior.network.cpu()

In [13]:
# Baseline: score "C" on its own (batch of one). Per the notebook intro the
# returned value is the NLL; it serves as the reference for the larger batches.
chembl_prior.likelihood_smiles(["C"])

tensor([27.0785], grad_fn=<SumBackward1>)

In [14]:
# "C" is no longer the longest SMILES in the batch. With the fixed REINVENT
# its value must be identical to the one obtained when "C" is scored alone.
chembl_prior.likelihood_smiles(["C","CC"])

tensor([27.0785, 24.7070], grad_fn=<SumBackward1>)

In [15]:
# Add a third, longer SMILES: the values for "C" and "CC" must not change
# now that "CCC" is the longest entry in the batch.
chembl_prior.likelihood_smiles(["C","CC","CCC"])

tensor([27.0785, 24.7070, 20.9116], grad_fn=<SumBackward1>)

In [16]:
# Stress case: batch "C" and "CC" with a very long carbon chain. The first
# two values must still equal the ones obtained in the smaller batches.
chembl_prior.likelihood_smiles(["C","CC","CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"])

tensor([27.0785, 24.7070, 57.8311], grad_fn=<SumBackward1>)