In [3]:
%load_ext autoreload
%autoreload 2


import numpy as np
import rdkit
import random
import pandas as pd

from rdkit.Chem import AllChem as Chem
from rdkit.DataStructs import TanimotoSimilarity

import h5py, ast, pickle

# Occupy a GPU for the model to be loaded 
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
# GPU ID, if occupied change to an available GPU ID listed under !nvidia-smi
%env CUDA_VISIBLE_DEVICES=0 

from ddc_pub import ddc_v3 as ddc

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0


In [4]:
# Import existing (trained) model: constructing DDC with `model_name` loads the
# saved model found under that path instead of building a new one.
# Ignore any warning(s) about training configuration or non-serializable keyword arguments
model_name = "models/opd_desc"
model = ddc.DDC(model_name=model_name)

Initializing model in test mode.
Loading model.
'mol_to_latent_model' not found, setting to None.
Loading finished in 2 seconds.
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Latent_Input (InputLayer)       [(None, 3)]          0                                            
__________________________________________________________________________________________________
Decoder_Inputs (InputLayer)     [(None, 281, 47)]    0                                            
__________________________________________________________________________________________________
latent_to_states_model (Model)  [(None, 512), (None, 24576       Latent_Input[0][0]               
__________________________________________________________________________________________________
batch_model (Model)             (None, 281, 47)      5381679  

In [28]:
# Sample descriptor sets around desired properties.
# Every sample is a length-3 vector: one target value per descriptor plus a
# uniform jitter of at most +/- the listed half-width.
# NOTE(review): the meaning/order of the three descriptors is not visible in
# this notebook -- confirm against the data the model was trained on.
N_DESCR_SAMPLES = 1000
DESCR_SEED = 42  # set to None to draw a different sample set on every run

# (target value, jitter half-width) for each descriptor, in input order
DESCR_TARGETS = (
    (-7, .25),
    (2.5, .25),
    (-4.5, 1.5),
)

# Dedicated, seeded generator: the sampled descriptors are reproducible on
# Restart & Run All and do not depend on (or disturb) the global `random` state.
descr_rng = random.Random(DESCR_SEED)

descr_in = [
    np.array([center + descr_rng.uniform(-half_width, half_width)
              for center, half_width in DESCR_TARGETS])
    for _ in range(N_DESCR_SAMPLES)
]

In [None]:
# Convert descriptors to SMILES
# Each descriptor vector in `descr_in` is passed to `model.predict`, whose result
# is unpacked as (SMILES, NLL); both are collected in sampling order, so
# smiles_out_desc[i] and nll_desc[i] belong to descr_in[i].
SAMPLING_TEMP = 0  # Change temp to 1 for more funky results
PROGRESS_EVERY = 100  # report progress every N samples rather than on every sample

nll_desc = []
smiles_out_desc = []
smiles_in_desc = []  # not filled in this cell

n_targets = len(descr_in)

for index_temp, target in enumerate(descr_in):
    smiles_out_temp, nll_temp = model.predict(latent=target, temp=SAMPLING_TEMP)

    smiles_out_desc.append(smiles_out_temp)
    nll_desc.append(nll_temp)

    # Throttled progress: one line per PROGRESS_EVERY samples (plus the final
    # one) keeps the cell output readable instead of printing every index.
    n_done = index_temp + 1
    if n_done % PROGRESS_EVERY == 0 or n_done == n_targets:
        print("{}/{}".format(n_done, n_targets))

In [None]:
# Make the generated SMILES comparable: every entry of smiles_out_desc is
# replaced in place by its canonical form, or by the marker "invalid" when
# RDKit does not return a usable molecule for it.
for position in range(len(smiles_out_desc)):
    parsed = Chem.MolFromSmiles(smiles_out_desc[position])
    smiles_out_desc[position] = (
        Chem.MolToSmiles(parsed, canonical=True) if parsed else "invalid"
    )