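# Generate compounds

This notebook reads profiles from a CSV file, applies a pre-fitted quantile transformer to the feature columns, generates molecules for each profile with the pretrained networks listed under `neural_net` in the inputs, checks which generated SMILES are valid, and saves the results to a CSV file.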

In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import sys
sys.path.append("../cp_mol_gan" )
import utils
import inference as infr
import tensorflow as tf
import tensorflow.keras.backend as K
# Set TensorFlow's logging verbosity threshold to ERROR (TF1-style `tf.logging` API).
tf.logging.set_verbosity(tf.logging.ERROR)


### Inputs

In [2]:
# Weight files for the individual WGAN components.
wgan_weights = {
    "C": "../cp_mol_gan/data/wgan_C_500epochs.h5",
    "D": "../cp_mol_gan/data/wgan_D_500epochs.h5",
    "G": "../cp_mol_gan/data/wgan_G_500epochs.h5",
    "condition_encoder": "../cp_mol_gan/data/wgan_condition_encoder_500epochs.h5",
    "classifier": "../cp_mol_gan/data/wgan_classifier.h5",
}

# All network weight files: the SELFIES autoencoder plus the WGAN components.
model_weights = {
    "autoencoder": "../cp_mol_gan/data/selfies_EncoderDecoder_epoch0010.h5",
    "wgan": wgan_weights,
}

# Run configuration read by the cells below.
args = {
    "profiles_file": "example_data.csv",                 # input CSV with profiles
    "output_file": "example_generated_mols.csv",         # where results are written
    "Nmols_per_condition": 2,                            # passed as `nsamples` to the generator
    "quantile_transformer": "../cp_mol_gan/data/quantile_transformer.pkl",
    "neural_net": model_weights,
    "gpu": "7",                                          # value exported as CUDA_VISIBLE_DEVICES
}

### Read profiles and apply quantile transformer

In [3]:
# Load the input profiles table from the configured CSV file.
profiles = pd.read_csv(args['profiles_file'])

# Load the pickled transformer inside a context manager so the file handle is
# closed as soon as unpickling finishes (previously the handle was never closed).
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(args['quantile_transformer'], 'rb') as transformer_file:
    quantile_transformer = pickle.load(transformer_file)

# Split the column names into feature columns and metadata columns.
feature_cols, meta_cols = utils.get_feature_cols(profiles)

# Replace the feature columns with their transformed values. The cell re-reads
# `profiles` from disk at the top, so re-running it does not transform twice.
profiles[feature_cols] = quantile_transformer.transform(profiles[feature_cols].values)

# NOTE(review): `profiles.shape` is (rows, columns), so the second number is the
# total column count of `profiles`, including any metadata columns.
# The message text is left unchanged so it matches the recorded output.
print('Total profiles: %i \nTotal fetaures: %i'%profiles.shape)

Total profiles: 10 
Total fetaures: 1453


### Set compute environment

In [4]:
# Export the GPU id from `args` as CUDA_VISIBLE_DEVICES for this process.
os.environ['CUDA_VISIBLE_DEVICES'] = str(args['gpu'])
# NOTE(review): index '0' is presumably relative to the devices left visible by
# CUDA_VISIBLE_DEVICES above, i.e. the single GPU selected in `args` — confirm.
gpu_options = tf.GPUOptions(visible_device_list='0')
# Create a TF1-style session with these GPU options and register it as the
# session used by the Keras backend.
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
K.set_session(sess)
# Turn on soft device placement and device-placement logging.
# NOTE(review): both are set after the session has been created; confirm they
# affect this session, and whether placement logging is wanted outside debugging.
tf.config.set_soft_device_placement(True)
tf.debugging.set_log_device_placement(True)

### Load model and generate molecules

In [9]:
# Build the inference model from the weight files listed under `neural_net`.
model = infr.InferenceModel(args['neural_net'])

# Generate `Nmols_per_condition` molecules per profile, with a fixed seed.
generated = infr.generate_compounds_multiple_conditions(
    model,
    profiles,
    feature_cols,
    meta_cols,
    seed=10,
    nsamples=args['Nmols_per_condition'],
)

100%|██████████| 10/10 [00:05<00:00,  1.70it/s]


### Check validity 

In [10]:
# Run the generated SMILES through the project's cleaning helper.
# NOTE(review): presumably it returns a null value for SMILES it cannot
# standardise (the recorded output shows empty entries) — confirm in utils.
generated["SMILES_standard"] = utils.clean_smiles_parallel(generated.SMILES)

# A molecule is flagged valid when its standardised SMILES is not null.
# `notnull()` yields the same boolean Series as `isnull() == False` without
# comparing against a boolean literal.
generated['valid'] = generated.SMILES_standard.notnull()

### Save results

In [7]:
# Write the generated molecules to the configured output CSV, without the index column.
output_path = args['output_file']
generated.to_csv(output_path, index=False)

In [11]:
# Display the first rows of the results table as a quick sanity check.
generated.head()

Unnamed: 0,condition_Metadata_Plate,condition_BROAD_ID,condition_SMILES,condition_Metadata_Well,SMILES,classification_score,SMILES_standard,valid
0,24726.0,BRD-K46807184-001-01-5,C[C@@H](CO)N1C[C@@H](C)[C@@H](CN(C)C(=O)Nc2ccc...,n12,COCCNC(=O)C(=CNC(=O)OCC(c1ccccc1)NC=O),0.93522,COCCNC(=O)C=CNC(=O)OCC(NC=O)c1ccccc1,True
1,24726.0,BRD-K46807184-001-01-5,C[C@@H](CO)N1C[C@@H](C)[C@@H](CN(C)C(=O)Nc2ccc...,n12,COcccc(C(=O)NNC(=O)Nccc(C(F)(F)F)nc(N1CCCCC1)),0.904445,,False
2,25639.0,BRD-A71962374-001-05-9,CCOC(=O)C(CC)Sc1nc2c(c(=O)[nH]c(=O)n2C)n1C/C=C...,p16,CNCCc1sc2c(c1C)N(C(=O)c1c[nH]c3ccccc13)C(=O)N2C,0.802137,CNCCc1sc2c(c1C)n(C(=O)c1c[nH]c3ccccc13)c(=O)n2C,True
3,25639.0,BRD-A71962374-001-05-9,CCOC(=O)C(CC)Sc1nc2c(c(=O)[nH]c(=O)n2C)n1C/C=C...,p16,COcccc(CN(C)nn1c(=O)nc(-c2ccc3(F)cc2)c1C3)#N,0.787629,,False
4,24512.0,BRD-K17354218-001-06-2,C=CCN1C(=O)NC(=O)/C(=C\Nc2ccc(OC)cc2C)C1=O,i03,O=Cc1ccc2cc1-c1c(ccc(n1)NC(=O)NO)C(=O)N(C)C2,0.802505,CN1Cc2ccc(C=O)c(c2)-c2nc(NC(=O)NO)ccc2C1=O,True
