# Generate compounds

In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import sys
# Extend the module search path so the project-local modules imported below
# (utils, inference) can be resolved — presumably they live in ../cp_mol_gan.
sys.path.append("../cp_mol_gan" )
import utils
import inference as infr
import tensorflow as tf
import tensorflow.keras.backend as K
# Only show TensorFlow log messages at ERROR level (TF 1.x logging API).
tf.logging.set_verbosity(tf.logging.ERROR)


### Inputs

In [2]:
# Weight files of the conditional WGAN components.
wgan_weights = {
    "C": "../cp_mol_gan/data/wgan_C_500epochs.h5",
    "D": "../cp_mol_gan/data/wgan_D_500epochs.h5",
    "G1": "../cp_mol_gan/data/wgan_G_500epochs.h5",
    "condition_encoder": "../cp_mol_gan/data/wgan_condition_encoder_500epochs.h5",
    "classifier": "../cp_mol_gan/data/wgan_classifier.h5",
}

# All network weights handed to the inference model.
neural_net_weights = {
    "autoencoder": "../cp_mol_gan/data/selfies_EncoderDecoder_epoch0010.h5",
    "wgan": wgan_weights,
}

# Run configuration read by the cells below.
args = {
    # CSV file with the input profiles.
    "profiles_file": "example_data.csv",
    # CSV file the generated molecules are written to.
    "output_file": "example_generated_mols.csv",
    # Number of molecules sampled for each condition.
    "Nmols_per_condition": 2,
    # Pickled transformer applied to the profile feature columns.
    "quantile_transformer": "../cp_mol_gan/data/quantile_transformer.pkl",
    "neural_net": neural_net_weights,
    # Value assigned to CUDA_VISIBLE_DEVICES.
    "gpu": "7",
}

### Read profiles and apply quantile transformer

In [3]:
# Load the table of input profiles.
profiles = pd.read_csv(args['profiles_file'])

# Load the pickled transformer inside a context manager so the file handle is
# closed as soon as loading finishes (the bare open() call leaked it).
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
with open(args['quantile_transformer'], 'rb') as transformer_file:
    quantile_transformer = pickle.load(transformer_file)

# Presumably splits the column names into feature and metadata columns
# (helper is defined in utils — confirm there).
feature_cols, meta_cols = utils.get_feature_cols(profiles)

# Overwrite the feature columns with their transformed values.
profiles[feature_cols] = quantile_transformer.transform(profiles[feature_cols].values)

# NOTE(review): profiles.shape is (rows, columns), so the second number counts
# every column of the table, not only feature_cols — confirm this is intended.
print('Total profiles: %i \nTotal fetaures: %i'%profiles.shape)

Total profiles: 100 
Total fetaures: 1454


### Set compute environment

In [4]:
# Restrict the process to the configured GPU; this is set before the session is created.
visible_gpu = str(args['gpu'])
os.environ['CUDA_VISIBLE_DEVICES'] = visible_gpu

# Build the session configuration; device '0' refers to the first device
# within the visible set.
gpu_options = tf.GPUOptions(visible_device_list='0')
session_config = tf.ConfigProto(gpu_options=gpu_options)

# Create the session and register it as the one Keras uses.
sess = tf.Session(config=session_config)
K.set_session(sess)

# NOTE(review): these are set after the session already exists — confirm they
# still take effect for a session created with an explicit config.
tf.config.set_soft_device_placement(True)
tf.debugging.set_log_device_placement(True)

### Load model and generate molecules

In [5]:
# Build the inference model from the configured weight files.
model = infr.InferenceModel(args['neural_net'])

# Generate molecules for every profile, with a fixed seed and the configured
# number of samples per condition.
generated = infr.generate_compounds_multiple_conditions(
    model,
    profiles,
    feature_cols,
    meta_cols,
    seed=10,
    nsamples=args['Nmols_per_condition'],
)

100%|██████████| 100/100 [00:54<00:00,  1.85it/s]


### Check validity 

In [6]:
# Standardize the generated SMILES strings with the project helper.
generated["SMILES_standard"] = utils.clean_smiles_parallel(generated["SMILES"])

# Flag a row as valid when its standardized SMILES is not null; notnull()
# replaces the non-idiomatic `isnull()==False` and yields the same boolean Series.
# Presumably the helper returns null for SMILES it cannot standardize — confirm in utils.
generated["valid"] = generated["SMILES_standard"].notnull()

### Save results

In [8]:
# Write the generated molecules to the configured CSV file, without the index column.
output_path = args['output_file']
generated.to_csv(output_path, index=False)