In [1]:
import builders, data
import mxnet as mx
import os
import numpy as np
import pandas as pd
import random
import itertools
from tqdm import tqdm
from data import utils
import warnings
warnings.filterwarnings('ignore')

In [2]:
f = open('datasets/Normdata.txt')
Normdata = f.read()
f.close()
Normdata = eval(Normdata)
def mkdir(path):
    if not os.path.isdir(path):
        os.mkdir(path)

def canon_smiles(smi):
    if Chem.MolFromSmiles(smi) == None:
        return ''
    else:
        return Chem.MolToSmiles(Chem.MolFromSmiles(smi), isomericSmiles=True, canonical=True)    
        
def obj_norm(obj,sample_count):
    abs_obj, emi_obj, FWHM_abs_obj, FWHM_emi_obj, PLQY_obj, extin_obj, life_obj = obj
    abs_obj_norm = np.array([(abs_obj-Normdata['abs_mean'])/Normdata['abs_std'],]*sample_count).reshape(-1,1)
    emi_obj_norm = np.array([(emi_obj-Normdata['emi_mean'])/Normdata['emi_std'],]*sample_count).reshape(-1,1)
    FWHM_abs_obj_norm = np.array([(FWHM_abs_obj-Normdata['FWHM_Abs_mean'])/Normdata['FWHM_Abs_std'],]*sample_count).reshape(-1,1)
    FWHM_emi_obj_norm = np.array([(FWHM_emi_obj-Normdata['FWHM_Emi_mean'])/Normdata['FWHM_Emi_std'],]*sample_count).reshape(-1,1)
    PLQY_obj_norm = np.array([(PLQY_obj-Normdata['PLQY_mean'])/Normdata['PLQY_std'],]*sample_count).reshape(-1,1)
    extin_obj_norm = np.array([(extin_obj-Normdata['extin_mean'])/Normdata['extin_std'],]*sample_count).reshape(-1,1)
    life_obj_norm = np.array([(np.log10(life_obj)-Normdata['life_mean'])/Normdata['life_std'],]*sample_count).reshape(-1,1)
    return np.concatenate([abs_obj_norm, emi_obj_norm,life_obj_norm,PLQY_obj_norm, extin_obj_norm, FWHM_abs_obj_norm, FWHM_emi_obj_norm],axis=1)

In [3]:
path = 'DeepMoleculeGen'

def gen_mol(obj_list, sol_smiles, num_gen,foldername, scaffold=None):
    mkdir("generate")
    for obj in obj_list:
        savelist = []
        [abs_obj, emi_obj, FWHM_abs_obj, FWHM_emi_obj, PLQY_obj, extin_obj, life_obj] = obj
        
        success_num = 0
        while True:
            try:
                if scaffold == None:
                    X=A=NX=NA=last_action = scaffold
                else:
                    smiles =canon_smiles(scaffold)
                    i = 1000
                    graph, atom_types, atom_ranks, bonds, bond_types = utils.get_graph_from_smiles(smiles)
                    X_in = np.array(atom_types, dtype=np.int32)
                    A_in = np.concatenate([np.array(bonds, dtype=np.int32),
                                              np.array(bond_types, dtype=np.int32)[:, np.newaxis]],
                                              axis=1)
                    NX_in = np.array([X_in.shape[0]],dtype=np.int32)
                    NA_in = np.array([A_in.shape[0]],dtype=np.int32)
                    last_action_in = np.array([1],dtype=np.int32)
                    X = np.concatenate([X_in]*i,axis=0)
                    A = np.concatenate([A_in]*i,axis=0)
                    NX = np.concatenate([NX_in]*i,axis=0)
                    NA = np.concatenate([NA_in]*i,axis=0)
                    last_action = np.concatenate([last_action_in]*i,axis=0)
                # conditional codes:
                sample_count = 100
                c = obj_norm(obj,sample_count)
                sol_adj, sol_feat, NX_sol = utils.get_sol_matrix([sol_smiles,]*sample_count)
                c = np.array(c, dtype=np.float32)
                # load model
                mdl_prop = builders.Optical_RNN_Builder('ckpt/'+path+'/',ctx=mx.gpu(1),gpu_id=0)
                # sample results
                prop_outputs = []
                samples_prop_i = []
                samples_prop_i = [m for m in mdl_prop.sample(sample_count, c=c,sol_adj=sol_adj,sol_feat=sol_feat,NX_sol=NX_sol, X=X, A=A, NX=NX, NA=NA, last_action=last_action) if m is not None]
                _smiles_list = [Chem.MolToSmiles(m) for m in samples_prop_i]
                samples_prop_i = [Chem.MolFromSmiles(s) for s in _smiles_list]
                prop_outputs.append(samples_prop_i)
                new_smiles = []
                for i in _smiles_list:
                    try:
                        a = Chem.MolFromSmiles(i)
                        a = Chem.MolToSmiles(a)
                        new_smiles.append(i)
                    except:
                        print(i)
                print(len(new_smiles))
                sol_smiles_list = [sol_smiles,]*len(new_smiles)
                save_data_i = pd.concat([pd.DataFrame(new_smiles).rename(columns={0:'Chromophore'}),pd.DataFrame(sol_smiles_list).rename(columns={0:'Solvent'})],axis=1)
                savelist.append(save_data_i)
                success_num += 1
                print(success_num)
                if success_num == num_gen:
                    break
            except Exception as e:
                print(e)
        mkdir("generate/"+foldername+"/")
        filename = "generate/"+str(foldername)+'/'+str(abs_obj)+"_"+str(emi_obj)+"_"+str(FWHM_abs_obj)+"_"+str(FWHM_emi_obj)+"_"+str(PLQY_obj)+"_"+str(extin_obj)+"_"+str(life_obj)
        while os.path.isfile(filename):
            filename += '_new'
        pd.concat(savelist,axis=0).to_csv(filename+'.csv',index=None)


In [4]:
from rdkit import Chem                                                                                                                                                                   
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')
#####################################
solvent = canon_smiles('Cc1ccccc1')
folder_name = "test"

obj_list = [[500,550,3500,2500,0.5,4.5,1.74]]
# Absorption, Emission, Abs. bandwidth, Emi. bandwidth, PLQY, Extin. Coeff., Lifetime (ns) 

num_gen = 10 # 100* 10

gen_mol(obj_list, solvent, folder_name, num_gen)



init
89
1
init



KeyboardInterrupt

