In [None]:
import os
import sys
import tqdm
import pandas as pd
import selfies as sf
from rdkit import Chem, RDLogger
# Disable the RDKit log channels matching 'rdApp.*' for the whole kernel session
# (presumably to keep per-molecule parser warnings out of the cell output — confirm).
RDLogger.DisableLog('rdApp.*')

In [None]:
# Absolute path of the parent of the current working directory. It is put at
# the front of the import search path (once) before the `rebadd` imports below.
REBADD_LIB_PATH = os.path.abspath(os.pardir)
if REBADD_LIB_PATH not in sys.path:
    # Rebind to a fresh list with the library path first; re-running the cell
    # is a no-op because of the membership guard above.
    sys.path = [REBADD_LIB_PATH, *sys.path]

from rebadd.chemutils import GSKScorer, JNKScorer, SAScorer, RAScorer
from rebadd.chemutils import calc_chem_properties # smi -> (mw, clogp, tpsa, qed)

In [None]:
# Single place for the `n_jobs` value passed to every scorer that is
# configured through set_params (presumably a worker count — confirm in
# rebadd.chemutils).
N_JOBS = 4

# The scorers are built once and later invoked as callables: scorer(smiles).
# SAScorer is used with its default construction (no set_params call).
calc_gsk = GSKScorer().set_params(n_jobs=N_JOBS)
calc_jnk = JNKScorer().set_params(n_jobs=N_JOBS)
calc_sa  = SAScorer()
calc_ra  = RAScorer().set_params(n_jobs=N_JOBS)

In [None]:
def calc_properties(smi):
    """Score a single SMILES string and return the results as a flat dict.

    The scorers are run in a fixed order: ``calc_chem_properties`` (mw, clogp,
    tpsa, qed), then ``calc_sa``, ``calc_ra``, ``calc_gsk`` and ``calc_jnk``.
    If any of them raises one of the RDKit sanitization/kekulization
    exceptions listed below, evaluation stops there: values already computed
    are kept and the remaining ones stay at their defaults.

    Parameters
    ----------
    smi : str
        SMILES string to evaluate.

    Returns
    -------
    dict
        Keys, in order: ``'smiles'``, ``'gsk3b'``, ``'jnk3'``, ``'sa'``,
        ``'ra'``, ``'mw'``, ``'logp'``, ``'tpsa'``, ``'qed'``.
        Defaults are ``0.`` for every score except ``'sa'``, which defaults
        to ``10.``.

    Notes
    -----
    Only the five RDKit exception types named in the handler are swallowed;
    any other exception propagates to the caller.
    """
    # Start from the fallback values, laid out in the output column order.
    props = {
        'smiles': smi,
        'gsk3b': 0.,
        'jnk3': 0.,
        'sa': 10.,
        'ra': 0.,
        'mw': 0.,
        'logp': 0.,
        'tpsa': 0.,
        'qed': 0.,
    }

    try:
        (props['mw'],
         props['logp'],
         props['tpsa'],
         props['qed']) = calc_chem_properties(smi)
        props['sa'] = calc_sa(smi)
        props['ra'] = calc_ra(smi)
        props['gsk3b'] = calc_gsk(smi)
        props['jnk3'] = calc_jnk(smi)
    except (
        Chem.rdchem.AtomKekulizeException,
        Chem.rdchem.AtomSanitizeException,
        Chem.rdchem.AtomValenceException,
        Chem.rdchem.KekulizeException,
        Chem.rdchem.MolSanitizeException,
    ):
        # Best effort: keep whatever was computed before the failure.
        pass

    return props

In [None]:
# Sanity check: score one hard-coded SMILES string and show the result as a
# one-row table (the last expression is the cell's displayed output).
drug_smi = "C1=CC=C2C(=C1)C3=NNC4=CC=CC(=C43)C2=O"
drug_props = calc_properties(drug_smi)
pd.DataFrame([drug_props])

In [None]:
class CKPTCONFIGS:
    """Where to find the checkpoint SMILES files that will be scored.

    Parameters
    ----------
    input_dir : str, optional
        Directory containing the checkpoint files.
    start, stop, step : int, optional
        Passed to ``range(start, stop, step)`` to enumerate checkpoint
        numbers (``stop`` is exclusive). The defaults yield
        50, 100, ..., 500.

    Attributes
    ----------
    input_dir : str
        Same as the ``input_dir`` argument.
    filenames : list of str
        One name per checkpoint number, formatted as
        ``smi_after.csv.NNNN`` with the number zero-padded to four digits.
    """

    def __init__(self, input_dir='outputs_3_checkpoints', start=50, stop=550, step=50):
        self.input_dir = input_dir
        self.filenames = [f'smi_after.csv.{num:04d}' for num in range(start, stop, step)]


# Default instance used by the cells below.
ckptconfigs = CKPTCONFIGS()

In [None]:
class OUTPUTCONFIGS:
    """Output location for the per-checkpoint property tables.

    Constructing an instance guarantees that the output directory exists.

    Parameters
    ----------
    output_dir : str, optional
        Directory to write results into. Missing parent directories are
        created as well.

    Attributes
    ----------
    output_dir : str
        Same as the ``output_dir`` argument.

    Raises
    ------
    FileExistsError
        If ``output_dir`` already exists but is not a directory.
    """

    def __init__(self, output_dir="outputs_4_calculate_properties"):
        self.output_dir = output_dir
        # exist_ok=True replaces the exists()-then-mkdir() pair: no race
        # between the check and the creation, and nested paths work too.
        os.makedirs(self.output_dir, exist_ok=True)


# Default instance used by the cells below.
outputconfigs = OUTPUTCONFIGS()

In [None]:
# For every checkpoint file: read the generated SMILES, score each one with
# calc_properties, and write the resulting table under the same file name in
# the output directory.
for filename in ckptconfigs.filenames:

    # Header-less CSV; the SMILES strings are taken from the first column.
    filepath = os.path.join(ckptconfigs.input_dir, filename)
    df = pd.read_csv(filepath, header=None, skip_blank_lines=True)
    first_column = df.iloc[:, 0]
    gen_smiles = first_column.values.tolist()

    print(len(gen_smiles))

    # One property dict per SMILES, with a progress bar over the molecules.
    data = [calc_properties(smi) for smi in tqdm.tqdm(gen_smiles)]

    # `df` is rebound from the input frame to the scored output frame.
    df = pd.DataFrame(data)

    output_filepath = os.path.join(outputconfigs.output_dir, filename)
    df.to_csv(output_filepath, index=False)