In [None]:
## Set CUDA_VISIBLE_DEVICES to 0 for this kernel; this runs before torch is imported in the next cell.
%env CUDA_VISIBLE_DEVICES=0

In [None]:
import os
import sys
import torch
import tqdm
import pandas as pd
import selfies as sf
from rdkit import Chem, RDLogger
RDLogger.DisableLog('rdApp.*')

In [None]:
## Inter-op parallelism
torch.set_num_interop_threads(4)
torch.get_num_interop_threads()
## Intra-op parallelism
torch.set_num_threads(4)
torch.get_num_threads()

In [None]:
class GPUCONFIGS:
    """Resolve the torch device used by the notebook.

    Attributes set in ``__init__``:
        use_cuda: result of ``torch.cuda.is_available()``.
        device:   ``torch.device('cuda:0')`` when CUDA is available, else
                  ``torch.device('cpu')``.
    """
    def __init__(self):
        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device('cuda:0' if self.use_cuda else 'cpu')
        ## Only touch the CUDA runtime when a CUDA device actually exists.
        if self.use_cuda:
            torch.cuda.set_device(self.device)

gpuconfigs = GPUCONFIGS()
## torch.cuda.current_device() raises when CUDA is unavailable, which would
## defeat the CPU fallback above; show the selected device in that case.
print(torch.cuda.current_device() if gpuconfigs.use_cuda else gpuconfigs.device)

In [None]:
## Put the parent of the working directory at the front of the module search
## path (once), so the `rebadd` imports below can be resolved from there.
REBADD_LIB_PATH = os.path.abspath(os.pardir)
if REBADD_LIB_PATH not in sys.path:
    sys.path = [REBADD_LIB_PATH, *sys.path]

from rebadd.bindutils import BAScorerBCL2, BAScorerBCLXL, BAScorerBCLW
from rebadd.chemutils import SAScorer, RAScorer
from rebadd.chemutils import calc_chem_properties # smi -> (mw, clogp, tpsa, qed)

In [None]:
## The three BA scorers are built with the same device settings, in the
## order BCL2, BCLXL, BCLW.
_ba_kwargs = dict(device=gpuconfigs.device, use_cuda=gpuconfigs.use_cuda)
calc_bcl2, calc_bclxl, calc_bclw = (
    scorer_cls(**_ba_kwargs)
    for scorer_cls in (BAScorerBCL2, BAScorerBCLXL, BAScorerBCLW)
)

## SA / RA scorers
calc_sa = SAScorer()
calc_ra = RAScorer().set_params(n_jobs=4)

In [None]:
def calc_properties(smi):
    """Score one SMILES string and return every property as a flat dict.

    The scorers are called in a fixed order: chemical properties
    (mw, clogp, tpsa, qed), then SA, RA, and the BCL2 / BCLXL / BCLW scores.
    If one of the listed RDKit kekulize / sanitize / valence exceptions is
    raised, evaluation stops there: values computed so far are kept and the
    remaining entries stay at their defaults (0. everywhere, 10. for 'sa').
    Any other exception propagates to the caller.

    Args:
        smi: SMILES string to evaluate.

    Returns:
        dict with keys 'smiles', 'bcl2', 'bclxl', 'bclw', 'sa', 'ra',
        'mw', 'logp', 'tpsa', 'qed' (in that order).
    """
    ## init: defaults reported for anything that could not be evaluated
    props = {
        'smiles': smi,
        'bcl2': 0., 'bclxl': 0., 'bclw': 0.,
        'sa': 10., 'ra': 0.,
        'mw': 0., 'logp': 0., 'tpsa': 0., 'qed': 0.,
    }

    ## eval
    try:
        props['mw'], props['logp'], props['tpsa'], props['qed'] = calc_chem_properties(smi)
        props['sa'] = calc_sa(smi)
        props['ra'] = calc_ra(smi)
        props['bcl2'] = calc_bcl2(smi)
        props['bclxl'] = calc_bclxl(smi)
        props['bclw'] = calc_bclw(smi)
    except (
        Chem.rdchem.AtomKekulizeException,
        Chem.rdchem.AtomSanitizeException,
        Chem.rdchem.AtomValenceException,
        Chem.rdchem.KekulizeException,
        Chem.rdchem.MolSanitizeException,
    ):
        ## best effort: keep what was computed before the failure
        pass

    return props

In [None]:
## Sanity check: score a single drug SMILES and show the result as a one-row table.
drug_smi = "CC1(CCC(=C(C1)CN2CCN(CC2)C3=CC=C(C=C3)C(=O)NS(=O)(=O)C4=CC(=C(C=C4)NC(CCN5CCOCC5)CSC6=CC=CC=C6)S(=O)(=O)C(F)(F)F)C7=CC=C(C=C7)Cl)C"
drug_props = calc_properties(drug_smi)
pd.DataFrame([drug_props])

In [None]:
class CKPTCONFIGS:
    """Location and names of the checkpoint files read by the scoring loop.

    Attributes set in ``__init__``:
        input_dir: directory the checkpoint files are read from.
        filenames: one file name per checkpoint; the numeric suffix runs from
                   0050 to 0500 in steps of 50.
    """
    def __init__(self):
        checkpoint_steps = range(50, 550, 50)

        self.input_dir = 'outputs_3_checkpoints'
        self.filenames = [f'smi_after.csv.{num:04d}' for num in checkpoint_steps]

ckptconfigs = CKPTCONFIGS()

In [None]:
class OUTPUTCONFIGS:
    """Output directory for the computed property tables; created on construction.

    Args:
        output_dir: directory to write into. Defaults to
            "outputs_4_calculate_properties", the value that was previously
            hard-coded.

    Attributes:
        output_dir: the directory passed in (or the default).
    """
    def __init__(self, output_dir="outputs_4_calculate_properties"):
        self.output_dir = output_dir
        ## makedirs(exist_ok=True) replaces the exists()+mkdir() pair: no
        ## check-then-create race, safe to re-run, and nested paths work.
        os.makedirs(self.output_dir, exist_ok=True)

outputconfigs = OUTPUTCONFIGS()

In [None]:
## For every checkpoint file: read the SMILES from its first column, score each
## one with calc_properties, and write the table under the same file name in
## the output directory.
for filename in ckptconfigs.filenames:

    filepath = os.path.join(ckptconfigs.input_dir, filename)
    output_filepath = os.path.join(outputconfigs.output_dir, filename)

    ## header-less CSV; only the first column is used
    raw_df = pd.read_csv(filepath, header=None, skip_blank_lines=True)
    gen_smiles = raw_df.iloc[:, 0].values.tolist()

    print(len(gen_smiles))

    ## one property dict per SMILES, in input order
    data = [calc_properties(smi) for smi in tqdm.tqdm(gen_smiles)]

    df = pd.DataFrame(data)
    df.to_csv(output_filepath, index=False)