In [None]:
import os
import sys
import tqdm
import pandas as pd

In [None]:
import torch

In [None]:
# Make the project's `rebadd` directory importable. It is looked up two
# directory levels above the current working directory; `abspath` resolves the
# relative path against the working directory and normalizes the `..` parts.
REBADD_PATH = os.path.abspath(os.path.join(os.pardir, os.pardir, 'rebadd'))
if REBADD_PATH not in sys.path:
    # Prepend so modules in `rebadd` take precedence over same-named modules
    # found later on the search path.
    sys.path = [REBADD_PATH, *sys.path]
    
from bindutils import BAScorerBCL2, BAScorerBCLXL, BAScorerBCLW

# 1. Load SMILES data

In [None]:
# CSV file with the input molecules, resolved relative to the working directory.
filepath_input = "zinc15_raw_to_canonical.csv"

In [None]:
# Load the input table. Expected columns: zinc_id, smiles, mwt, logp, length.
df = pd.read_csv(filepath_or_buffer=filepath_input)

In [None]:
# One row per SMILES string, so the row count is the number of molecules loaded.
print(f'Number of SMILES: {len(df)}')

# 2. GPU check

In [None]:
# Pick the first CUDA device when one is available, otherwise fall back to CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(use_cuda, device)

# 3. Load the DTA scorers

In [None]:
# Build one scorer per target (BCL2, BCLXL, BCLW). All three constructors take
# the same (device, use_cuda) arguments.
# NOTE(review): "BA" presumably stands for binding affinity -- confirm in bindutils.
_scorer_args = (device, use_cuda)
scorer_bcl2 = BAScorerBCL2(*_scorer_args)
scorer_bclxl = BAScorerBCLXL(*_scorer_args)
scorer_bclw = BAScorerBCLW(*_scorer_args)

# 4. Prediction

In [None]:
# Score every input molecule with the three scorers and collect the rows for
# which all three scores could be computed. Scoring stays best-effort: a
# molecule whose scoring raises is skipped, but the failure is recorded in
# `failed_records` instead of being silently discarded.
data = []
failed_records = []  # (zinc_id, smiles, repr(error)) for every skipped molecule

# Input columns carried over unchanged into each result record.
input_columns = ['zinc_id', 'smiles', 'mwt', 'logp', 'length']

# Iterate positionally so the loop does not depend on `df` having a default
# 0..n-1 index, and to avoid five separate `.loc` scalar lookups per row.
row_iter = df[input_columns].itertuples(index=False, name=None)

for row_values in tqdm.tqdm(row_iter, total=df.shape[0]):
    record = dict(zip(input_columns, row_values))
    smi = record['smiles']

    try:
        record['ba_bcl2']  = scorer_bcl2(smi)
        record['ba_bclxl'] = scorer_bclxl(smi)
        record['ba_bclw']  = scorer_bclw(smi)
    except Exception as err:
        # Catch `Exception` rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit can still stop this long-running loop.
        failed_records.append((record['zinc_id'], smi, repr(err)))
        continue

    # Only records with all three scores reach the result list.
    data.append(record)

if failed_records:
    print(f'Skipped {len(failed_records)} SMILES that could not be scored (see `failed_records`)')

# 5. Make a table

In [None]:
# Turn the list of per-molecule record dicts into a table (one row per record).
df_res = pd.DataFrame(data=data)
print(f'Number of SMILES whose scores are available: {len(df_res)}')

# 6. Save the result table

In [None]:
# Destination CSV for the scored table, resolved relative to the working directory.
filepath_output = "zinc15_canonical_to_bcl2family.csv"

In [None]:
# Write the result as a comma-separated file (the `to_csv` default separator),
# leaving out the DataFrame index column.
df_res.to_csv(filepath_output, index=False)
print(f'The result is saved in {filepath_output}')