In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from rdkit import Chem
from rdkit.Chem import Draw

In [None]:
# Directory that receives every artifact written by this notebook.
output_dir = 'outputs_11_topk_molecules'
# exist_ok=True makes the cell safely re-runnable and avoids the race between
# a separate "exists?" check and the directory creation.
os.makedirs(output_dir, exist_ok=True)

In [None]:
## Molecules generated by ReBADD-SE
filepath_rebadd = os.path.join(
    'outputs_7_calculate_properties_generated',
    'frag+reinforce+scst+offpolicy',
    'smi_after.csv.0',
)
# Keep the SMILES string and the five property columns; exact duplicate rows
# (identical in all six columns) are dropped.
df_rebadd = pd.read_csv(filepath_rebadd)[['smiles', 'bcl2', 'bclxl', 'bclw', 'sa', 'ra']].drop_duplicates()
# TP = bcl2 * bclxl * bclw * (10 - sa) * ra * 1e-4
df_rebadd['TP'] = (
    df_rebadd['bcl2'] * df_rebadd['bclxl'] * df_rebadd['bclw']
    * (10 - df_rebadd['sa']) * df_rebadd['ra'] * 1e-4
)

In [None]:
## Molecules generated by RationaleRL
filepath_rationale = os.path.join('baseline', 'RationaleRL', 'smi_after.csv.0')
# Keep the SMILES string and the five property columns; exact duplicate rows
# (identical in all six columns) are dropped.
df_rationale = pd.read_csv(filepath_rationale)[['smiles', 'bcl2', 'bclxl', 'bclw', 'sa', 'ra']].drop_duplicates()
# TP = bcl2 * bclxl * bclw * (10 - sa) * ra * 1e-4
df_rationale['TP'] = (
    df_rationale['bcl2'] * df_rationale['bclxl'] * df_rationale['bclw']
    * (10 - df_rationale['sa']) * df_rationale['ra'] * 1e-4
)

In [None]:
## Molecules generated by MARS
filepath_mars = os.path.join('baseline', 'MARS', 'smi_after.csv.0')
# Keep the SMILES string and the five property columns; exact duplicate rows
# (identical in all six columns) are dropped.
df_mars = pd.read_csv(filepath_mars)[['smiles', 'bcl2', 'bclxl', 'bclw', 'sa', 'ra']].drop_duplicates()
# TP = bcl2 * bclxl * bclw * (10 - sa) * ra * 1e-4
df_mars['TP'] = (
    df_mars['bcl2'] * df_mars['bclxl'] * df_mars['bclw']
    * (10 - df_mars['sa']) * df_mars['ra'] * 1e-4
)

In [None]:
## Molecules generated by ReLeaSE
filepath_release = os.path.join('baseline', 'ReLeaSE', 'smi_after.csv.0')
# Keep the SMILES string and the five property columns; exact duplicate rows
# (identical in all six columns) are dropped.
df_release = pd.read_csv(filepath_release)[['smiles', 'bcl2', 'bclxl', 'bclw', 'sa', 'ra']].drop_duplicates()
# TP = bcl2 * bclxl * bclw * (10 - sa) * ra * 1e-4
df_release['TP'] = (
    df_release['bcl2'] * df_release['bclxl'] * df_release['bclw']
    * (10 - df_release['sa']) * df_release['ra'] * 1e-4
)

In [None]:
## Molecules generated by MolGPT
filepath_molgpt = os.path.join('baseline', 'MolGPT', 'smi_after.csv.0')
# Keep the SMILES string and the five property columns; exact duplicate rows
# (identical in all six columns) are dropped.
df_molgpt = pd.read_csv(filepath_molgpt)[['smiles', 'bcl2', 'bclxl', 'bclw', 'sa', 'ra']].drop_duplicates()
# TP = bcl2 * bclxl * bclw * (10 - sa) * ra * 1e-4
df_molgpt['TP'] = (
    df_molgpt['bcl2'] * df_molgpt['bclxl'] * df_molgpt['bclw']
    * (10 - df_molgpt['sa']) * df_molgpt['ra'] * 1e-4
)

In [None]:
def _top3_labelled(df, label):
    """Return the three rows of ``df`` with the largest 'TP', tagged with ``label``.

    Args:
        df: DataFrame that contains a numeric 'TP' column.
        label: Value written to the 'label' column of every returned row.

    Returns:
        A new DataFrame of at most three rows, ordered by descending 'TP',
        re-indexed from 0, with a 'label' column added (or overwritten).
    """
    return df.nlargest(3, 'TP').reset_index(drop=True).assign(label=label)


# Each per-generator frame is narrowed to its top three molecules; the names
# are rebound because later cells read df_rebadd in its top-3 form.
df_rebadd = _top3_labelled(df_rebadd, 'ReBADD-SE')
df_rationale = _top3_labelled(df_rationale, 'RationaleRL')
df_mars = _top3_labelled(df_mars, 'MARS')
df_release = _top3_labelled(df_release, 'ReLeaSE')
df_molgpt = _top3_labelled(df_molgpt, 'MolGPT')

# ignore_index=True gives the merged table a unique 0..N-1 index instead of
# repeating 0, 1, 2 once per generator.
df_merged = pd.concat(
    [df_rebadd, df_rationale, df_mars, df_release, df_molgpt],
    ignore_index=True,
)

In [None]:
# Display the merged table of top-3 molecules from every generator.
df_merged

In [None]:
# Persist the merged top-3 table (without the index column) next to the other outputs.
top3_csv_path = os.path.join(output_dir, 'top3_molecules.csv')
df_merged.to_csv(top3_csv_path, index=False)

In [None]:
# Display the ReBADD-SE frame that the plotting cell below iterates over.
df_rebadd

In [None]:
# Draw the top ReBADD-SE molecules, one per row, each titled with its TP score,
# and save the figure as a PDF in output_dir.
fig, ax = plt.subplots(3, 1, figsize=(6, 6))

# Pair each panel with its molecule and score by position, so the cell does not
# depend on df_rebadd carrying a 0..2 integer index.
for panel, smi, tp_score in zip(ax, df_rebadd['smiles'].values, df_rebadd['TP'].values):
    mol = Chem.MolFromSmiles(smi)
    # Draw is imported explicitly at the top of the notebook; reaching it as
    # Chem.Draw only works when something else has already loaded the subpackage.
    panel.imshow(Draw.MolToImage(mol, size=(900, 300)))
    panel.set_title(f"TP score: {tp_score:.3f}", loc='left')

# Hide axes on every panel, including any left empty when fewer than three
# molecules are available.
for panel in ax:
    panel.axis('off')

fig.tight_layout()
fig.savefig(os.path.join(output_dir, 'best_three_molecules_ReBADD-SE.pdf'))