In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Apply seaborn's 'darkgrid' theme globally before any figure is created.
sns.set_theme(
    style='darkgrid',
)

In [None]:
# Folder that receives the PNG files written by the plotting cells.
output_dir = 'outputs_10_property_distribution_analysis'
# exist_ok=True keeps the cell safe to re-run and removes the check-then-create
# race of an exists()/mkdir() pair. It also fails immediately if a regular file
# already occupies this path, instead of failing later inside savefig.
os.makedirs(output_dir, exist_ok=True)

In [None]:
## Training data: keep the three 'ba_*' columns and rename them to the short
## names (bcl2, bclxl, bclw) used by the generated-molecule tables.
filepath_tr = os.path.join(os.pardir, 'data', 'zinc15', 'zinc15_train_full.csv')
df_tr = (
    pd.read_csv(filepath_tr)
    .loc[:, ['ba_bcl2', 'ba_bclxl', 'ba_bclw']]
    .rename(
        columns={
            'ba_bcl2': 'bcl2',
            'ba_bclxl': 'bclxl',
            'ba_bclw': 'bclw',
        }
    )
)

In [None]:
## Molecules generated by ReBADD-SE
filepath_rebadd = os.path.join(
    'outputs_7_calculate_properties_generated',
    'frag+reinforce+scst+offpolicy',
    'smi_after.csv.0',
)
# Select the three property columns first, then drop rows whose property
# triple repeats (duplicates are judged on these columns only).
df_rebadd = (
    pd.read_csv(filepath_rebadd)
    .loc[:, ['bcl2', 'bclxl', 'bclw']]
    .drop_duplicates()
)

In [None]:
## Molecules generated by the RationaleRL baseline
filepath_rationale = os.path.join('baseline', 'RationaleRL', 'smi_after.csv.0')
# Select the three property columns first, then drop rows whose property
# triple repeats (duplicates are judged on these columns only).
df_rationale = (
    pd.read_csv(filepath_rationale)
    .loc[:, ['bcl2', 'bclxl', 'bclw']]
    .drop_duplicates()
)

In [None]:
## Molecules generated by the MARS baseline
filepath_mars = os.path.join('baseline', 'MARS', 'smi_after.csv.0')
# Select the three property columns first, then drop rows whose property
# triple repeats (duplicates are judged on these columns only).
df_mars = (
    pd.read_csv(filepath_mars)
    .loc[:, ['bcl2', 'bclxl', 'bclw']]
    .drop_duplicates()
)

In [None]:
## Molecules generated by the ReLeaSE baseline
filepath_release = os.path.join('baseline', 'ReLeaSE', 'smi_after.csv.0')
# Select the three property columns first, then drop rows whose property
# triple repeats (duplicates are judged on these columns only).
df_release = (
    pd.read_csv(filepath_release)
    .loc[:, ['bcl2', 'bclxl', 'bclw']]
    .drop_duplicates()
)

In [None]:
## Molecules generated by the MolGPT baseline
filepath_molgpt = os.path.join('baseline', 'MolGPT', 'smi_after.csv.0')
# Select the three property columns first, then drop rows whose property
# triple repeats (duplicates are judged on these columns only).
df_molgpt = (
    pd.read_csv(filepath_molgpt)
    .loc[:, ['bcl2', 'bclxl', 'bclw']]
    .drop_duplicates()
)

In [None]:
# Give every table a 'label' column naming its source. assign() returns a new
# frame, so the frames loaded above are left unmodified.
df_tr_ = df_tr.assign(label='Training')
df_rebadd_ = df_rebadd.assign(label='ReBADD-SE')
df_rationale_ = df_rationale.assign(label='RationaleRL')
df_mars_ = df_mars.assign(label='MARS')
df_release_ = df_release.assign(label='ReLeaSE')
df_molgpt_ = df_molgpt.assign(label='MolGPT')

# Stack everything into one long table with a fresh 0..n-1 index.
# The stacking order (generators first, training set last) is kept as-is.
df_merged = pd.concat(
    [df_rebadd_, df_rationale_, df_mars_, df_release_, df_molgpt_, df_tr_],
    ignore_index=True,
)

In [None]:
# Per-source mean of each numeric column (the string 'label' column is the group key).
df_merged.groupby('label').mean(numeric_only=True)

In [None]:
# Per-source standard deviation of each numeric column.
df_merged.groupby('label').std(numeric_only=True)

In [None]:
# Density of the 'bcl2' column, one filled KDE curve per source label.
# This panel draws the legend.
target = 'bcl2'

fig, ax = plt.subplots(figsize=(5, 2.5))

_ = sns.kdeplot(
    data=df_merged,
    x=target,
    hue='label',
    common_norm=False,  # each label's curve is normalised on its own
    fill=True,
    legend=True,
    ax=ax,
)
ax.set(xlabel='Binding Affinity (pKd) against Bcl-2')

fig.tight_layout()
# NOTE(review): saving is disabled for this panel only, while the bclxl and
# bclw cells do write their PNGs. Confirm whether this is intentional.
#plt.savefig(os.path.join(output_dir, "BA_distribution_analysis_bcl2.png"), dpi=600)

In [None]:
# Density of the 'bclxl' column, one filled KDE curve per source label.
# The legend is suppressed on this panel.
target = 'bclxl'

fig, ax = plt.subplots(figsize=(5, 2.5))

_ = sns.kdeplot(
    data=df_merged,
    x=target,
    hue='label',
    common_norm=False,  # each label's curve is normalised on its own
    fill=True,
    legend=False,
    ax=ax,
)
ax.set(xlabel='Binding Affinity (pKd) against Bcl-xl')

fig.tight_layout()
# fig is the figure created just above, so this writes the same image that
# pyplot's current-figure savefig would.
fig.savefig(os.path.join(output_dir, "BA_distribution_analysis_bclxl.png"), dpi=600)

In [None]:
# Density of the 'bclw' column, one filled KDE curve per source label.
# The legend is suppressed on this panel.
target = 'bclw'

fig, ax = plt.subplots(figsize=(5, 2.5))

_ = sns.kdeplot(
    data=df_merged,
    x=target,
    hue='label',
    common_norm=False,  # each label's curve is normalised on its own
    fill=True,
    legend=False,
    ax=ax,
)
ax.set(xlabel='Binding Affinity (pKd) against Bcl-w')

fig.tight_layout()
# fig is the figure created just above, so this writes the same image that
# pyplot's current-figure savefig would.
fig.savefig(os.path.join(output_dir, "BA_distribution_analysis_bclw.png"), dpi=600)