# Tutorial - Step 5: Evaluating MTMR against SOTA models

## 1. Import requirements

In [1]:
import os
import pandas as pd
import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
from MTMR.evaluate import evaluate_metric_v2

## 2. Specify a target property

In [3]:
# Property to evaluate. It selects the per-property input/output subdirectories
# and, upper-cased, the suffix of the result file names built in later cells.
PROPERTY_NAME = "qed"

## 3. Set directories (for inputs and outputs)

In [4]:
# The baseline directory is not nested by property; the other two inputs are.
input_base_dir = os.path.join("DATA", "baselines")
input_data_dir = os.path.join("DATA", PROPERTY_NAME)
input_our_dir = os.path.join("outputs_Tutorial_4_MTMR_translation", PROPERTY_NAME)

In [5]:
# Output location: <root>/<PROPERTY_NAME>.
# os.makedirs creates the missing parent directory as well, and exist_ok=True
# makes re-running this cell a no-op (no separate exists() check, so no
# check-then-create race).
_output_dir = "outputs_Tutorial_5_MTMR_evaluation"
output_dir = os.path.join(_output_dir, PROPERTY_NAME)
os.makedirs(output_dir, exist_ok=True)

## 4. Set file names

In [6]:
# Two training-pair files: one in the per-property data directory, one in the
# baseline directory whose name carries the upper-cased property.
train_pairs_filename = f"train_pairs_{PROPERTY_NAME.upper()}.txt"
filepath_train = os.path.join(input_base_dir, train_pairs_filename)
filepath_train_rdkit = os.path.join(input_data_dir, "rdkit_train_pairs.txt")

In [7]:
# Both pair files are read as space-separated tables without a header row.
df_pairs_rdkit = pd.read_csv(filepath_train_rdkit, sep=" ", header=None)
df_pairs = pd.read_csv(filepath_train, sep=" ", header=None)

# Unique values of the second column of each table.
targets_rdkit = set(df_pairs_rdkit.iloc[:, 1].tolist())
targets = set(df_pairs.iloc[:, 1].tolist())

In [8]:
# Result files are named <MODEL>_<PROPERTY>.csv; only MTMR's file comes from
# the Tutorial 4 output directory, the rest from the baseline directory.
property_tag = PROPERTY_NAME.upper()

filepath_MTMR = os.path.join(input_our_dir, f"MTMR_{property_tag}.csv")
# JTVAE is disabled:
#filepath_JTVAE = os.path.join(input_base_dir, f"JTVAE_{PROPERTY_NAME.upper()}.csv")
filepath_G2G = os.path.join(input_base_dir, f"VJTNN_{property_tag}.csv")
filepath_CORE = os.path.join(input_base_dir, f"CORE_{property_tag}.csv")
filepath_G2GG = os.path.join(input_base_dir, f"VJTNN+GAN_{property_tag}.csv")
filepath_MOLCG = os.path.join(input_base_dir, f"MOLCG_{property_tag}.csv")
filepath_HIER = os.path.join(input_base_dir, f"HierG2G_{property_tag}.csv")
filepath_UGMMT = os.path.join(input_base_dir, f"UGMMT_{property_tag}.csv")

## 5. Load translation results generated from the test dataset

In [9]:
# Column labels shared by every result file (files have no header row).
RESULT_COLUMNS = {0: "SOURCE", 1: "TARGET", 2: "SIMILARITY", 3: "PROPERTY"}


def _read_translations(filepath):
    """Read a header-less result CSV, replace missing cells with "" and label the first four columns."""
    return pd.read_csv(filepath, header=None).fillna("").rename(columns=RESULT_COLUMNS)


df_MTMR = _read_translations(filepath_MTMR)
# JTVAE is disabled:
#df_JTVAE = _read_translations(filepath_JTVAE)
df_G2G = _read_translations(filepath_G2G)
df_CORE = _read_translations(filepath_CORE)
df_G2GG = _read_translations(filepath_G2GG)
df_HIER = _read_translations(filepath_HIER)
df_MOLCG = _read_translations(filepath_MOLCG)
df_UGMMT = _read_translations(filepath_UGMMT)

## 6. Evaluate metrics

In [10]:
# Evaluate every model with identical settings and keep one result frame per
# model in `frames`, which the next cell concatenates into a single table.
frames = []

for i, (name, df) in enumerate([('MTMR', df_MTMR),
                                #('JTVAE', df_JTVAE),
                                ('VJTNN', df_G2G),
                                ('VJTNN+GAN', df_G2GG),
                                ('CORE', df_CORE),
                                ('HierG2G', df_HIER),
                                ('MolCycleGAN', df_MOLCG),
                                ('UGMMT', df_UGMMT)]):
    print(f"NOW: {i}")
    # Evaluate the current model's frame `df` (previously df_MTMR was passed on
    # every iteration, so all models would have received MTMR's metrics).
    # The result's column 0 is relabelled with the model name.
    df_metric = evaluate_metric_v2(df, targets_rdkit, num_decode=20,
                                   threshold_sim=0.4,
                                   threshold_pro=0.9,
                                   use_pool=True).rename(columns={0:name})
    # Collect the result; without this `frames` stays empty and the later
    # pd.concat(frames, ...) has nothing to concatenate.
    frames.append(df_metric)

NOW: 0


  0%|          | 4/800 [00:39<2:11:10,  9.89s/it]Process ForkPoolWorker-923:
Process ForkPoolWorker-927:
Process ForkPoolWorker-926:
Process ForkPoolWorker-921:
Process ForkPoolWorker-922:
Process ForkPoolWorker-924:
Process ForkPoolWorker-929:
Process ForkPoolWorker-925:
Process ForkPoolWorker-928:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/descartes/anaconda3/envs/MTMR/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/descartes/anaconda3/envs/MTMR/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/descartes/anaconda3/envs/MTMR/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/descartes/anaconda3/envs/MT

  File "/home/descartes/anaconda3/envs/MTMR/lib/python3.6/multiprocessing/pool.py", line 119, in worker
    result = (True, func(*args, **kwds))
  File "/home/descartes/anaconda3/envs/MTMR/lib/python3.6/multiprocessing/pool.py", line 44, in mapstar
    return list(map(*args))
  File "/Data1/descartes/work/DrugStyler/MTMR/MTMR/evaluate.py", line 16, in calc_sim
    return similarity(*args)
  File "/Data1/descartes/work/DrugStyler/MTMR/MTMR/properties.py", line 137, in similarity
    fp1 = GetMorganFingerprintAsBitVect(amol, 2, nBits=2048, useChirality=False)
KeyboardInterrupt
  0%|          | 4/800 [00:45<2:29:25, 11.26s/it]


KeyboardInterrupt: 

In [None]:
# Put the per-model frames side by side, then transpose the combined table.
df_merged = pd.concat(frames, axis="columns").transpose()

In [None]:
# Display the merged metrics table.
df_merged

In [None]:
# Write the merged metrics table into the per-property output directory.
table_path = os.path.join(output_dir, "table_metrics.csv")
df_merged.to_csv(table_path)

In [None]:
sns.set_theme(style="darkgrid")

# One bar per row of df_merged; bar height comes from the SUCCESS column.
fig, ax = plt.subplots()
sns.barplot(x=df_merged.index, y=df_merged["SUCCESS"], ax=ax)
fig.tight_layout()

# Uncomment to save the figure next to the metrics table:
#plt.savefig(os.path.join(output_dir, "barplot_success.png"), dpi=300)