# Summarize the results from the GuacaMol benchmark

In [52]:
import pandas as pd 
import json 
from pathlib import Path
from glob import glob
import numpy as np 
import matplotlib.pyplot as plt

import seaborn as sns
# Apply the 'science' matplotlib style sheet to all subsequent figures.
# NOTE(review): presumably provided by the SciencePlots package; confirm it is
# installed and registered with matplotlib in this environment.
plt.style.use('science')


# Override the global default font size; the commented-out entries are
# optional font-family settings that are currently disabled.
plt.rcParams.update({
#     "font.family": "serif",   # specify font family here
#     "font.serif": ["Times"],  # specify font here
    "font.size":10})          # specify font size here

In [12]:
def get_results_from_json(file):
    """Load one benchmark result file into a flat ``{benchmark: score}`` dict.

    The JSON document must have a top-level ``"results"`` list whose entries
    each carry a ``"benchmark_name"`` and a ``"score"`` key.

    Args:
        file: Path (``str`` or ``os.PathLike``) of the JSON result file.

    Returns:
        A dict mapping every benchmark name to its score, plus a ``"name"``
        entry holding the file name without directory and extension.

    Raises:
        KeyError: If ``"results"``, ``"benchmark_name"`` or ``"score"`` is
            missing from the document.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform's
    # default text encoding when reading it.
    with open(file, encoding="utf-8") as fh:
        payload = json.load(fh)

    results_dict = {
        entry['benchmark_name']: entry['score'] for entry in payload['results']
    }

    # Added last so that 'name' ends up as the final key/column.
    results_dict['name'] = Path(file).stem
    return results_dict

In [18]:
# Parse every JSON result file in the current working directory into one
# flat score dict per file.
results = [get_results_from_json(json_path) for json_path in glob("*.json")]

In [19]:
# One row per result file; the columns are the dict keys produced by
# get_results_from_json (benchmark names plus 'name').
df = pd.DataFrame(results)

In [21]:
# Label rows by the 'name' column so they can be selected by label later;
# the 'name' column itself is kept in the frame.
df.index = df['name']

In [48]:
# Row labels used to compute the per-benchmark 'leading' score below.
# NOTE(review): presumably the external baseline methods (i.e. not the
# authors' own runs) — confirm against the result files.
not_ours = ["graph_mcts", "smiles_ga", "graph_ga", "best_of_dataset", "smiles_lstm"]

In [53]:
# For every score column take the best value among the rows listed in
# `not_ours`; the 'name' column instead gets the label of the summary row.
maxima = {
    col: 'leading' if col == 'name' else np.max(df[col][not_ours])
    for col in df.columns
}

In [55]:
# Single-row frame holding the per-benchmark maxima, labelled by its 'name'
# value; drop=False keeps 'name' as a regular column as well.
df_maxima = pd.DataFrame([maxima]).set_index('name', drop=False)

In [56]:
# Stack the per-file rows and the single maxima row into one frame.
df_all = pd.concat([df, df_maxima])

In [73]:
# Keep only the two rows reported in the table, in this order:
# the 'smiles_ga' row and the 'leading' row.
df_selected = df_all.loc[['smiles_ga', 'leading'], :]

In [79]:
# Emit the body of a LaTeX (booktabs) table: one header row with the selected
# row labels, then one line per score column with values at two decimals.
row_end = '\\\\'
header_cells = [label for label in df_selected.index if label != 'name']
score_columns = [col for col in df_selected.columns if col != 'name']

print("\\toprule")
print("benchmark & " + " & ".join(header_cells) + row_end)
print('\\midrule')
for benchmark in score_columns:
    cells = ["{:.2f}".format(score) for score in df_selected[benchmark].tolist()]
    print(benchmark + " & " + " & ".join(cells) + row_end)
print("\\bottomrule")

\toprule
benchmark & smiles_ga & leading\\
\midrule
Celecoxib rediscovery & 0.73 & 1.00\\
Troglitazone rediscovery & 0.51 & 1.00\\
Thiothixene rediscovery & 0.60 & 1.00\\
Aripiprazole similarity & 0.83 & 1.00\\
Albuterol similarity & 0.91 & 1.00\\
Mestranol similarity & 0.79 & 1.00\\
C11H24 & 0.83 & 0.99\\
C9H10N2O2PF2Cl & 0.89 & 0.98\\
Median molecules 1 & 0.33 & 0.44\\
Median molecules 2 & 0.38 & 0.43\\
Osimertinib MPO & 0.89 & 0.95\\
Fexofenadine MPO & 0.93 & 1.00\\
Ranolazine MPO & 0.88 & 0.92\\
Perindopril MPO & 0.66 & 0.81\\
Amlodipine MPO & 0.72 & 0.89\\
Sitagliptin MPO & 0.69 & 0.89\\
Zaleplon MPO & 0.41 & 0.75\\
Valsartan SMARTS & 0.55 & 0.99\\
Scaffold Hop & 0.97 & 1.00\\
Deco Hop & 0.88 & 1.00\\
\bottomrule
