In [12]:
import pandas as pd
import math
def my_round(x):
    """Round a positive number for display; map everything else to 0.

    The number of decimals passed to ``round`` is ``4 - ceil(log10(|x|))``,
    which keeps four significant digits for values that are not exact powers
    of ten. Any ``x`` that is not strictly positive (zero, negative, NaN)
    yields the integer ``0``.
    """
    if x > 0:
        magnitude = math.ceil(math.log10(abs(x)))
        return round(x, 4 - magnitude)
    return 0

# Methods shown in the main tables: keys are matched against the 'method'
# column of the result CSVs, values are the labels the rows are renamed to
# before the LaTeX table is written. The key order is also the order used to
# reindex the table.
main_methods_dic = {
    'logdet_ranking_normalize' : 'SUBMO-GNN', 
    'maxsum_dissim_ranking_maccs' : 'MS-MK', 
    'maxmin_dissim_ranking_maccs' : 'MM-MK', 
    'maxsum_dissim_ranking_ecfp' : 'MS-EF', 
    'maxmin_dissim_ranking_ecfp' : 'MM-EF', 
    'random_ranking' : 'Random', 
}

# Methods shown in the ablation tables, same key -> label convention as above.
# Keys ending in '_normalize' carry the 'w/ N' label, the others 'w/o N'
# (presumably with / without normalization -- confirm against the paper).
ablation_methods_dic = {
    'logdet_ranking' : 'SUBMO w/o N',
    'logdet_ranking_normalize' : 'SUBMO w/ N',
    'maxsum_dissim_ranking' : 'MS w/o N',
    'maxsum_dissim_ranking_normalize' : 'MS w/ N',
    'maxmin_dissim_ranking' : 'MM w/o N',
    'maxmin_dissim_ranking_normalize' : 'MM w/ N',
    # Random baseline is currently excluded from the ablation tables.
    #'random_ranking' : 'Random',
}

# Property keys are matched against the 'property' column of the wdud CSVs;
# values are the headings written into the LaTeX tables. Some headings embed
# LaTeX math (raw strings), so the tables are written with escape=False.
properties_dic = {
    'mu': 'mu (Debye)', 
    'alpha': r'alpha (Bohr${}^3$)', 
    'homo': 'HOMO (Hartree)', 
    'lumo': 'LUMO (Hartree)', 
    'gap': 'gap (Hartree)', 
    'r2': r'R2 (Bohr${}^2$)', 
    'zpve': 'ZPVE (Hartree)',  
    'u0': 'U0 (Hartree)', 
    'u298': 'U (Hartree)', 
    'h298': 'H (Hartree)', 
    'g298': 'G (Hartree)', 
    'cv': 'Cv (cal/(mol K))',
    'zpve/3n-6': 'ZPVE/Nmode (Hartree)',
    'u0/Nelec': 'U0/Nelec (Hartree)', 
    'u298/Nelec': 'U/Nelec (Hartree)', 
    'h298/Nelec': 'H/Nelec (Hartree)', 
    'g298/Nelec': 'G/Nelec (Hartree)', 
    'cv/3n-6': 'Cv/Nmode (cal/(mol K))',
}

# Fingerprint keys are matched against the 'fingerprint' column of the mpd
# CSVs; values are the headings used in mpd.tex.
fingerprints_dic = {'maccs': 'MPD-MK', 'ecfp': 'MPD-EF'}

# Make wdud_main.tex

In [2]:
# Build the main table: one row per property, one column per method, each cell
# "$mean \pm std$" with the smallest mean of the row set in bold.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

# Keep only the methods reported in the main table.
df_ave = df_ave[df_ave['method'].isin(main_methods_dic.keys())]
df_std = df_std[df_std['method'].isin(main_methods_dic.keys())]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for prop, prop_label in properties_dic.items():
    # Key the values by method so a mean is always paired with the std of the
    # same method, independent of the row order of the two CSV files.
    # verify_integrity makes a duplicated (method, property) row an error.
    ave_rows = df_ave[df_ave['property'] == prop].set_index('method', verify_integrity=True)
    std_rows = df_std[df_std['property'] == prop].set_index('method', verify_integrity=True)
    aves = {method: my_round(value) for method, value in ave_rows['distance'].items()}
    stds = {method: my_round(value) for method, value in std_rows['distance'].items()}
    best_ave = min(aves.values(), default=None)  # smallest rounded mean is bolded

    out = []
    for method in out_df.index:
        # A missing (method, property) row raises KeyError instead of
        # silently shifting the remaining values.
        ave, std = aves[method], stds[method]
        val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
        if ave == best_ave:
            val = r'\mathbf{' + val + '}'
        out.append('$' + val + '$')
    out_df[prop_label] = out

out_df = out_df.reindex(index=list(main_methods_dic.keys()))
out_df = out_df.rename(index=main_methods_dic).T
# File name follows the 'wdud_' prefix used by the inputs and the other tables
# (the previous 'wudu_main.tex' was a typo). One 'l' column for the property
# label plus one 'r' column per method.
out_df.to_latex('wdud_main.tex', escape=False, column_format='l' + 'r' * len(out_df.columns))

# Make wdud_ablation.tex

In [3]:
# Build the ablation table: one row per property, one column per ablation
# method, each cell "$mean \pm std$" with the smallest mean of the row in bold.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

# Keep only the methods that take part in the ablation study.
df_ave = df_ave[df_ave['method'].isin(ablation_methods_dic)]
df_std = df_std[df_std['method'].isin(ablation_methods_dic)]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for prop, prop_label in properties_dic.items():
    # Key the values by method so a mean is always paired with the std of the
    # same method, independent of the row order of the two CSV files.
    # verify_integrity makes a duplicated (method, property) row an error.
    ave_rows = df_ave[df_ave['property'] == prop].set_index('method', verify_integrity=True)
    std_rows = df_std[df_std['property'] == prop].set_index('method', verify_integrity=True)
    aves = {method: my_round(value) for method, value in ave_rows['distance'].items()}
    stds = {method: my_round(value) for method, value in std_rows['distance'].items()}
    best_ave = min(aves.values(), default=None)  # smallest rounded mean is bolded

    out = []
    for method in out_df.index:
        # A missing (method, property) row raises KeyError instead of
        # silently shifting the remaining values.
        ave, std = aves[method], stds[method]
        val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
        if ave == best_ave:
            val = r'\mathbf{' + val + '}'
        out.append('$' + val + '$')
    out_df[prop_label] = out

out_df = out_df.reindex(index=list(ablation_methods_dic.keys()))
out_df = out_df.rename(index=ablation_methods_dic).T
# Derive the column spec from the table itself: the previous hard-coded
# 'lrrrrrrr' declared seven data columns although only six methods remain.
out_df.to_latex('wdud_ablation.tex', escape=False, column_format='l' + 'r' * len(out_df.columns))

# Ablation: two tables

In [8]:
# Same content as the single ablation table, split into two narrower tables.
# Bold marks the best (smallest) mean over ALL ablation methods, so a row of
# one half-table may legitimately contain no bold entry.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

df_ave = df_ave[df_ave['method'].isin(ablation_methods_dic)]
df_std = df_std[df_std['method'].isin(ablation_methods_dic)]

methods1 = [
    'logdet_ranking',
    'logdet_ranking_normalize',
    'maxsum_dissim_ranking',
]

methods2 = [
    'maxsum_dissim_ranking_normalize',
    'maxmin_dissim_ranking',
    'maxmin_dissim_ranking_normalize',
]


def _write_ablation_table(ave_df, std_df, methods, tex_path):
    """Write one half of the ablation table to ``tex_path``.

    Parameters
    ----------
    ave_df, std_df : DataFrame
        Mean / std tables with 'method', 'property' and 'distance' columns,
        already restricted to the ablation methods.
    methods : list of str
        Method keys to show in this table, in column order.
    tex_path : str
        Destination of the LaTeX file.

    Raises
    ------
    ValueError
        If a (method, property) pair occurs more than once in either table.
    KeyError
        If a shown method has no mean or std row for some property.
    """
    shown = ave_df[ave_df['method'].isin(methods)]['method'].unique()
    table = pd.DataFrame([], index=shown)
    for prop, prop_label in properties_dic.items():
        # Key the values by method so a mean is always paired with the std of
        # the same method, independent of the row order of the two CSV files.
        ave_rows = ave_df[ave_df['property'] == prop].set_index('method', verify_integrity=True)
        std_rows = std_df[std_df['property'] == prop].set_index('method', verify_integrity=True)
        aves_all = {method: my_round(value) for method, value in ave_rows['distance'].items()}
        stds_all = {method: my_round(value) for method, value in std_rows['distance'].items()}
        # Best value is taken over every ablation method, not only those shown.
        best_ave = min(aves_all.values(), default=None)

        cells = []
        for method in table.index:
            ave, std = aves_all[method], stds_all[method]
            val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
            if ave == best_ave:
                val = r'\mathbf{' + val + '}'
            cells.append('$' + val + '$')
        table[prop_label] = cells

    table = table.reindex(index=methods)
    table = table.rename(index=ablation_methods_dic).T
    # One 'l' column for the property label plus one 'r' column per method.
    table.to_latex(tex_path, escape=False, column_format='l' + 'r' * len(table.columns))


_write_ablation_table(df_ave, df_std, methods1, 'wdud_ablation1.tex')
_write_ablation_table(df_ave, df_std, methods2, 'wdud_ablation2.tex')

mu 10.38 10.38
mu 10.71 10.38
mu 11.85 10.38
alpha 38.7 36.62
alpha 37.0 36.62
alpha 37.43 36.62
homo 0.02894 0.02768
homo 0.02768 0.02768
homo 0.04238 0.02768
lumo 0.02164 0.02164
lumo 0.02894 0.02164
lumo 0.03242 0.02164
gap 0.03414 0.03414
gap 0.03545 0.03414
gap 0.05385 0.03414
r2 373.4 216.6
r2 355.5 216.6
r2 216.6 216.6
zpve 0.02798 0.01587
zpve 0.02296 0.01587
zpve 0.01587 0.01587
u0 92.31 68.93
u0 86.19 68.93
u0 68.93 68.93
u298 92.31 68.93
u298 86.19 68.93
u298 68.93 68.93
h298 92.31 68.93
h298 86.19 68.93
h298 68.93 68.93
g298 92.32 68.94
g298 86.2 68.94
g298 68.94 68.94
cv 5.814 4.785
cv 5.695 4.785
cv 4.898 4.785
zpve/3n-6 0.001574 0.001496
zpve/3n-6 0.001532 0.001496
zpve/3n-6 0.00155 0.001496
u0/Nelec 0.3768 0.2161
u0/Nelec 0.3876 0.2161
u0/Nelec 0.2161 0.2161
u298/Nelec 0.3768 0.2161
u298/Nelec 0.3876 0.2161
u298/Nelec 0.2161 0.2161
h298/Nelec 0.3768 0.2161
h298/Nelec 0.3876 0.2161
h298/Nelec 0.2161 0.2161
g298/Nelec 0.3768 0.2161
g298/Nelec 0.3876 0.2161
g298/Nelec 0.21

# Make mpd.tex

In [5]:
# Build the MPD table: one row per fingerprint, one column per method, each
# cell "$mean \pm std$" with the largest mean of the row set in bold.
df_ave = pd.read_csv('mpd_mean.csv')
df_std = pd.read_csv('mpd_std.csv')

# Keep only the methods reported in the main table.
df_ave = df_ave[df_ave['method'].isin(main_methods_dic.keys())]
df_std = df_std[df_std['method'].isin(main_methods_dic.keys())]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for fingerprint, fingerprint_label in fingerprints_dic.items():
    # Key the values by method so a mean is always paired with the std of the
    # same method, independent of the row order of the two CSV files.
    # verify_integrity makes a duplicated (method, fingerprint) row an error.
    ave_rows = df_ave[df_ave['fingerprint'] == fingerprint].set_index('method', verify_integrity=True)
    std_rows = df_std[df_std['fingerprint'] == fingerprint].set_index('method', verify_integrity=True)
    aves = {method: my_round(value) for method, value in ave_rows['mpd'].items()}
    stds = {method: my_round(value) for method, value in std_rows['mpd'].items()}
    best_ave = max(aves.values(), default=None)  # largest rounded mean is bolded

    out = []
    for method in out_df.index:
        # A missing (method, fingerprint) row raises KeyError instead of
        # silently shifting the remaining values.
        ave, std = aves[method], stds[method]
        val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
        if ave == best_ave:
            val = r'\mathbf{ ' + val + '}'
        out.append('$' + val + '$')
    out_df[fingerprint_label] = out

out_df = out_df.reindex(index=list(main_methods_dic.keys()))
out_df = out_df.rename(index=main_methods_dic).T
# One 'l' column for the fingerprint label plus one 'r' column per method.
out_df.to_latex('mpd.tex', escape=False, column_format='l' + 'r' * len(out_df.columns))