In [15]:
import pandas as pd
import math
def my_round(x):
    """Round ``x`` to about four significant figures.

    The number of decimals kept is ``4 - ceil(log10(|x|))``, so it adapts to
    the magnitude of ``x``.

    Parameters
    ----------
    x : int or float
        Value to round.

    Returns
    -------
    int or float
        The rounded value.  ``0`` is returned when ``x`` is zero or NaN
        (``log10`` is undefined there); callers in this notebook rely on that
        to detect a missing/zero spread via ``std <= 0``.

    Notes
    -----
    Negative inputs are rounded by magnitude.  The previous lambda called
    ``abs(x)`` but guarded with ``x > 0``, which silently turned every
    negative value into ``0``.
    """
    # Both comparisons are False for 0 and for NaN, so those map to 0.
    if not (x > 0 or x < 0):
        return 0
    return round(x, 4 - math.ceil(math.log10(abs(x))))

# Method identifier (value of the 'method' column in the result CSVs) ->
# column header used in the main tables.  Insertion order is the column
# order of the generated tables.
main_methods_dic = {
    'logdet_ranking_normalize': 'SUBMO-GNN',
    'maxsum_dissim_ranking_maccs': 'MS-MK',
    'maxmin_dissim_ranking_maccs': 'MM-MK',
    'maxsum_dissim_ranking_ecfp': 'MS-EF',
    'maxmin_dissim_ranking_ecfp': 'MM-EF',
    'random_ranking': 'Random',
}

# Method identifier -> column header for the ablation tables
# ("w/ N" / "w/o N" variants plus the random baseline).  Insertion order is
# the column order of the generated ablation table.
ablation_methods_dic = {
    'logdet_ranking': 'SUBMO w/o N',
    'logdet_ranking_normalize': 'SUBMO w/ N',
    'maxsum_dissim_ranking': 'MS w/o N',
    'maxsum_dissim_ranking_normalize': 'MS w/ N',
    'maxmin_dissim_ranking': 'MM w/o N',
    'maxmin_dissim_ranking_normalize': 'MM w/ N',
    'random_ranking': 'Random',
}

# Property identifier (value of the 'property' column in the WDUD CSVs) ->
# row label written to the LaTeX tables.  Insertion order is the row order.
# Raw strings are used where the label embeds LaTeX math.
properties_dic = {
    'mu': 'mu (Debye)',
    'alpha': r'alpha (Bohr${}^3$)',
    'homo': 'HOMO (Hartree)',
    'lumo': 'LUMO (Hartree)',
    'gap': 'gap (Hartree)',
    'r2': r'R2 (Bohr${}^2$)',
    'zpve': 'ZPVE (Hartree)',
    'u0': 'U0 (Hartree)',
    'u298': 'U (Hartree)',
    'h298': 'H (Hartree)',
    'g298': 'G (Hartree)',
    'cv': 'Cv (cal/(mol K))',
    'u0 per elec': 'U0/Nelec (Hartree)',
    'u298 per elec': 'U/Nelec (Hartree)',
    'h298 per elec': 'H/Nelec (Hartree)',
    'g298 per elec': 'G/Nelec (Hartree)',
    # Currently disabled entry, kept for reference:
    # 'cv/3n-6': 'Cv/Nmode (cal/(mol K))',
}

# Fingerprint identifier (value of the 'fingerprint' column in the MPD CSVs)
# -> row label written to the MPD table.
fingerprints_dic = {
    'maccs': 'MPD-MK',
    'ecfp': 'MPD-EF',
}

# Make wdud_main.tex

In [16]:
# Build the main WDUD table (rows: properties, columns: methods) and write it
# to LaTeX.  Each cell is "$mean \pm std$"; the smallest rounded mean of every
# property is wrapped in \mathbf{}.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

main_methods = list(main_methods_dic)
df_ave = df_ave[df_ave['method'].isin(main_methods)]
df_std = df_std[df_std['method'].isin(main_methods)]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for prop, label in properties_dic.items():
    ave_rows = df_ave[df_ave['property'] == prop]
    std_rows = df_std[df_std['property'] == prop]
    # Key the rounded values by method so that mean and std are paired by
    # name, not by the row order of two separate CSV files.  A method missing
    # from either file now fails loudly (KeyError) instead of shifting values.
    ave_of = dict(zip(ave_rows['method'], map(my_round, ave_rows['distance'])))
    std_of = dict(zip(std_rows['method'], map(my_round, std_rows['distance'])))
    # Loop-invariant: the value that gets bolded for this property.
    best = min(ave_of.values(), default=None)

    cells = []
    for method in out_df.index:
        ave, std = ave_of[method], std_of[method]
        # my_round yields 0 for a zero/NaN spread; print the mean alone then.
        val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
        if ave == best:
            val = r'\mathbf{' + val + '}'
        cells.append('$' + val + '$')
    out_df[label] = cells

# Order the methods as in main_methods_dic, swap in display names, transpose.
out_df = out_df.reindex(index=main_methods)
out_df = out_df.rename(index=main_methods_dic).T
# File name fixed from 'wudu_main.tex' to match the cell header and the other
# 'wdud_*' outputs.  One 'l' for the label column plus one 'r' per method.
out_df.to_latex('wdud_main.tex', escape=False,
                column_format='l' + 'r' * out_df.shape[1])

# Make wdud_ablation.tex

In [17]:
# Build the ablation WDUD table (rows: properties, columns: ablation methods)
# and write it to LaTeX.  Each cell is "$mean \pm std$"; the smallest rounded
# mean of every property is wrapped in \mathbf{}.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

ablation_methods = list(ablation_methods_dic)
df_ave = df_ave[df_ave['method'].isin(ablation_methods)]
df_std = df_std[df_std['method'].isin(ablation_methods)]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for prop, label in properties_dic.items():
    ave_rows = df_ave[df_ave['property'] == prop]
    std_rows = df_std[df_std['property'] == prop]
    # Key the rounded values by method so that mean and std are paired by
    # name, not by the row order of two separate CSV files.  A method missing
    # from either file now fails loudly (KeyError) instead of shifting values.
    ave_of = dict(zip(ave_rows['method'], map(my_round, ave_rows['distance'])))
    std_of = dict(zip(std_rows['method'], map(my_round, std_rows['distance'])))
    # Loop-invariant: the value that gets bolded for this property.
    best = min(ave_of.values(), default=None)

    cells = []
    for method in out_df.index:
        ave, std = ave_of[method], std_of[method]
        # my_round yields 0 for a zero/NaN spread; print the mean alone then.
        val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
        if ave == best:
            val = r'\mathbf{' + val + '}'
        cells.append('$' + val + '$')
    out_df[label] = cells

# Order the methods as in ablation_methods_dic, swap in display names, transpose.
out_df = out_df.reindex(index=ablation_methods)
out_df = out_df.rename(index=ablation_methods_dic).T
# One 'l' for the label column plus one 'r' per method column.
out_df.to_latex('wdud_ablation.tex', escape=False,
                column_format='l' + 'r' * out_df.shape[1])

# Ablation: two tables

In [22]:
# Write the ablation results as two narrower LaTeX tables, three methods each.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

methods1 = [
    'logdet_ranking',
    'logdet_ranking_normalize',
    'maxsum_dissim_ranking',
]

methods2 = [
    'maxsum_dissim_ranking_normalize',
    'maxmin_dissim_ranking',
    'maxmin_dissim_ranking_normalize',
]


def _ablation_table(ave_all, std_all, methods):
    """Format the WDUD rows of ``methods`` as a properties-by-methods table.

    Parameters
    ----------
    ave_all, std_all : pandas.DataFrame
        Unfiltered mean / std frames with 'method', 'property' and 'distance'
        columns.
    methods : list of str
        Method identifiers to include, in the desired column order.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``properties_dic`` and one column per method
        (renamed through ``ablation_methods_dic``).  Cells are LaTeX math
        strings, "$mean \\pm std$", with the best mean wrapped in \\mathbf{}.
    """
    ave_sub = ave_all[ave_all['method'].isin(methods)]
    std_sub = std_all[std_all['method'].isin(methods)]

    table = pd.DataFrame([], index=ave_sub['method'].unique())
    for prop, label in properties_dic.items():
        ave_rows = ave_sub[ave_sub['property'] == prop]
        std_rows = std_sub[std_sub['property'] == prop]
        # Key by method so mean and std are paired by name, not by the row
        # order of two separate CSV files.
        ave_of = dict(zip(ave_rows['method'], map(my_round, ave_rows['distance'])))
        std_of = dict(zip(std_rows['method'], map(my_round, std_rows['distance'])))
        # The bold reference is the minimum over EVERY method in the CSV for
        # this property, so the same winner is used in both split tables.
        # NOTE(review): this also includes methods that appear in neither
        # ablation table; if one of those is best, no cell is bolded.
        # Confirm whether the minimum should be limited to ablation methods.
        best = min(
            map(my_round, ave_all.loc[ave_all['property'] == prop, 'distance']),
            default=None,
        )

        cells = []
        for method in table.index:
            ave, std = ave_of[method], std_of[method]
            # my_round yields 0 for a zero/NaN spread; print the mean alone.
            val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
            if ave == best:
                val = r'\mathbf{' + val + '}'
            cells.append('$' + val + '$')
        table[label] = cells

    table = table.reindex(index=methods)
    return table.rename(index=ablation_methods_dic).T


# One 'l' for the label column plus one 'r' per method column.
out_df = _ablation_table(df_ave, df_std, methods1)
out_df.to_latex('wdud_ablation1.tex', escape=False,
                column_format='l' + 'r' * out_df.shape[1])

out_df = _ablation_table(df_ave, df_std, methods2)
out_df.to_latex('wdud_ablation2.tex', escape=False,
                column_format='l' + 'r' * out_df.shape[1])

# Make mpd.tex

In [20]:
# Build the MPD table (rows: fingerprints, columns: methods) and write it to
# LaTeX.  Each cell is "$mean \pm std$"; here the LARGEST rounded mean of
# every fingerprint is wrapped in \mathbf{}.
df_ave = pd.read_csv('mpd_mean.csv')
df_std = pd.read_csv('mpd_std.csv')

main_methods = list(main_methods_dic)
df_ave = df_ave[df_ave['method'].isin(main_methods)]
df_std = df_std[df_std['method'].isin(main_methods)]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for fingerprint, label in fingerprints_dic.items():
    ave_rows = df_ave[df_ave['fingerprint'] == fingerprint]
    std_rows = df_std[df_std['fingerprint'] == fingerprint]
    # Key the rounded values by method so that mean and std are paired by
    # name, not by the row order of two separate CSV files.  A method missing
    # from either file now fails loudly (KeyError) instead of shifting values.
    ave_of = dict(zip(ave_rows['method'], map(my_round, ave_rows['mpd'])))
    std_of = dict(zip(std_rows['method'], map(my_round, std_rows['mpd'])))
    # Loop-invariant: the value that gets bolded for this fingerprint.
    best = max(ave_of.values(), default=None)

    cells = []
    for method in out_df.index:
        ave, std = ave_of[method], std_of[method]
        # my_round yields 0 for a zero/NaN spread; print the mean alone then.
        val = str(ave) if std <= 0 else str(ave) + r' \pm ' + str(std)
        if ave == best:
            val = r'\mathbf{ ' + val + '}'
        cells.append('$' + val + '$')
    out_df[label] = cells

# Order the methods as in main_methods_dic, swap in display names, transpose.
out_df = out_df.reindex(index=main_methods)
out_df = out_df.rename(index=main_methods_dic).T
# One 'l' for the label column plus one 'r' per method column.
out_df.to_latex('mpd.tex', escape=False,
                column_format='l' + 'r' * out_df.shape[1])