In [20]:
import pandas as pd
import math

# ---------------------------------------------------------------------------
# Lookup tables: raw identifiers used in the CSV files -> labels printed in
# the generated LaTeX tables.  Insertion order matters: the cells below use
# the key order of these dictionaries to order table rows/columns.
# ---------------------------------------------------------------------------

# Methods shown in the main comparison tables.
main_methods_dic = {
    "logdet_ranking_normalize": "SUBMO-GNN",
    "wgreedy_ranking": "WG-GNN",
    "maxsum_dissim_ranking_maccs": "MS-MK",
    "maxmin_dissim_ranking_maccs": "MM-MK",
    "maxsum_dissim_ranking_ecfp": "MS-EF",
    "maxmin_dissim_ranking_ecfp": "MM-EF",
    "random_ranking": "Random",
}

# Methods shown in the ablation tables ("w/ N" / "w/o N" labels).
# 'random_ranking' ('Random') is currently disabled for these tables.
ablation_methods_dic = {
    "logdet_ranking": "SUBMO w/o N",
    "logdet_ranking_normalize": "SUBMO w/ N",
    "maxsum_dissim_ranking": "MS w/o N",
    "maxsum_dissim_ranking_normalize": "MS w/ N",
    "maxmin_dissim_ranking": "MM w/o N",
    "maxmin_dissim_ranking_normalize": "MM w/ N",
}

# Property identifiers -> row labels (with units, LaTeX math where needed).
# Labels containing a backslash must stay raw strings ('\r' would otherwise
# be read as a carriage return).
properties_dic = {
    "mu": "mu (Debye)",
    "alpha": "alpha (Bohr${}^3$)",
    "homo": "HOMO (Hartree)",
    "lumo": "LUMO (Hartree)",
    "gap": "gap (Hartree)",
    "r2": "R2 (Bohr${}^2$)",
    "zpve": "ZPVE (Hartree)",
    "u0": "U0 (Hartree)",
    "u298": "U (Hartree)",
    "h298": "H (Hartree)",
    "g298": "G (Hartree)",
    "cv": "Cv (cal/(mol K))",
    "zpve/3n-6": r"ZPVE/$N_{\rm mode}$ (Hartree)",
    "u0/Nelec": r"U0/$N_{\rm elec}$ (Hartree)",
    "u298/Nelec": r"U/$N_{\rm elec}$ (Hartree)",
    "h298/Nelec": r"H/$N_{\rm elec}$ (Hartree)",
    "g298/Nelec": r"G/$N_{\rm elec}$ (Hartree)",
    "cv/3n-6": r"Cv/$N_{\rm mode}$ (cal/(mol K))",
}

# Property identifiers used in the "others" CSV files -> dataset labels.
properties_dic_others = {
    "measured log solubility in mols per litre": "ESOL",
    "expt": "FreeSolv",
    "exp": "Lipop",
}

# Fingerprint identifiers -> column labels for the MPD tables.
fingerprints_dic = {
    "maccs": "MPD-MK",
    "ecfp": "MPD-EF",
}

# Dataset-specific fingerprint identifiers -> labels for the "others" MPD table.
fingerprints_dic_others = {
    "delaney_maccs": "ESOL-MPD-MK",
    "delaney_ecfp": "ESOL-MPD-EF",
    "sampl_maccs": "FreeSolv-MPD-MK",
    "sampl_ecfp": "FreeSolv-MPD-EF",
    "lipo_maccs": "Lipop-MPD-MK",
    "lipo_ecfp": "Lipop-MPD-EF",
}


def new_round(ave, std):
    """Format a mean and its standard deviation as fixed-point LaTeX text.

    The number of decimals is chosen from the magnitude of ``ave`` as
    ``4 - ceil(log10(|ave|))`` and is applied to both ``ave`` and ``std`` so
    the two numbers line up.

    Parameters
    ----------
    ave : float
        Mean value; determines the precision.
    std : float
        Standard deviation.  When ``std <= 0`` only the mean is returned.

    Returns
    -------
    str
        ``'<ave>'`` or ``'<ave> \\pm <std>'`` (no surrounding ``$``).
    """
    if ave == 0 or not math.isfinite(ave):
        # log10(0) is a domain error and ceil() rejects NaN/inf; fall back to
        # the precision used for values of order one.
        pw_ave = 0
    else:
        pw_ave = math.ceil(math.log10(abs(ave)))
    # A negative precision (|ave| > 1e4) is not a valid format spec; clamp it.
    decimals = max(0, 4 - pw_ave)
    formatter = '{:.' + str(decimals) + 'f}'
    s_ave = formatter.format(ave)
    if std <= 0:
        return s_ave
    s_std = formatter.format(std)
    # Raw string: '\p' is not a valid escape sequence in a normal literal.
    return s_ave + r' \pm ' + s_std

# Make wdud_main_qm9.tex

In [22]:
# Build the main QM9 table from the 'method', 'property' and 'distance'
# columns of the mean/std CSV files: one row per property, one column per
# method, with the smallest mean of each property typeset in bold.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

# Keep only the methods that appear in the main table.
df_ave = df_ave[df_ave['method'].isin(list(main_methods_dic))]
df_std = df_std[df_std['method'].isin(list(main_methods_dic))]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for prop, label in properties_dic.items():
    # NOTE(review): values are paired and assigned by position, which assumes
    # both CSVs list the methods in the same order for every property --
    # confirm against the files.
    aves = list(df_ave[df_ave['property'] == prop]['distance'])
    stds = list(df_std[df_std['property'] == prop]['distance'])
    best = min(aves, default=None)  # computed once instead of per row
    out = []
    for ave, std in zip(aves, stds):
        val = new_round(ave, std)
        # Raw strings are required here: in a normal literal '\t' is a TAB,
        # so '\textbf' / '\text' would not produce the LaTeX macros.
        if ave == best:
            val = val.replace('e-', r'\textbf{e-}')
            val = r'\mathbf{ ' + val + '}'
        else:
            val = val.replace('e-', r'\text{e-}')
        out.append('$' + val + '$')
    out_df[label] = out

# Order rows as in main_methods_dic, relabel them, then put methods in columns.
out_df = out_df.reindex(index=list(main_methods_dic))
out_df = out_df.rename(index=main_methods_dic).T
# NOTE(review): the file name says 'wudu' while the inputs and the section
# heading say 'wdud'; kept unchanged in case the paper sources reference it.
out_df.to_latex('wudu_main_qm9.tex', escape=False, column_format='lrrrrrrr')

# Make wdud_ablation.tex

In [23]:
# Build the single ablation table: one row per property, one column per
# ablation method, with the smallest mean of each property typeset in bold.
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

# Keep only the ablation methods.
df_ave = df_ave[df_ave['method'].isin(list(ablation_methods_dic))]
df_std = df_std[df_std['method'].isin(list(ablation_methods_dic))]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for prop, label in properties_dic.items():
    # NOTE(review): values are paired and assigned by position, which assumes
    # both CSVs list the methods in the same order for every property --
    # confirm against the files.
    aves = list(df_ave[df_ave['property'] == prop]['distance'])
    stds = list(df_std[df_std['property'] == prop]['distance'])
    best = min(aves, default=None)  # computed once instead of per row
    out = []
    for ave, std in zip(aves, stds):
        val = new_round(ave, std)
        # Raw strings are required here: in a normal literal '\t' is a TAB,
        # so '\textbf' / '\text' would not produce the LaTeX macros.
        if ave == best:
            val = val.replace('e-', r'\textbf{e-}')
            val = r'\mathbf{ ' + val + '}'
        else:
            val = val.replace('e-', r'\text{e-}')
        out.append('$' + val + '$')
    out_df[label] = out

# Order rows as in ablation_methods_dic, relabel them, then transpose.
out_df = out_df.reindex(index=list(ablation_methods_dic))
out_df = out_df.rename(index=ablation_methods_dic).T
# Derive the column spec from the table: the previous hard-coded 'lrrrrrrr'
# declared seven value columns, but ablation_methods_dic has six entries.
out_df.to_latex(
    'wdud_ablation.tex',
    escape=False,
    column_format='l' + 'r' * len(out_df.columns),
)

# Ablation: two tables

In [24]:
# Same ablation data as a single table would hold, split into two
# three-method tables.  The bold entry is still chosen against ALL ablation
# methods, so each property is bold in at most one of the two tables
# (barring ties).
df_ave = pd.read_csv('wdud_mean.csv')
df_std = pd.read_csv('wdud_std.csv')

df_ave = df_ave[df_ave['method'].isin(list(ablation_methods_dic))]
df_std = df_std[df_std['method'].isin(list(ablation_methods_dic))]

methods1 = [
    'logdet_ranking',
    'logdet_ranking_normalize',
    'maxsum_dissim_ranking',
]

methods2 = [
    'maxsum_dissim_ranking_normalize',
    'maxmin_dissim_ranking',
    'maxmin_dissim_ranking_normalize',
]


def _ablation_subtable(ave_sub, std_sub, ave_all, methods):
    """Return the formatted property-by-method table for ``methods``.

    ``ave_sub`` / ``std_sub`` hold the rows of the methods in this table;
    ``ave_all`` holds the rows of every ablation method and is only used to
    find the per-property minimum that gets typeset in bold.
    """
    table = pd.DataFrame([], index=ave_sub['method'].unique())
    for prop, label in properties_dic.items():
        # NOTE(review): values are paired and assigned by position, which
        # assumes both CSVs list the methods in the same order for every
        # property -- confirm against the files.
        aves = list(ave_sub[ave_sub['property'] == prop]['distance'])
        stds = list(std_sub[std_sub['property'] == prop]['distance'])
        aves_all = list(ave_all[ave_all['property'] == prop]['distance'])
        best = min(aves_all, default=None)
        cells = []
        for ave, std in zip(aves, stds):
            val = new_round(ave, std)
            # Raw strings are required here: in a normal literal '\t' is a
            # TAB, so '\textbf' / '\text' would not produce the LaTeX macros.
            if ave == best:
                val = val.replace('e-', r'\textbf{e-}')
                val = r'\mathbf{ ' + val + '}'
            else:
                val = val.replace('e-', r'\text{e-}')
            cells.append('$' + val + '$')
        table[label] = cells
    table = table.reindex(index=methods)
    return table.rename(index=ablation_methods_dic).T


# First table: methods1.
df_ave1 = df_ave[df_ave['method'].isin(methods1)]
df_std1 = df_std[df_std['method'].isin(methods1)]
out_df = _ablation_subtable(df_ave1, df_std1, df_ave, methods1)
out_df.to_latex('wdud_ablation1.tex', escape=False, column_format='lrrr')

# Second table: methods2.
df_ave2 = df_ave[df_ave['method'].isin(methods2)]
df_std2 = df_std[df_std['method'].isin(methods2)]
out_df = _ablation_subtable(df_ave2, df_std2, df_ave, methods2)
out_df.to_latex('wdud_ablation2.tex', escape=False, column_format='lrrr')

# Make mpd_qm9.tex

In [46]:
# Build the QM9 MPD table from the 'method', 'fingerprint' and 'mpd' columns:
# one row per fingerprint, one column per method, with the LARGEST mean of
# each fingerprint typeset in bold.
df_ave = pd.read_csv('mpd_mean.csv')
df_std = pd.read_csv('mpd_std.csv')

# Keep only the methods that appear in the main table.
df_ave = df_ave[df_ave['method'].isin(list(main_methods_dic))]
df_std = df_std[df_std['method'].isin(list(main_methods_dic))]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for fingerprint, label in fingerprints_dic.items():
    # NOTE(review): values are paired and assigned by position, which assumes
    # both CSVs list the methods in the same order for every fingerprint --
    # confirm against the files.
    aves = list(df_ave[df_ave['fingerprint'] == fingerprint]['mpd'])
    stds = list(df_std[df_std['fingerprint'] == fingerprint]['mpd'])
    best = max(aves, default=None)  # computed once instead of per row
    out = []
    for ave, std in zip(aves, stds):
        val = new_round(ave, std)
        # Raw strings are required here: in a normal literal '\t' is a TAB,
        # so '\textbf' / '\text' would not produce the LaTeX macros.
        if ave == best:
            val = val.replace('e-', r'\textbf{e-}')
            val = r'\mathbf{ ' + val + '}'
        else:
            val = val.replace('e-', r'\text{e-}')
        out.append('$' + val + '$')
    out_df[label] = out

# Order rows as in main_methods_dic, relabel them, then put methods in columns.
out_df = out_df.reindex(index=list(main_methods_dic))
out_df = out_df.rename(index=main_methods_dic).T
out_df.to_latex('mpd_qm9.tex', escape=False, column_format='lrrrrrrr')

# Make wdud_others.tex

In [47]:
# Build the WDUD table for the additional datasets: one row per dataset
# label from properties_dic_others, one column per method, with the smallest
# mean of each row typeset in bold.
df_ave = pd.read_csv('wdud_mean_others.csv')
df_std = pd.read_csv('wdud_std_others.csv')

# Keep only the methods that appear in the main table.
df_ave = df_ave[df_ave['method'].isin(list(main_methods_dic))]
df_std = df_std[df_std['method'].isin(list(main_methods_dic))]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for prop, label in properties_dic_others.items():
    # NOTE(review): values are paired and assigned by position, which assumes
    # both CSVs list the methods in the same order for every property --
    # confirm against the files.
    aves = list(df_ave[df_ave['property'] == prop]['distance'])
    stds = list(df_std[df_std['property'] == prop]['distance'])
    best = min(aves, default=None)  # computed once instead of per row
    out = []
    for ave, std in zip(aves, stds):
        val = new_round(ave, std)
        # Raw strings are required here: in a normal literal '\t' is a TAB,
        # so '\textbf' / '\text' would not produce the LaTeX macros.
        if ave == best:
            val = val.replace('e-', r'\textbf{e-}')
            val = r'\mathbf{ ' + val + '}'
        else:
            val = val.replace('e-', r'\text{e-}')
        out.append('$' + val + '$')
    out_df[label] = out

# Order rows as in main_methods_dic, relabel them, then put methods in columns.
out_df = out_df.reindex(index=list(main_methods_dic))
out_df = out_df.rename(index=main_methods_dic).T
# NOTE(review): the file name says 'wudu' while the inputs and the section
# heading say 'wdud'; kept unchanged in case the paper sources reference it.
out_df.to_latex('wudu_others.tex', escape=False, column_format='lrrrrrrr')

# Make mpd_others.tex

In [107]:
# Build the MPD table for the additional datasets: one row per entry of
# fingerprints_dic_others, one column per method, with the LARGEST mean of
# each row typeset in bold.
df_ave = pd.read_csv('mpd_mean_others.csv')
df_std = pd.read_csv('mpd_std_others.csv')

# Keep only the methods that appear in the main table.
df_ave = df_ave[df_ave['method'].isin(list(main_methods_dic))]
df_std = df_std[df_std['method'].isin(list(main_methods_dic))]

out_df = pd.DataFrame([], index=df_ave['method'].unique())
for fingerprint, label in fingerprints_dic_others.items():
    # NOTE(review): values are paired and assigned by position, which assumes
    # both CSVs list the methods in the same order for every fingerprint --
    # confirm against the files.
    aves = list(df_ave[df_ave['fingerprint'] == fingerprint]['mpd'])
    stds = list(df_std[df_std['fingerprint'] == fingerprint]['mpd'])
    best = max(aves, default=None)  # computed once instead of per row
    out = []
    for ave, std in zip(aves, stds):
        val = new_round(ave, std)
        # Raw strings are required here: in a normal literal '\t' is a TAB,
        # so '\textbf' / '\text' would not produce the LaTeX macros.
        if ave == best:
            val = val.replace('e-', r'\textbf{e-}')
            val = r'\mathbf{ ' + val + '}'
        else:
            val = val.replace('e-', r'\text{e-}')
        out.append('$' + val + '$')
    out_df[label] = out

# Order rows as in main_methods_dic, relabel them, then put methods in columns.
out_df = out_df.reindex(index=list(main_methods_dic))
out_df = out_df.rename(index=main_methods_dic).T
out_df.to_latex('mpd_others.tex', escape=False, column_format='lrrrrrrr')

# Prediction accuracy

In [113]:
# Build the prediction-accuracy table: one row per property column of the
# CSV files (every column after the first), one column per entry of
# `methods`.  No entry is typeset in bold here.
methods = ['w/o normalization', 'w/ normalization']
df_ave = pd.read_csv('qm9_prediction_mean.csv')
df_std = pd.read_csv('qm9_prediction_std.csv')

# NOTE(review): the CSV rows are matched to `methods` by position, which
# assumes exactly two rows in this order -- confirm against the files.
out_df = pd.DataFrame([], index=methods)
for prop in df_ave.columns[1:]:
    aves = list(df_ave[prop])
    stds = list(df_std[prop])
    out = []
    for ave, std in zip(aves, stds):
        val = new_round(ave, std)
        # Raw string is required: in a normal literal '\t' is a TAB, so
        # '\text' would not produce the LaTeX macro.
        val = val.replace('e-', r'\text{e-}')
        out.append('$' + val + '$')
    # Raises KeyError if the CSV has a column that properties_dic lacks.
    out_df[properties_dic[prop]] = out

out_df = out_df.T
out_df.to_latex('pred_acc.tex', escape=False, column_format='lrr')