In [1]:
import pandas as pd
import numpy as np
# Load the per-model results table. The preview printed by the next cell shows
# mean/std columns for recall, ndcg, precision and map plus a pipe-delimited
# `modelname` column.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in that preview look
# like index columns written out by earlier saves -- confirm before relying on them.
df = pd.read_csv(filepath_or_buffer="results_nseeds10_nepochs50.csv")

In [2]:
# Preview the first five rows (the default for `head`) to check the columns.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,recall,recall_std,ndcg,ndcg_std,precision,precision_std,map,map_std,modelname
0,15,0.101967,0.003116,0.172126,0.004536,0.157699,0.002881,0.042306,0.001865,attention|direct|attention
1,11,0.099568,0.004895,0.163423,0.006681,0.150636,0.005473,0.040033,0.002527,degree_norm|multi_linear|attention
2,12,0.098907,0.0029,0.166432,0.004928,0.153807,0.00479,0.039647,0.001288,attention|direct|mean
3,4,0.096469,0.00382,0.15925,0.00612,0.146416,0.004779,0.038658,0.002436,degree_norm|single_linear|mean
4,23,0.096813,0.004471,0.160079,0.00644,0.148706,0.004747,0.038033,0.002871,attention|multi_linear|attention


In [3]:
# Rank-based comparison of hyperparameter choices.
#
# For every metric the models are ordered from highest to lowest score and given
# a 0-based rank divided by the number of models. For each hyperparameter value
# we then record the mean and standard deviation of the normalised ranks of the
# models that use it (rank 0 is the highest-scoring model for that metric).

num_models = len(df)
metric_names = ['recall', 'ndcg', 'precision', 'map']

# hparams[k] lists the values that may appear at position k of a '|'-separated
# model name; hparams_group_names[k] is the label printed for that position.
hparams = [
    ['degree_norm', 'attention'],
    ['direct', 'single_linear', 'multi_linear'],
    ['mean', 'concat', 'weighted', 'attention'],
]
hparams_group_names = ['neighbor_aggregator', 'info_updater', 'final_node_repr']
# Index -> group-name mapping; no longer needed by this cell but kept defined in
# case other cells read it.
hparams_group_names_lookup = dict(zip(range(len(hparams)), hparams_group_names))


def _rank_stats(ranked_names, position, value, total):
    """Return (mean, std) of the normalised ranks of models using `value`.

    Parameters
    ----------
    ranked_names : list of list of str
        Split model names, already ordered from best to worst rank.
    position : int
        Index inside each split name that is compared against `value`.
    value : str
        Hyperparameter value to select.
    total : int
        Divisor used to normalise the 0-based ranks.

    Returns
    -------
    tuple
        (mean, std) of the selected normalised ranks, or (nan, nan) when no
        model matches, which avoids numpy's empty-array RuntimeWarnings.
    """
    ranks = np.array(
        [rank for rank, parts in enumerate(ranked_names) if parts[position] == value]
    ) / total
    if ranks.size == 0:
        return (float('nan'), float('nan'))
    return (np.mean(ranks), np.std(ranks))


def _sorted_by_mean_rank(group_stats):
    """Return the (value, (mean, std)) pairs of one group, lowest mean first."""
    return sorted(group_stats.items(), key=lambda item: item[1][0])


def _iter_stats(all_results, metrics, group_names):
    """Yield (mean, std) for every metric, group and value in report order."""
    for metric in metrics:
        for group_name in group_names:
            for _value, stats in _sorted_by_mean_rank(all_results[metric][group_name]):
                yield stats


results = {}
for metric_name in metric_names:
    # `df` is rebound to the sorted frame on every pass, exactly as before, so
    # after this cell `df` is ordered by the last metric in `metric_names`.
    df = df.sort_values(by=[metric_name], ascending=False)
    ranked_names = [name.split('|') for name in df['modelname'].tolist()]
    results[metric_name] = {
        group_name: {
            value: _rank_stats(ranked_names, position, value, num_models)
            for value in values
        }
        for position, (group_name, values) in enumerate(zip(hparams_group_names, hparams))
    }

print('==========')

# Full report: metric -> hyperparameter group -> values ordered by mean rank.
for metric_name in metric_names:
    print(metric_name)
    for hparam_group_name in hparams_group_names:
        print(f'  {hparam_group_name}')
        for hparam_val, (mean, stddev) in _sorted_by_mean_rank(results[metric_name][hparam_group_name]):
            print(f'    {hparam_val}: {mean:.3f} (std {stddev:.3f})')

# The two flat dumps below repeat the same numbers, one per line and in the same
# order as the full report.
print('=====mean=====')

for mean, stddev in _iter_stats(results, metric_names, hparams_group_names):
    print(f'{mean:.3f}')

print('=====stddev=====')

for mean, stddev in _iter_stats(results, metric_names, hparams_group_names):
    print(f'{stddev:.3f}')
            

recall
  neighbor_aggregator
    attention: 0.451 (std 0.289)
    degree_norm: 0.507 (std 0.285)
  info_updater
    single_linear: 0.406 (std 0.206)
    multi_linear: 0.474 (std 0.253)
    direct: 0.557 (std 0.363)
  final_node_repr
    attention: 0.236 (std 0.263)
    mean: 0.306 (std 0.144)
    weighted: 0.569 (std 0.166)
    concat: 0.806 (std 0.107)
ndcg
  neighbor_aggregator
    attention: 0.441 (std 0.298)
    degree_norm: 0.517 (std 0.273)
  info_updater
    single_linear: 0.391 (std 0.204)
    multi_linear: 0.490 (std 0.234)
    direct: 0.557 (std 0.373)
  final_node_repr
    attention: 0.236 (std 0.275)
    mean: 0.319 (std 0.152)
    weighted: 0.562 (std 0.171)
    concat: 0.799 (std 0.106)
precision
  neighbor_aggregator
    attention: 0.441 (std 0.298)
    degree_norm: 0.517 (std 0.273)
  info_updater
    single_linear: 0.391 (std 0.204)
    multi_linear: 0.490 (std 0.234)
    direct: 0.557 (std 0.373)
  final_node_repr
    attention: 0.236 (std 0.275)
    mean: 0.319 (std 