In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.stats import wilcoxon
# Notebook-wide display configuration.
# Limit how many rows pandas shows when a DataFrame is displayed.
pd.set_option('display.max_rows', 25)
# Use seaborn's "whitegrid" style for the figures drawn below.
sns.set(style='whitegrid')

In [None]:
# Load the evaluation summary table (tab-separated). Later cells read its
# 'Method', 'Network' and 'auROC' columns.
result_df = pd.read_csv('../result/evaluation_summary.tsv', sep='\t')

# Raw method identifier -> display name. The embedded newlines split long
# names over two lines wherever the name is rendered as a label.
methods_dict = {
    'orig-py': 'Original\nPython',
    'orig-cpp': 'Original\nC++',
    'nodevectors': 'NodeVectors',
    'pecanpy-PreComp': 'PecanPy\nPreComp',
    'pecanpy-SparseOTF': 'PecanPy\nSparseOTF',
    'pecanpy-DenseOTF': 'PecanPy\nDenseOTF'
}

# Rename every method in one pass and assign the result back to the column.
# Calling `.replace(..., inplace=True)` on `result_df["Method"]` operates on a
# column selection: it warns on recent pandas and leaves `result_df` untouched
# once Copy-on-Write is active, which would break the display-name lookups
# performed by the cells that follow.
result_df["Method"] = result_df["Method"].replace(methods_dict)
result_df

In [None]:
# Fill colour for each method, keyed by the method's display name.
color_dict = {
    'Original\nPython': 'skyblue',
    'Original\nC++': 'deepskyblue',
    'NodeVectors': 'grey',
    'PecanPy\nPreComp': '#EED489',
    'PecanPy\nSparseOTF': '#E1B25B',
    'PecanPy\nDenseOTF': '#D79640',
}

# Create the figure and grab its axes explicitly, then draw onto them.
f = plt.figure(figsize=(9.8, 5)).gca()

# Notched auROC boxplots: one group per network, one box per method.
sns.boxplot(
    data=result_df,
    x='Network',
    y='auROC',
    hue='Method',
    order=["PPI", "BlogCatalog", "Wikipedia"],
    palette=color_dict,
    notch=True,
    ax=f,
)

# Single-row legend (six columns) anchored below the axes.
f.legend(loc='lower center', bbox_to_anchor=(0.5, -0.24), ncol=6)

# Larger network names on the x axis; the axis title itself is left blank.
plt.setp(f.get_xticklabels(), fontsize=14)
f.set_xlabel('')

f.figure.tight_layout()
# To export the figure, uncomment:
# plt.savefig("FigS10.png", dpi=90)
plt.show()

In [None]:
# Compare every method against the 'Original\nPython' baseline, separately for
# each network, using scipy's Wilcoxon signed-rank test on the auROC scores.
# NOTE(review): the test pairs scores by position, so this assumes the rows of
# each method within a network are in matching order and equal in number --
# confirm against how the summary file is produced.

# Methods to test = every method present except the baseline itself.
methods = list(result_df['Method'].unique())
methods.remove('Original\nPython')
# Single-line variants of the display names for the output table.
methods_converted = [name.replace('\n', ' ') for name in methods]

# Collect one record per (network, method) and build the table once at the end,
# rather than growing a DataFrame with pd.concat on every iteration (quadratic,
# and concatenating onto an empty frame warns on recent pandas).
records = []
for network in result_df['Network'].unique():
    # Restrict to this network once; the per-method masks below reuse it.
    network_df = result_df[result_df['Network'] == network]
    orig_score = network_df.loc[network_df['Method'] == 'Original\nPython', 'auROC'].values

    for method, method_label in zip(methods, methods_converted):
        method_score = network_df.loc[network_df['Method'] == method, 'auROC'].values
        # wilcoxon returns (statistic, p-value); only the p-value is reported.
        pval = wilcoxon(orig_score, method_score)[1]
        records.append({
            'Wilcoxon p-val': f'{pval:.3e}',
            'Method Compared': method_label,
            'Network': network,
        })

# Explicit column list keeps the header stable even if no records were produced.
statistics_df = pd.DataFrame(
    records, columns=['Wilcoxon p-val', 'Method Compared', 'Network']
)

statistics_df.to_csv('../result/wilcoxon_statistics.tsv', sep='\t', index=False)
statistics_df