In [84]:
import pandas as pd
import numpy as np
from scipy import stats

In [85]:
# Load the per-run test accuracies of the GNN configurations and of the baseline.
gnns_path = '../reports/results/sns-friendly/gnns.csv'
baseline_path = '../reports/results/sns-friendly/baseline.csv'
gnns_df = pd.read_csv(gnns_path)
baseline_df = pd.read_csv(baseline_path)

In [86]:
# Preview the first rows of the GNN results table to check its columns.
gnns_df.head()

Unnamed: 0,conv,dataset,flow,test_acc,Network
0,GCNConv,cora,SYM,0.78267,original
1,GCNConv,cora,SYM,0.76815,original
2,GCNConv,cora,SYM,0.788759,original
3,GCNConv,cora,SYM,0.776581,original
4,GCNConv,cora,SYM,0.753162,original


In [87]:
# List every (dataset, conv, flow, Network) combination recorded for cora_full.
cora_full_runs = gnns_df[gnns_df['dataset'] == 'cora_full']
cora_full_groups = cora_full_runs.groupby(['dataset', 'conv', 'flow', 'Network'])
for entry in cora_full_groups.groups:
    print(entry)

('cora_full', 'GATConv', 'SYM', 'conf')
('cora_full', 'GATConv', 'SYM', 'original')
('cora_full', 'GATConv', 'SYM', 'random')
('cora_full', 'GATConv', 'SYM', 'sbm')
('cora_full', 'GCNConv', 'SYM', 'conf')
('cora_full', 'GCNConv', 'SYM', 'original')
('cora_full', 'GCNConv', 'SYM', 'random')
('cora_full', 'GCNConv', 'SYM', 'sbm')
('cora_full', 'SAGEConv', 'SYM', 'conf')
('cora_full', 'SAGEConv', 'SYM', 'original')
('cora_full', 'SAGEConv', 'SYM', 'random')
('cora_full', 'SAGEConv', 'SYM', 'sbm')


In [88]:
# Total number of distinct (dataset, conv, flow, Network) configurations.
configuration_groups = gnns_df.groupby(['dataset', 'conv', 'flow', 'Network'])
configuration_groups.ngroups

92

In [90]:
# Test accuracies of one configuration: GATConv on cora, SYM flow, original network.
is_cora_gat_original = (
    (gnns_df['dataset'] == 'cora')
    & (gnns_df['conv'] == 'GATConv')
    & (gnns_df['flow'] == 'SYM')
    & (gnns_df['Network'] == 'original')
)
arr1 = gnns_df.loc[is_cora_gat_original, 'test_acc'].values

In [91]:
# Baseline test accuracies recorded for the cora dataset.
is_cora_baseline = baseline_df['dataset'] == 'cora'
arr2 = baseline_df.loc[is_cora_baseline, 'test_acc'].values

In [92]:
# Two-sample t-test without the equal-variance assumption (equal_var=False);
# only the p-value is kept, the statistic is discarded into `_`.
welch_result = stats.ttest_ind(a=arr1, b=arr2, equal_var=False)
_, p_value = welch_result

In [93]:
# Display the p-value of the arr1-vs-arr2 t-test computed above.
p_value

4.521912231115878e-107

In [94]:
# Preview the first rows of the baseline results table to check its columns.
baseline_df.head()

Unnamed: 0,dataset,method,test_acc
0,cora,baseline,0.596253
1,cora,baseline,0.589696
2,cora,baseline,0.608431
3,cora,baseline,0.574707
4,cora,baseline,0.591101


In [102]:
# Compare every GNN configuration (dataset, conv, flow, Network) against the
# baseline runs of the same dataset and collect one result record per configuration.
group_columns = ['dataset', 'conv', 'flow', 'Network']
alpha = 0.001  # significance level used for the normality checks below

# Group once and reuse the grouping: iterating it yields each configuration's
# rows directly, so gnns_df is not re-filtered with boolean masks per group.
runs_by_config = gnns_df.groupby(group_columns)

# Number of configurations per dataset, i.e. how many t-tests are run against
# that dataset's baseline; this is the Bonferroni divisor.
tests_per_dataset = {}
for config_key in runs_by_config.groups:
    tests_per_dataset[config_key[0]] = tests_per_dataset.get(config_key[0], 0) + 1

# Baseline accuracies depend only on the dataset, so slice them once per dataset.
baseline_by_dataset = {
    dataset_name: baseline_df[baseline_df.dataset == dataset_name].test_acc.values
    for dataset_name in tests_per_dataset
}

statistical_significances = []
for entry, config_runs in runs_by_config:
    # entry is the (dataset, conv, flow, Network) key of this configuration.
    baseline_values = baseline_by_dataset[entry[0]]
    approach_values = config_runs.test_acc.values

    # equal_var=False: the two samples are not assumed to share a variance.
    _, p_value = stats.ttest_ind(baseline_values, approach_values, equal_var=False)
    baseline_mean = np.mean(baseline_values)
    approach_mean = np.mean(approach_values)
    num_significance_tests = tests_per_dataset[entry[0]]

    # Normality check per sample; a p-value above alpha is reported as "normal".
    _, p_value_normal_baseline = stats.normaltest(baseline_values)
    _, p_value_normal_approach = stats.normaltest(approach_values)

    statistical_significances.append({
        'dataset':entry[0],
        'conv':entry[1],
        'flow':entry[2],
        'Network':entry[3],
        'Baseline normal':p_value_normal_baseline>alpha,
        # NOTE(review): the trailing colon in this key looks like a typo; it is
        # kept because it becomes the column name of the resulting table.
        'Approach normal:':p_value_normal_approach>alpha,
        'p-value':p_value,
        'p-value<0.05':p_value<0.05,
        'p-value<0.05 (Bonferroni)':p_value<(0.05/num_significance_tests),
        'p-value<0.01':p_value<0.01,
        'p-value<0.01 (Bonferroni)':p_value<(0.01/num_significance_tests),
        'Baseline mean':baseline_mean,
        'Approach mean':approach_mean,
        'Beats baseline':approach_mean>baseline_mean
    })

In [104]:
# Turn the list of per-configuration result records into a table (one row each).
statistical_significances_df = pd.DataFrame(data=statistical_significances)

In [105]:
# Show the significance results for rows whose Network value is 'original'.
is_original_network = statistical_significances_df['Network'] == 'original'
statistical_significances_df.loc[is_original_network]

Unnamed: 0,dataset,conv,flow,Network,Baseline normal,Approach normal:,p-value,p-value<0.05,p-value<0.05 (Bonferroni),p-value<0.01,p-value<0.01 (Bonferroni),Baseline mean,Approach mean,Beats baseline
1,citeseer,GATConv,SYM,original,True,False,1.728788e-81,True,True,True,True,0.620215,0.733422,True
5,citeseer,GCNConv,SYM,original,True,False,1.703898e-76,True,True,True,True,0.620215,0.72131,True
9,citeseer,RGCNConv,SYM,original,True,False,1.113763e-30,True,True,True,True,0.620215,0.652624,True
13,citeseer,SAGEConv,SYM,original,True,False,1.7290179999999998e-77,True,True,True,True,0.620215,0.722808,True
17,cora,GATConv,SYM,original,True,False,4.5219119999999997e-107,True,True,True,True,0.579129,0.803943,True
21,cora,GCNConv,SYM,original,True,False,8.898874e-107,True,True,True,True,0.579129,0.812407,True
25,cora,RGCNConv,SYM,original,True,False,1.9290109999999997e-100,True,True,True,True,0.579129,0.774159,True
29,cora,SAGEConv,SYM,original,True,False,4.651399e-107,True,True,True,True,0.579129,0.811377,True
33,cora_full,GATConv,SYM,original,True,False,1.3515819999999998e-187,True,True,True,True,0.408393,0.549928,True
37,cora_full,GCNConv,SYM,original,True,False,4.710571e-151,True,True,True,True,0.408393,0.5478,True


In [106]:
# Show the significance results for rows whose Network value is 'conf'.
is_conf_network = statistical_significances_df['Network'] == 'conf'
statistical_significances_df.loc[is_conf_network]

Unnamed: 0,dataset,conv,flow,Network,Baseline normal,Approach normal:,p-value,p-value<0.05,p-value<0.05 (Bonferroni),p-value<0.01,p-value<0.01 (Bonferroni),Baseline mean,Approach mean,Beats baseline
0,citeseer,GATConv,SYM,conf,True,False,5.184513e-132,True,True,True,True,0.620215,0.305712,False
4,citeseer,GCNConv,SYM,conf,True,False,2.029385e-121,True,True,True,True,0.620215,0.336563,False
8,citeseer,RGCNConv,SYM,conf,True,False,6.684304e-96,True,True,True,True,0.620215,0.478615,False
12,citeseer,SAGEConv,SYM,conf,True,False,1.119228e-126,True,True,True,True,0.620215,0.306396,False
16,cora,GATConv,SYM,conf,True,False,1.02247e-111,True,True,True,True,0.579129,0.298237,False
20,cora,GCNConv,SYM,conf,True,False,6.30857e-106,True,True,True,True,0.579129,0.342614,False
24,cora,RGCNConv,SYM,conf,True,False,4.4899679999999996e-86,True,True,True,True,0.579129,0.428291,False
28,cora,SAGEConv,SYM,conf,True,False,1.743274e-110,True,True,True,True,0.579129,0.317016,False
32,cora_full,GATConv,SYM,conf,True,False,3.665675e-187,True,True,True,True,0.408393,0.048338,False
36,cora_full,GCNConv,SYM,conf,True,False,1.3764620000000001e-191,True,True,True,True,0.408393,0.039449,False


In [107]:
# Show the significance results for rows whose Network value is 'sbm'.
is_sbm_network = statistical_significances_df['Network'] == 'sbm'
statistical_significances_df.loc[is_sbm_network]

Unnamed: 0,dataset,conv,flow,Network,Baseline normal,Approach normal:,p-value,p-value<0.05,p-value<0.05 (Bonferroni),p-value<0.01,p-value<0.01 (Bonferroni),Baseline mean,Approach mean,Beats baseline
3,citeseer,GATConv,SYM,sbm,True,False,0.01030776,True,False,False,False,0.620215,0.625449,True
7,citeseer,GCNConv,SYM,sbm,True,False,4.129082e-11,True,True,True,True,0.620215,0.605416,False
11,citeseer,RGCNConv,SYM,sbm,True,False,0.00127849,True,True,True,False,0.620215,0.61343,False
15,citeseer,SAGEConv,SYM,sbm,True,False,0.5108553,False,False,False,False,0.620215,0.618895,False
19,cora,GATConv,SYM,sbm,True,False,0.065961,False,False,False,False,0.579129,0.574802,False
23,cora,GCNConv,SYM,sbm,True,False,3.795864e-37,True,True,True,True,0.579129,0.62435,True
27,cora,RGCNConv,SYM,sbm,True,False,0.3439309,False,False,False,False,0.579129,0.58139,True
31,cora,SAGEConv,SYM,sbm,True,False,3.70765e-41,True,True,True,True,0.579129,0.629311,True
35,cora_full,GATConv,SYM,sbm,True,False,2.98627e-150,True,True,True,True,0.408393,0.274743,False
39,cora_full,GCNConv,SYM,sbm,True,False,8.485715e-131,True,True,True,True,0.408393,0.302353,False


In [108]:
# Show the significance results for rows whose Network value is 'random'.
is_random_network = statistical_significances_df['Network'] == 'random'
statistical_significances_df.loc[is_random_network]

Unnamed: 0,dataset,conv,flow,Network,Baseline normal,Approach normal:,p-value,p-value<0.05,p-value<0.05 (Bonferroni),p-value<0.01,p-value<0.01 (Bonferroni),Baseline mean,Approach mean,Beats baseline
2,citeseer,GATConv,SYM,random,True,False,2.790824e-118,True,True,True,True,0.620215,0.361907,False
6,citeseer,GCNConv,SYM,random,True,False,6.860069e-117,True,True,True,True,0.620215,0.360052,False
10,citeseer,RGCNConv,SYM,random,True,False,3.883085e-51,True,True,True,True,0.620215,0.566544,False
14,citeseer,SAGEConv,SYM,random,True,False,3.813425e-117,True,True,True,True,0.620215,0.361956,False
18,cora,GATConv,SYM,random,True,False,3.054157e-109,True,True,True,True,0.579129,0.324144,False
22,cora,GCNConv,SYM,random,True,False,2.88517e-105,True,True,True,True,0.579129,0.340811,False
26,cora,RGCNConv,SYM,random,True,False,5.297429e-111,True,True,True,True,0.579129,0.405868,False
30,cora,SAGEConv,SYM,random,True,False,2.3734860000000003e-105,True,True,True,True,0.579129,0.341885,False
34,cora_full,GATConv,SYM,random,True,False,4.4806390000000006e-175,True,True,True,True,0.408393,0.018075,False
38,cora_full,GCNConv,SYM,random,True,False,1.30383e-186,True,True,True,True,0.408393,0.056094,False
