In [1]:
from itertools import combinations

import numpy as np
import pandas as pd
from scipy.stats import normaltest, mannwhitneyu
import plotly.graph_objects as go
import plotly.express as px

In [19]:
# Load the benchmark results; the file is ';'-separated.
# NOTE(review): the preview shows an 'Unnamed: 0' column that looks like a
# saved row index -- consider index_col=0 if nothing downstream relies on it.
df = pd.read_csv(
    'data_for_evaluation/feeback_castle4__04Mar.csv',
    sep=';',
)
df.head()

Unnamed: 0,dataset_name,varsortability,N_variables,N_obs,algo_name,algo_param,library_name,Error,fdr,tpr,fpr,shd,nnz,precision,recall,F1,gscore,runtime_second,experiment_time
0,sim-13.Network1_amp.continuous,0.666667,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.4,0.8333,0.5,3.0,5.0,0.6667,0.6667,0.6667,0.3333,0.14,Sat Mar 4 11:48:53 2023
1,sim-41.Network1_amp.continuous,0.208333,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.1667,1.1667,0.25,1.0,6.0,0.6667,1.0,0.8,0.5,0.17,Sat Mar 4 11:48:53 2023
2,sim-59.Network1_amp.continuous,0.291667,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.2,1.0,0.25,1.0,5.0,0.8333,0.8333,0.8333,0.6667,0.12,Sat Mar 4 11:48:53 2023
3,sim-10.Network1_amp.continuous,0.166667,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.5,0.8333,0.75,3.0,6.0,0.5,0.5,0.5,0.0,0.17,Sat Mar 4 11:48:53 2023
4,sim-04.Network1_amp.continuous,0.25,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.2857,1.1667,0.5,2.0,7.0,0.6,1.0,0.75,0.3333,0.16,Sat Mar 4 11:48:53 2023


In [20]:
# Algorithms to compare; this list is reused by the cells further down.
algos = ['pc', 'ges', 'icalingam', 'directlingam']

# One violin per algorithm showing the distribution of its F1 scores.
fig = go.Figure()
for algo in algos:
    fig.add_trace(
        go.Violin(
            x=df.loc[df['algo_name'] == algo, 'algo_name'],
            y=df.loc[df['algo_name'] == algo, 'F1'],
            name=algo,
            box_visible=True,
            meanline_visible=True,
        )
    )
fig.show()

In [21]:
# Descriptive statistics of F1, one column per algorithm (in `algos` order).
summary = pd.DataFrame(
    {algo: df.loc[df['algo_name'] == algo, 'F1'].describe() for algo in algos}
)
print(summary)

                pc          ges    icalingam  directlingam
count  1071.000000  1071.000000  1052.000000   1070.000000
mean      0.543327     0.535690     0.508560      0.609195
std       0.159453     0.144165     0.165583      0.166673
min       0.100000     0.125000     0.125000      0.133300
25%       0.430500     0.431700     0.375000      0.500000
50%       0.533300     0.533300     0.500000      0.615400
75%       0.636400     0.625000     0.625000      0.750000
max       1.000000     0.933300     0.933300      0.933300


In [26]:
# Collect the F1 values of every algorithm into a plain Python list.
# The lists keep missing values, so these lengths can exceed the `count`
# reported by describe(), which ignores NaN.
algo_df = df.groupby('algo_name')['F1'].apply(list)
print(algo_df.index)
for name, scores in algo_df.items():
    print(f"{name} has {len(scores)} entries")

Index(['directlingam', 'ges', 'icalingam', 'pc'], dtype='object', name='algo_name')
(4316, 19)
directlingam has 1079 entries
ges has 1079 entries
icalingam has 1079 entries
pc has 1079 entries


In [41]:
def normality_test(data_dict, data_key, alpha=0.05):
    """Run SciPy's ``normaltest`` on one entry of ``data_dict`` and report it.

    Missing values are dropped before testing. The sample size, the test
    statistic and the p-value are printed, followed by a one-line verdict.

    Parameters
    ----------
    data_dict : mapping
        Maps a label to an object offering ``dropna()`` and ``to_numpy()``
        (the notebook passes pandas Series).
    data_key : hashable
        Key of the sample to test; also used as the label in the output.
    alpha : float, default 0.05
        Significance level. Normality is rejected when ``p < alpha``.

    Returns
    -------
    tuple
        ``(statistic, p_value)`` as returned by ``scipy.stats.normaltest``,
        so callers can reuse the numbers instead of parsing printed text.
    """
    sample = data_dict[data_key].dropna().to_numpy()
    # Number of observations that actually enter the test (NaN removed).
    print(len(sample))
    value, p = normaltest(sample)
    print(f"Normality tested on {data_key}. value: {value}, p:{p}")
    # NOTE: failing to reject H0 is not proof of normality; the wording below
    # is kept as-is so the notebook output stays unchanged.
    if p >= alpha:
        print(f'It is likely that the distribution of {data_key} datasets is normal.\n')
    else:
        print(f'It is unlikely that the distribution of {data_key} datasets is normal.\n')
    return value, p

In [65]:
def gen_dict(algos:list, value_to_analyse:str, data=None):
    """Split one column of the results table by algorithm.

    Parameters
    ----------
    algos : list
        Algorithm names to look up in the ``'algo_name'`` column.
    value_to_analyse : str
        Name of the column whose values are collected (e.g. ``'F1'``).
    data : pandas.DataFrame, optional
        Table to read from. When omitted, the notebook-level ``df`` is used,
        which keeps existing two-argument calls working.

    Returns
    -------
    dict
        Maps each algorithm name to the Series of ``value_to_analyse`` values
        of the rows where ``'algo_name'`` equals that name. Missing values
        are not removed here.
    """
    if data is None:
        # Fall back to the global frame loaded earlier in the notebook.
        data = df
    return {
        algo: data.loc[data['algo_name'] == algo, value_to_analyse]
        for algo in algos
    }


In [67]:
# F1 scores per algorithm, then a normality check on each sample.
algos_dict = gen_dict(algos, 'F1')

for algo in algos_dict:
    normality_test(algos_dict, algo)

1071
Normality tested on pc. value: 10.444745099734758, p:0.005394515175318968
It is unlikely that the distribution of pc datasets is normal.

1071
Normality tested on ges. value: 4.505471415693676, p:0.10511127712579547
It is likely that the distribution of ges datasets is normal.

1052
Normality tested on icalingam. value: 59.879308300192626, p:9.939703108083262e-14
It is unlikely that the distribution of icalingam datasets is normal.

1070
Normality tested on directlingam. value: 42.64222488413989, p:5.499947176230139e-10
It is unlikely that the distribution of directlingam datasets is normal.



In [45]:
def mannwhitneyu_test(data_dict, data_key_1, data_key_2, alpha=0.05, alternative='two-sided'):
    """Compare two entries of ``data_dict`` with the Mann-Whitney U test.

    Missing values are dropped from both samples before testing. The
    statistic and p-value are printed, followed by a one-line verdict.

    Parameters
    ----------
    data_dict : mapping
        Maps a label to an object offering ``dropna()`` and ``to_numpy()``
        (the notebook passes pandas Series).
    data_key_1, data_key_2 : hashable
        Keys of the two samples to compare; also used as output labels.
    alpha : float, default 0.05
        Significance level. H0 is rejected when ``p < alpha``, the same
        convention ``normality_test`` uses.
    alternative : str, default 'two-sided'
        Passed through to ``scipy.stats.mannwhitneyu``. It is set explicitly
        so the result does not depend on the installed SciPy's default.

    Returns
    -------
    tuple
        ``(statistic, p_value)`` as returned by ``scipy.stats.mannwhitneyu``.
    """
    data_1 = data_dict[data_key_1].dropna().to_numpy()
    data_2 = data_dict[data_key_2].dropna().to_numpy()

    value, p = mannwhitneyu(data_1, data_2, alternative=alternative)
    print(f"Mannwhitneyu tested on {data_key_1} and {data_key_2}. value: {value}, p:{p}")
    if p >= alpha:
        print('Samples are likely drawn from the same distributions (fail to reject H0)\n')
    else:
        print('Samples are likely drawn from different distributions (reject H0).\n')
    return value, p

In [59]:
def gen_pair(elements:list):
    """Return every unordered pair of ``elements`` as a list of 2-tuples.

    Pairs are formed by position, in the order the items appear: each item is
    paired with every item that comes after it. Duplicate values are therefore
    paired like any other items. Fewer than two items yields an empty list.

    Parameters
    ----------
    elements : list
        Items to pair up. Any iterable is accepted.

    Returns
    -------
    list of tuple
        ``[(elements[i], elements[j]), ...]`` for all ``i < j``.
    """
    return list(combinations(elements, 2))
# Sanity check: list the algorithm pairs that gen_pair produces for `algos`.
print(gen_pair(algos))            
            

[('pc', 'ges'), ('pc', 'icalingam'), ('pc', 'directlingam'), ('ges', 'icalingam'), ('ges', 'directlingam'), ('icalingam', 'directlingam')]


In [68]:
# Pairwise Mann-Whitney U comparison of the F1 samples of all algorithms.
# NOTE(review): each pair is judged on its own p-value; no multiple-comparison
# correction is applied across the pairs -- confirm this is intended.
to_compare = gen_pair(algos)
for first, second in to_compare:
    mannwhitneyu_test(algos_dict, first, second)

Mannwhitneyu tested on pc and ges. value: 582876.5, p:0.5131219593428282
Samples are likely drawn from the same distributions (fail to reject H0)

Mannwhitneyu tested on pc and icalingam. value: 623809.5, p:1.828696659762571e-05
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on pc and directlingam. value: 429130.5, p:7.780749123508183e-24
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on ges and icalingam. value: 615959.5, p:0.00019285846738799177
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on ges and directlingam. value: 411810.0, p:1.684816625872063e-29
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on icalingam and directlingam. value: 372393.5, p:1.3621880577879656e-41
Samples are likely drawn from different distributions (reject H0).

