In [38]:
import numpy as np
import pandas as pd
from scipy.stats import normaltest, mannwhitneyu, ttest_ind
import plotly.graph_objects as go
import plotly.express as px

In [39]:
# Load the evaluation results (semicolon-separated CSV) and preview the first
# rows to check that the columns were parsed as expected.
evaluation_csv = 'data_for_evaluation/feeback_castle4__04Mar.csv'
df = pd.read_csv(evaluation_csv, sep=';')
df.head()

Unnamed: 0,dataset_name,varsortability,N_variables,N_obs,algo_name,algo_param,library_name,Error,fdr,tpr,fpr,shd,nnz,precision,recall,F1,gscore,runtime_second,experiment_time
0,sim-13.Network1_amp.continuous,0.666667,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.4,0.8333,0.5,3.0,5.0,0.6667,0.6667,0.6667,0.3333,0.14,Sat Mar 4 11:48:53 2023
1,sim-41.Network1_amp.continuous,0.208333,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.1667,1.1667,0.25,1.0,6.0,0.6667,1.0,0.8,0.5,0.17,Sat Mar 4 11:48:53 2023
2,sim-59.Network1_amp.continuous,0.291667,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.2,1.0,0.25,1.0,5.0,0.8333,0.8333,0.8333,0.6667,0.12,Sat Mar 4 11:48:53 2023
3,sim-10.Network1_amp.continuous,0.166667,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.5,0.8333,0.75,3.0,6.0,0.5,0.5,0.5,0.0,0.17,Sat Mar 4 11:48:53 2023
4,sim-04.Network1_amp.continuous,0.25,5,500,pc,"{'variant': 'original', 'alpha': 0.05, 'ci_tes...",gCastle,No Error,0.2857,1.1667,0.5,2.0,7.0,0.6,1.0,0.75,0.3333,0.16,Sat Mar 4 11:48:53 2023


In [40]:
algos = ['pc', 'ges', 'icalingam', 'directlingam']
fig = go.Figure()
for algo in algos:
    # Select this algorithm's rows once and reuse the mask for both axes.
    is_algo = df['algo_name'] == algo
    violin = go.Violin(
        x=df.loc[is_algo, 'algo_name'],
        y=df.loc[is_algo, 'F1'],
        name=algo,
        box_visible=True,
        meanline_visible=True,
    )
    fig.add_trace(violin)
fig.show()

In [41]:
# One column of descriptive F1 statistics per algorithm, in the order of `algos`.
summary = pd.DataFrame(
    {algo: df.loc[df['algo_name'] == algo, 'F1'].describe() for algo in algos}
)
print(summary)

                pc          ges    icalingam  directlingam
count  1071.000000  1071.000000  1052.000000   1070.000000
mean      0.543327     0.535690     0.508560      0.609195
std       0.159453     0.144165     0.165583      0.166673
min       0.100000     0.125000     0.125000      0.133300
25%       0.430500     0.431700     0.375000      0.500000
50%       0.533300     0.533300     0.500000      0.615400
75%       0.636400     0.625000     0.625000      0.750000
max       1.000000     0.933300     0.933300      0.933300


In [42]:
# Gather the F1 values of each algorithm into one list per group. No NaN
# filtering happens here, so these lengths count missing values as well.
algo_df = df.groupby('algo_name')['F1'].apply(list)
print(algo_df.index)
for key, f1_values in algo_df.items():
    print(f"{key} has {len(f1_values)} entries")

Index(['directlingam', 'ges', 'icalingam', 'pc'], dtype='object', name='algo_name')
directlingam has 1079 entries
ges has 1079 entries
icalingam has 1079 entries
pc has 1079 entries


In [43]:
def normality_test(data_dict, data_key, alpha=0.05):
    """Run scipy's `normaltest` on one sample and print the verdict.

    Parameters
    ----------
    data_dict : mapping of str -> pandas.Series
        Samples keyed by name; missing values are dropped before testing.
    data_key : str
        Key of the sample to test; also used as the label in the printed text.
    alpha : float, default 0.05
        Significance level. The sample is reported as likely normal when
        ``p >= alpha`` and as unlikely normal otherwise.

    Returns
    -------
    None
        The sample size, the test statistic / p-value (both rounded to two
        decimals) and the verdict are printed.
    """
    sample = data_dict[data_key].dropna().to_numpy()
    # Number of observations that actually enter the test (NaNs removed).
    print(len(sample))
    value, p = normaltest(sample)
    print(f"Normality tested on {data_key}. value: {value.round(2)}, p:{p.round(2)}")
    if p >= alpha:
        print(f'It is likely that the distribution of {data_key} datasets is normal.\n')
    else:
        print(f'It is unlikely that the distribution of {data_key} datasets is normal.\n')

In [44]:
def gen_dict(algos:list, value_to_analyse:str, data=None):
    """Split one column of a results frame into a Series per algorithm.

    Parameters
    ----------
    algos : list
        Algorithm names to look up in the ``algo_name`` column.
    value_to_analyse : str
        Name of the column whose values are collected.
    data : pandas.DataFrame, optional
        Frame to read from. When omitted, the notebook-level ``df`` is used,
        which keeps existing two-argument calls working unchanged.

    Returns
    -------
    dict
        Maps each algorithm name to the Series of ``value_to_analyse`` values
        of the rows whose ``algo_name`` equals that name (NaNs are kept).
    """
    if data is None:
        # Backward-compatible default: fall back to the global results frame.
        data = df
    return {
        algo: data.loc[data['algo_name'] == algo, value_to_analyse]
        for algo in algos
    }


In [45]:
# Check the F1 scores of every algorithm for normality.
algos_dict = gen_dict(algos, 'F1')

for algo in algos_dict:
    normality_test(algos_dict, algo)

1071
Normality tested on pc. value: 10.44, p:0.01
It is unlikely that the distribution of pc datasets is normal.

1071
Normality tested on ges. value: 4.51, p:0.11
It is likely that the distribution of ges datasets is normal.

1052
Normality tested on icalingam. value: 59.88, p:0.0
It is unlikely that the distribution of icalingam datasets is normal.

1070
Normality tested on directlingam. value: 42.64, p:0.0
It is unlikely that the distribution of directlingam datasets is normal.



In [46]:
def mannwhitneyu_test(data_dict, data_key_1, data_key_2, alpha=0.05):
    """Run a Mann-Whitney U test on two samples and print the verdict.

    Parameters
    ----------
    data_dict : mapping of str -> pandas.Series
        Samples keyed by name; missing values are dropped before testing.
    data_key_1, data_key_2 : str
        Keys of the two samples to compare; also used in the printed text.
    alpha : float, default 0.05
        Significance level. H0 is not rejected when ``p > alpha``.

    Returns
    -------
    None
        The statistic and p-value (both rounded to two decimals) and the
        verdict are printed.
    """
    sample_1 = data_dict[data_key_1].dropna().to_numpy()
    sample_2 = data_dict[data_key_2].dropna().to_numpy()

    value, p = mannwhitneyu(sample_1, sample_2)
    print(f"Mannwhitneyu tested on {data_key_1} and {data_key_2}. value: {value.round(2)}, p:{p.round(2)}")
    if p > alpha:
        print('Samples are likely drawn from the same distributions (fail to reject H0)\n')
    else:
        print('Samples are likely drawn from different distributions (reject H0).\n')

In [47]:
def gen_pair(elements:list):
    """Return every unordered pair of `elements` as a list of 2-tuples.

    Each element is paired with the elements that come after it, so the pairs
    follow the input order and no element is paired with itself by position.
    """
    return [
        (first, second)
        for position, first in enumerate(elements)
        for second in elements[position + 1:]
    ]
# Preview the algorithm pairs produced by gen_pair.
algo_pairs_preview = gen_pair(algos)
print(algo_pairs_preview)
            

[('pc', 'ges'), ('pc', 'icalingam'), ('pc', 'directlingam'), ('ges', 'icalingam'), ('ges', 'directlingam'), ('icalingam', 'directlingam')]


In [48]:
# Compare the F1 samples of every pair of algorithms.
# NOTE(review): no multiple-comparison correction is applied across the pairs.
to_compare = gen_pair(algos)
for first_algo, second_algo in to_compare:
    mannwhitneyu_test(algos_dict, first_algo, second_algo)

Mannwhitneyu tested on pc and ges. value: 582876.5, p:0.51
Samples are likely drawn from the same distributions (fail to reject H0)

Mannwhitneyu tested on pc and icalingam. value: 623809.5, p:0.0
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on pc and directlingam. value: 429130.5, p:0.0
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on ges and icalingam. value: 615959.5, p:0.0
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on ges and directlingam. value: 411810.0, p:0.0
Samples are likely drawn from different distributions (reject H0).

Mannwhitneyu tested on icalingam and directlingam. value: 372393.5, p:0.0
Samples are likely drawn from different distributions (reject H0).



In [49]:
# Build the sensitivity frame: drop the 'pc' rows of the main results file and
# append the rows loaded from the separate pcstable file in their place.
df_pcstable = pd.read_csv('data_for_evaluation/pcstable_inria_gids_19Mar.csv', sep=';')
df_sensitivity = pd.read_csv('data_for_evaluation/inria_gids_castle4_11Mar.csv', sep=';')
not_pc = df_sensitivity['algo_name'] != 'pc'
df_sensitivity = pd.concat(
    [df_sensitivity.loc[not_pc], df_pcstable],
    ignore_index=True,
)
df_sensitivity

Unnamed: 0,dataset_name,varsortability,N_variables,N_obs,algo_name,algo_param,library_name,Error,fdr,tpr,fpr,shd,nnz,precision,recall,F1,gscore,runtime_second,experiment_time
0,G3_v2_confounders_numdata.csv,0.500000,18,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.5357,0.6842,0.1119,19,28,0.4643,0.6842,0.5532,0.0,1.32,Sat Mar 11 15:57:19 2023
1,G2_v4_confounders_numdata.csv,0.828571,18,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.5600,0.6875,0.1022,17,25,0.4400,0.6875,0.5366,0.0,1.86,Sat Mar 11 15:57:19 2023
2,G2_v3_confounders_numdata.csv,0.710526,19,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.7188,0.5625,0.1484,26,32,0.2812,0.5625,0.3750,0.0,2.23,Sat Mar 11 15:57:19 2023
3,G2_v2_confounders_numdata.csv,0.793103,21,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.5957,0.7600,0.1514,32,47,0.4043,0.7600,0.5278,0.0,2.25,Sat Mar 11 15:57:19 2023
4,G2_v1_confounders_numdata.csv,0.794872,19,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.6190,0.7619,0.1733,30,42,0.3810,0.7619,0.5079,0.0,4.24,Sat Mar 11 15:57:21 2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,G5_v4_numdata.csv,0.732456,21,500,pc,"{'variant': 'stable', 'alpha': 0.05, 'ci_test'...",gCastle,No Error,0.5366,0.4222,0.1333,41,41,0.4634,0.4222,0.4419,0.0,2.39,Sun Mar 19 20:00:17 2023
156,G2_v1_numdata.csv,0.761538,22,500,pc,"{'variant': 'stable', 'alpha': 0.05, 'ci_test'...",gCastle,No Error,0.4848,0.5667,0.0796,25,33,0.4595,0.5667,0.5075,0.0,3.05,Sun Mar 19 20:00:17 2023
157,G3_v5_numdata.csv,0.808333,20,500,pc,"{'variant': 'stable', 'alpha': 0.05, 'ci_test'...",gCastle,No Error,0.6562,0.3438,0.1329,38,32,0.3235,0.3438,0.3333,0.0,2.37,Sun Mar 19 20:00:17 2023
158,G2_v2_numdata.csv,0.758242,24,500,pc,"{'variant': 'stable', 'alpha': 0.05, 'ci_test'...",gCastle,No Error,0.6061,0.3714,0.0830,39,33,0.3824,0.3714,0.3768,0.0,2.87,Sun Mar 19 20:00:18 2023


In [50]:
dataset_types = ['with_hidden_variable', 'with_out_hidden_variable']
def extract_dataset_type(dataset_name:str):
    """Classify a dataset by whether its name contains 'confounders'.

    Returns 'with_hidden_variable' when the substring is present and
    'with_out_hidden_variable' otherwise.
    """
    return 'with_hidden_variable' if 'confounders' in dataset_name else 'with_out_hidden_variable'
# Tag every row with the dataset type derived from its dataset name.
df_sensitivity['dataset_type'] = df_sensitivity['dataset_name'].map(extract_dataset_type)
df_sensitivity.head()

Unnamed: 0,dataset_name,varsortability,N_variables,N_obs,algo_name,algo_param,library_name,Error,fdr,tpr,fpr,shd,nnz,precision,recall,F1,gscore,runtime_second,experiment_time,dataset_type
0,G3_v2_confounders_numdata.csv,0.5,18,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.5357,0.6842,0.1119,19,28,0.4643,0.6842,0.5532,0.0,1.32,Sat Mar 11 15:57:19 2023,with_hidden_variable
1,G2_v4_confounders_numdata.csv,0.828571,18,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.56,0.6875,0.1022,17,25,0.44,0.6875,0.5366,0.0,1.86,Sat Mar 11 15:57:19 2023,with_hidden_variable
2,G2_v3_confounders_numdata.csv,0.710526,19,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.7188,0.5625,0.1484,26,32,0.2812,0.5625,0.375,0.0,2.23,Sat Mar 11 15:57:19 2023,with_hidden_variable
3,G2_v2_confounders_numdata.csv,0.793103,21,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.5957,0.76,0.1514,32,47,0.4043,0.76,0.5278,0.0,2.25,Sat Mar 11 15:57:19 2023,with_hidden_variable
4,G2_v1_confounders_numdata.csv,0.794872,19,500,icalingam,"{'random_state': None, 'max_iter': 1000, 'thre...",gCastle,No Error,0.619,0.7619,0.1733,30,42,0.381,0.7619,0.5079,0.0,4.24,Sat Mar 11 15:57:21 2023,with_hidden_variable


In [51]:

for algo in algos:
    fig = go.Figure()
    df_to_plot = df_sensitivity.loc[df_sensitivity['algo_name'] == algo]
    for typ in dataset_types:
        # Rows of this algorithm that belong to the current dataset type.
        of_type = df_to_plot['dataset_type'] == typ
        fig.add_trace(
            go.Violin(
                x=df_to_plot.loc[of_type, 'dataset_type'],
                y=df_to_plot.loc[of_type, 'F1'],
                name=typ,
                box_visible=True,
                meanline_visible=True,
            )
        )
    # NOTE(review): no title is set (the title call was commented out), so the
    # figures can only be told apart by the order in which they are shown.
    fig.update_layout(font_size=26, showlegend=False)
    fig.update_yaxes(title_text="F1-Score")
    fig.show()

In [52]:
# Keep each algorithm's slice of the sensitivity results for the cells below
# and print its F1 statistics split by dataset type.
dfs_to_describe = []
for algo in algos:
    df_to_plot = df_sensitivity.loc[df_sensitivity['algo_name'] == algo]
    dfs_to_describe.append(df_to_plot)
    print(f'\n\n{algo}')
    summary = pd.DataFrame(
        {
            typ: df_to_plot.loc[df_to_plot['dataset_type'] == typ, 'F1'].describe()
            for typ in dataset_types
        }
    )
    print(summary.round(2))



pc
       with_hidden_variable  with_out_hidden_variable
count                 20.00                     20.00
mean                   0.40                      0.44
std                    0.10                      0.10
min                    0.18                      0.26
25%                    0.34                      0.37
50%                    0.41                      0.42
75%                    0.46                      0.51
max                    0.58                      0.65


ges
       with_hidden_variable  with_out_hidden_variable
count                 20.00                     20.00
mean                   0.30                      0.36
std                    0.07                      0.07
min                    0.19                      0.25
25%                    0.27                      0.31
50%                    0.31                      0.34
75%                    0.35                      0.40
max                    0.43                      0.47


icalingam
     

In [53]:
for dataframe in dfs_to_describe:
    print(dataframe['algo_name'].unique())

    # F1 scores of this algorithm, split by dataset type.
    f1_scores = dataframe['F1']
    kind = dataframe['dataset_type']
    dataset_dict = {
        'with_hidden_variable': f1_scores[kind == 'with_hidden_variable'],
        'with_out_hidden_variable': f1_scores[kind == 'with_out_hidden_variable'],
    }

    for typ in dataset_types:
        normality_test(dataset_dict, typ)

['pc']
20
Normality tested on with_hidden_variable. value: 1.36, p:0.51
It is likely that the distribution of with_hidden_variable datasets is normal.

20
Normality tested on with_out_hidden_variable. value: 1.13, p:0.57
It is likely that the distribution of with_out_hidden_variable datasets is normal.

['ges']
20
Normality tested on with_hidden_variable. value: 0.09, p:0.96
It is likely that the distribution of with_hidden_variable datasets is normal.

20
Normality tested on with_out_hidden_variable. value: 1.21, p:0.55
It is likely that the distribution of with_out_hidden_variable datasets is normal.

['icalingam']
20
Normality tested on with_hidden_variable. value: 0.23, p:0.89
It is likely that the distribution of with_hidden_variable datasets is normal.

20
Normality tested on with_out_hidden_variable. value: 0.52, p:0.77
It is likely that the distribution of with_out_hidden_variable datasets is normal.

['directlingam']
20
Normality tested on with_hidden_variable. value: 1.52, p:

In [56]:
def my_ttest(dictionary:dict, key_1:str, key_2:str, equal_std=False, alpha=0.05):
    """Run an independent two-sample t-test and print the verdict.

    Parameters
    ----------
    dictionary : dict
        Maps sample names to pandas Series; missing values are dropped before
        testing.
    key_1, key_2 : str
        Keys of the two samples to compare.
    equal_std : bool, default False
        Forwarded to ``ttest_ind`` as ``equal_var``.
    alpha : float, default 0.05
        Significance level. H0 is reported as "APPROVED" when ``p > alpha``
        and as "REJECTED" otherwise.

    Returns
    -------
    None
        The p-value (rounded to two decimals) and the verdict are printed.
    """
    # dropna() returns a new Series, so its result has to be kept: otherwise
    # NaNs reach ttest_ind, the p-value becomes NaN and the comparison below
    # always falls through to "REJECTED".
    value_1 = dictionary[key_1].dropna()
    value_2 = dictionary[key_2].dropna()
    _, p = ttest_ind(value_1.to_numpy(), value_2.to_numpy(), equal_var=equal_std)

    if p > alpha:
        print(f'p-value: {p.round(2)} H0: Samples are likely drawn from the same distributions (APPROVED)\n')
    else:
        print(f'p-value: {p.round(2)} H0: Samples are likely drawn from the same distributions (REJECTED)\n')
        

    

In [57]:

for dataframe in dfs_to_describe:
    name = dataframe['algo_name'].unique()
    algo_label = name[0]
    print(algo_label)

    # F1 scores of this algorithm, split by dataset type.
    f1_scores = dataframe['F1']
    kind = dataframe['dataset_type']
    dataset_dict = {
        'with_hidden_variable': f1_scores[kind == 'with_hidden_variable'],
        'with_out_hidden_variable': f1_scores[kind == 'with_out_hidden_variable'],
    }
    # Only 'ges' is tested with the equal-variance flag switched on; every
    # other algorithm uses the flag value False.
    assume_equal_variance = algo_label == 'ges'
    my_ttest(dataset_dict, 'with_hidden_variable', 'with_out_hidden_variable', assume_equal_variance)


pc
p-value: 0.24 H0: Samples are likely drawn from the same distributions (APPROVED)

ges
p-value: 0.01 H0: Samples are likely drawn from the same distributions (REJECTED)

icalingam
p-value: 0.71 H0: Samples are likely drawn from the same distributions (APPROVED)

directlingam
p-value: 0.12 H0: Samples are likely drawn from the same distributions (APPROVED)

