# Statistical testing of results
This notebook provides the code for statistical testing of the results from the thesis "A holistic analysis of bias assessment within federated learning" by Jelke Matthijsse. This notebook reads the results from the result-files as provided by the experimental setup in [LINK GITHUB] and provides the statistical testing to verify these results.

In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import re
from statistics import stdev
import pandas as pd
import scipy.stats as stats
from scipy.spatial.distance import euclidean
import math

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


The following function helps with extracting data from the results-files and calculating the mean and standard deviation given the results from one experimental partition.

In [2]:
def extract_values(results_text):
    """
    Extract correct values from results-files
    Input: results-file of specific experiment
    Output: accuracies, equalized odds values, demographic parity values for local, global and central pipeline
    """  

    # Regular expression pattern to extract the sizes
    size_pattern = re.compile(r'#Train samples: (\d+), #test samples (\d+)')

    # Find all matches in the text
    matches = size_pattern.findall(results_text)

    # If matches are found, extract the sizes
    if matches:
        train_samples, test_samples = matches[0]
        train_samples = int(train_samples)
        test_samples = int(test_samples)
    else:
        train_samples, test_samples = 0, 0

    ###########################################
    # Extract LOCAL metrics from results
    ###########################################

    # Regular expressions to match the required values
    local_metrics_pattern = re.compile(r'History \(metrics, distributed, evaluate\):\s*\{([^}]+)\}')

    # Parsing federated metrics
    local_metrics_raw = local_metrics_pattern.findall(results_text)
    local_accuracies = []
    local_dp_sex = []
    local_dp_race = []
    local_dp_white = []
    local_dp_black = []
    local_eo_sex = []
    local_eo_race = []
    local_eo_white = []
    local_eo_black = []
    
    # Extracting values using regular expressions
    for round in local_metrics_raw:
        for metric in round.split('],\n'):
            key, values = metric.split(':', 1)
            key = key.strip().strip("'")
            # values = re.findall(r'[\d.]+', values)
            values = re.findall(r'[\d.]+|nan', values)
            # Convert the last value to float or NaN
            if values:
                last_value = values[-1]
                if last_value == 'nan':
                    last_value = np.nan
                else:
                    last_value = float(last_value)
            
            if key == 'accuracy':
                local_accuracies.append(last_value)
            elif key == 'dp sex':
                local_dp_sex.append(last_value)
            elif key == 'dp race':
                local_dp_race.append(last_value)
            elif key == 'dp white':
                local_dp_white.append(last_value)
            elif key == 'dp black':
                local_dp_black.append(last_value)
            elif key == 'eo sex':
                local_eo_sex.append(last_value)
            elif key == 'eo race':
                local_eo_race.append(last_value)
            elif key == 'eo white':
                local_eo_white.append(last_value)
            elif key == 'eo black':
                local_eo_black.append(last_value)
   
    local_demographic_parities = [local_dp_sex, local_dp_race, local_dp_white, local_dp_black]
    local_equalized_odds = [local_eo_sex, local_eo_race, local_eo_white, local_eo_black]

    ###########################################
    # Extract GLOBAL metrics from results
    ###########################################

    # Regular expressions to match the required values
    global_metrics_pattern = re.compile(r'History \(metrics, centralized\):\s*\{([^}]+)\}')
    
    # Parsing centralized metrics
    global_metrics_raw = global_metrics_pattern.findall(results_text)
    global_accuracies = []
    global_dp_sex = []
    global_dp_race = []
    global_dp_white = []
    global_dp_black = []
    global_eo_sex = []
    global_eo_race = []
    global_eo_white = []
    global_eo_black = []  

    # Extracting values using regular expressions  
    for round in global_metrics_raw:
        for metric in round.split('],\n'):
            key, values = metric.split(':', 1)
            key = key.strip().strip("'")
            values = re.findall(r'[\d.]+|nan', values)
            
            def convert_to_float_or_nan(value):
                return np.nan if value == 'nan' else float(value)
            
            if key == 'accuracy':
                global_accuracies.append(convert_to_float_or_nan(values[-1]))
            elif key == 'demographic_parity':
                all_global_dp = values[-4:]
                global_dp_sex.append(convert_to_float_or_nan(all_global_dp[0]))
                global_dp_race.append(convert_to_float_or_nan(all_global_dp[1]))
                global_dp_white.append(convert_to_float_or_nan(all_global_dp[2]))
                global_dp_black.append(convert_to_float_or_nan(all_global_dp[3]))
            elif key in ['equalized_odss', 'equalized_odds']:
                all_global_eo = values[-4:]
                global_eo_sex.append(convert_to_float_or_nan(all_global_eo[0]))
                global_eo_race.append(convert_to_float_or_nan(all_global_eo[1]))
                global_eo_white.append(convert_to_float_or_nan(all_global_eo[2]))
                global_eo_black.append(convert_to_float_or_nan(all_global_eo[3]))

    global_equalized_odds = [global_eo_sex, global_eo_race, global_eo_white, global_eo_black]
    global_demographic_parities = [global_dp_sex, global_dp_race, global_dp_white, global_dp_black]

    ###########################################
    # Extract CENTRAL metrics from results
    ###########################################

    # Regular expressions to match the required values
    central_accuracy_pattern = re.compile(r'Results Central\nAccuracy: ([\d.]+|NaN)')
    central_equalized_odds_pattern = re.compile(r'Equalized Odds \[([^\]]+)\]')
    central_demographic_parity_pattern = re.compile(r'Demographic Parity \[([^\]]+)\]')

    # Function to convert values to float or NaN
    def convert_to_float_or_nan(value):
        return np.nan if value == 'nan' else float(value)

    # Extracting values using regular expressions
    central_accuracies = central_accuracy_pattern.findall(results_text)
    central_accuracies = [convert_to_float_or_nan(value) for value in central_accuracies]

    central_dp_sex = []
    central_dp_race = []
    central_dp_white = []
    central_dp_black = []
    central_eo_sex = []
    central_eo_race = []
    central_eo_white = []
    central_eo_black = []

    for round in central_demographic_parity_pattern.findall(results_text):
        values = round.split(', ')
        central_dp_sex.append(convert_to_float_or_nan(values[0]))
        central_dp_race.append(convert_to_float_or_nan(values[1]))
        central_dp_white.append(convert_to_float_or_nan(values[2]))
        central_dp_black.append(convert_to_float_or_nan(values[3]))

    for round in central_equalized_odds_pattern.findall(results_text):
        values = round.split(', ')
        central_eo_sex.append(convert_to_float_or_nan(values[0]))
        central_eo_race.append(convert_to_float_or_nan(values[1]))
        central_eo_white.append(convert_to_float_or_nan(values[2]))
        central_eo_black.append(convert_to_float_or_nan(values[3]))
        
    central_demographic_parities = [central_dp_sex, central_dp_race, central_dp_white, central_dp_black]
    central_equalized_odds = [central_eo_sex, central_eo_race, central_eo_white, central_eo_black]
    
    return local_accuracies, local_equalized_odds, local_demographic_parities, global_accuracies, global_equalized_odds, global_demographic_parities,central_accuracies, central_equalized_odds, central_demographic_parities, train_samples, test_samples



In [3]:
def extract_values_1(results_text):
    """
    Extract correct values from results-files
    Input: results-file of specific experiment
    Output: accuracies, equalized odds values, demographic parity values for local, global and central pipeline
    """  

    # Regular expression pattern to extract the sizes
    size_pattern = re.compile(r'#Train samples: (\d+), #test samples (\d+)')

    # Find all matches in the text
    matches = size_pattern.findall(results_text)

    # If matches are found, extract the sizes
    if matches:
        train_samples, test_samples = matches[0]
        train_samples = int(train_samples)
        test_samples = int(test_samples)
    else:
        train_samples, test_samples = 0, 0

    ###########################################
    # Extract LOCAL metrics from results
    ###########################################

    # Regular expressions to match the required values
    local_metrics_pattern = re.compile(r'History \(metrics, distributed, evaluate\):\s*\{([^}]+)\}')

    # Parsing federated metrics
    local_metrics_raw = local_metrics_pattern.findall(results_text)
    local_accuracies = []
    local_dp_sex = []
    local_dp_race = []
    local_dp_white = []
    local_dp_black = []
    local_eo_sex = []
    local_eo_race = []
    local_eo_white = []
    local_eo_black = []
    
    # Extracting values using regular expressions
    for round in local_metrics_raw:
        for metric in round.split('],\n'):
            key, values = metric.split(':', 1)
            key = key.strip().strip("'")
            # values = re.findall(r'[\d.]+', values)
            values = re.findall(r'[\d.]+|nan', values)
            # Convert the last value to float or NaN
            if values:
                last_value = values[-1]
                if last_value == 'nan':
                    last_value = np.nan
                else:
                    last_value = float(last_value)
            
            if key == 'accuracy':
                local_accuracies.append(last_value)
            elif key == 'dp sex':
                local_dp_sex.append(last_value)
            elif key == 'dp race':
                local_dp_race.append(last_value)
            elif key == 'dp white':
                local_dp_white.append(last_value)
            elif key == 'dp black':
                local_dp_black.append(last_value)
            elif key == 'eo sex':
                local_eo_sex.append(last_value)
            elif key == 'eo race':
                local_eo_race.append(last_value)
            elif key == 'eo white':
                local_eo_white.append(last_value)
            elif key == 'eo black':
                local_eo_black.append(last_value)
   
    local_demographic_parities = [local_dp_sex, local_dp_race, local_dp_white, local_dp_black]
    local_equalized_odds = [local_eo_sex, local_eo_race, local_eo_white, local_eo_black]

    ###########################################
    # Extract GLOBAL metrics from results
    ###########################################

    # Regular expressions to match the required values
    global_metrics_pattern = re.compile(r'History \(metrics, centralized\):\s*\{([^}]+)\}')
    
    # Parsing centralized metrics
    global_metrics_raw = global_metrics_pattern.findall(results_text)
    global_accuracies = []
    global_dp_sex = []
    global_dp_race = []
    global_dp_white = []
    global_dp_black = []
    global_eo_sex = []
    global_eo_race = []
    global_eo_white = []
    global_eo_black = []  

    # Extracting values using regular expressions  
    for round in global_metrics_raw:
        for metric in round.split('],\n'):
            key, values = metric.split(':', 1)
            key = key.strip().strip("'")
            values = re.findall(r'[\d.]+|nan', values)
            
            def convert_to_float_or_nan(value):
                return np.nan if value == 'nan' else float(value)
            
            if key == 'accuracy':
                global_accuracies.append(convert_to_float_or_nan(values[-1]))
            elif key == 'demographic_parity':
                all_global_dp = values[-4:]
                global_dp_sex.append(convert_to_float_or_nan(all_global_dp[0]))
                global_dp_race.append(convert_to_float_or_nan(all_global_dp[1]))
                global_dp_white.append(convert_to_float_or_nan(all_global_dp[2]))
                global_dp_black.append(convert_to_float_or_nan(all_global_dp[3]))
            elif key in ['equalized_odss', 'equalized_odds']:
                all_global_eo = values[-4:]
                global_eo_sex.append(convert_to_float_or_nan(all_global_eo[0]))
                global_eo_race.append(convert_to_float_or_nan(all_global_eo[1]))
                global_eo_white.append(convert_to_float_or_nan(all_global_eo[2]))
                global_eo_black.append(convert_to_float_or_nan(all_global_eo[3]))

    global_equalized_odds = [global_eo_sex, global_eo_race, global_eo_white, global_eo_black]
    global_demographic_parities = [global_dp_sex, global_dp_race, global_dp_white, global_dp_black]

    ###########################################
    # Extract CENTRAL metrics from results
    ###########################################

    # Regular expressions to match the required values
    central_accuracy_pattern = re.compile(r'Results Central\nAccuracy: ([\d.]+|NaN)')
    central_equalized_odds_pattern = re.compile(r'Equalized Odds \[([^\]]+)\]')
    central_demographic_parity_pattern = re.compile(r'Demographic Parity \[([^\]]+)\]')

    # Function to convert values to float or NaN
    def convert_to_float_or_nan(value):
        return np.nan if value == 'nan' else float(value)

    # Extracting values using regular expressions
    central_accuracies = central_accuracy_pattern.findall(results_text)
    central_accuracies = [convert_to_float_or_nan(value) for value in central_accuracies]

    central_dp_sex = []
    central_dp_race = []
    central_dp_white = []
    central_dp_black = []
    central_eo_sex = []
    central_eo_race = []
    central_eo_white = []
    central_eo_black = []

    for round in central_demographic_parity_pattern.findall(results_text):
        values = round.split(', ')
        central_dp_sex.append(convert_to_float_or_nan(values[0]))
        central_dp_race.append(convert_to_float_or_nan(values[1]))
        central_dp_white.append(convert_to_float_or_nan(values[2]))
        central_dp_black.append(convert_to_float_or_nan(values[3]))

    for round in central_equalized_odds_pattern.findall(results_text):
        values = round.split(', ')
        central_eo_sex.append(convert_to_float_or_nan(values[0]))
        central_eo_race.append(convert_to_float_or_nan(values[1]))
        central_eo_white.append(convert_to_float_or_nan(values[2]))
        central_eo_black.append(convert_to_float_or_nan(values[3]))
        
    central_demographic_parities = [central_dp_sex, central_dp_race, central_dp_white, central_dp_black]
    central_equalized_odds = [central_eo_sex, central_eo_race, central_eo_white, central_eo_black]
    
    return local_equalized_odds, local_demographic_parities, global_equalized_odds, global_demographic_parities, central_equalized_odds, central_demographic_parities



In [4]:
def stdev_of_difference(stdev1, stdev2):
    # calculate the stdev for the difference between two values, given the stdev of these values
    return math.sqrt(stdev1**2 + stdev2**2)

def calculate_stdev(local_accuracies, global_accuracies, central_accuracies, local_demographic_parities, global_demographic_parities, central_demographic_parities, local_equalized_odds, global_equalized_odds, central_equalized_odds):
    """
    Calculates stdev over different seeds
    Input: lists of metric scores over all seeds, for all three pipelines and experiments
    Output: stdev for all metrics per pipeline, stdev for the differences between pipelines 
    """  
    
    # calculate stdev for all local metric results
    local_stdev = [round(np.nanstd(metric), 3) for metric in [local_accuracies] + local_demographic_parities + local_equalized_odds]

    # calculate stdev for all global metric results
    global_stdev = [round(np.nanstd(metric), 3) for metric in [global_accuracies] + global_demographic_parities + global_equalized_odds]

    # calculate stdev for all central metric results
    central_stdev = [round(np.nanstd(metric), 3) for metric in [central_accuracies] + central_demographic_parities + central_equalized_odds]

    # calculate the stdev of the differences for research question 1
    stdev_diff_rq1 = [round(stdev_of_difference(local, global_), 3) for local, global_ in zip(local_stdev, global_stdev)]

    # calculate the stdev of the differences for research question 2
    stdev_diff_rq2 = [round(stdev_of_difference(global_, central), 3) for global_, central in zip(global_stdev, central_stdev)]

    return local_stdev, global_stdev, central_stdev, stdev_diff_rq1, stdev_diff_rq2

def plot_stdev(local_means, global_means, central_means, local_stdev, global_stdev, central_stdev, title):
    metrics = ['Accuracy', 'DP sex', 'DP race', 'DP white', 'DP black', 'EO sex', 'EO race', 'EO white', 'EO black']

    bar_width = 0.25
    r1 = np.arange(len(local_means))
    r2 = [x + bar_width for x in r1]
    r3 = [x + bar_width for x in r2]

    # Plotting the bars
    plt.figure(figsize=(10, 8))
    plt.bar(r1, local_means, yerr=local_stdev, color='#1f77b4', width=bar_width, edgecolor='grey', label='Local Std Dev', capsize=3)
    plt.bar(r2, global_means, yerr=global_stdev, color='#ff7f0e', width=bar_width, edgecolor='grey', label='Global Std Dev', capsize=3)
    plt.bar(r3, central_means, yerr=central_stdev, color='#2ca02c', width=bar_width, edgecolor='grey', label='Central Std Dev', capsize=3)
    plt.xticks([r + bar_width for r in range(len(local_stdev))], metrics)

    plt.ylim(0, 1.1)

    # Adding title and labels
    plt.title(f'Comparison Pipelines {title}')
    plt.xlabel('Metrics')  # Replace with your actual x-axis label
    plt.ylabel('Standard Deviation')

    # Adding a legend
    plt.legend()
    plt.show()

In [5]:
def mean(local_accuracies, global_accuracies, central_accuracies, local_demographic_parities, global_demographic_parities, central_demographic_parities, local_equalized_odds, global_equalized_odds, central_equalized_odds):
    """
    Calculates mean over different seeds
    Input: lists of metric scores over all seeds, for all three pipelines and experiments
    Output: difference in means between the three pipelines for all metrics 
    """  

    # calculate mean for all local metric results
    local_means = [round(np.mean(metric), 3) for metric in [local_accuracies] + local_demographic_parities + local_equalized_odds]

    # calculate mean for all global metric results
    global_means = [round(np.mean(metric), 3) for metric in [global_accuracies] + global_demographic_parities + global_equalized_odds]

    # calculate mean for all central metric results
    central_means = [round(np.mean(metric), 3) for metric in [central_accuracies] + central_demographic_parities + central_equalized_odds]

    # list all differences for research question 1 and research question 2
    mean_diff_rq1 = [round(local_mean - global_mean, 3) for local_mean, global_mean in zip(local_means, global_means)]
    mean_diff_rq2 = [round(global_mean - central_mean, 3) for global_mean, central_mean in zip(global_means, central_means)]

    return local_means, global_means, central_means, mean_diff_rq1, mean_diff_rq2

The following cells provide the mean and standard deviation for the different data partitions. Additionally, it plots the standard deviation across all three pipelines.

In [6]:
###########################################
# EXPERIMENT 1: baseline
###########################################

file_path = 'results_experiments_1.txt'
with open(file_path, 'r') as file:
    ex1_content = file.read()

# Extract values
( ex1_local_accuracies, ex1_local_equalized_odds, ex1_local_demographic_parities,
 ex1_global_accuracies, ex1_global_equalized_odds, ex1_global_demographic_parities, 
 ex1_central_accuracies, ex1_central_equalized_odds, ex1_central_demographic_parities
 , ex1_train_samples, ex1_test_samples) = extract_values(ex1_content)


ex1_local, ex1_global, ex1_central, ex1_diff_rq1, ex1_diff_rq2 = mean(ex1_local_accuracies, ex1_global_accuracies, ex1_central_accuracies, ex1_local_demographic_parities, ex1_global_demographic_parities, ex1_central_demographic_parities, ex1_local_equalized_odds, ex1_global_equalized_odds, ex1_central_equalized_odds)

In [7]:
###########################################
# EXPERIMENT 2: Quantity (sample)
###########################################

file_path = 'results_experiments_2.txt'
with open(file_path, 'r') as file:
    ex2_content = file.read()

# Extract values
( ex2_local_accuracies, ex2_local_equalized_odds, ex2_local_demographic_parities,
 ex2_global_accuracies, ex2_global_equalized_odds, ex2_global_demographic_parities, 
 ex2_central_accuracies, ex2_central_equalized_odds, ex2_central_demographic_parities, 
 ex2_train_samples, ex2_test_samples) = extract_values(ex2_content)

ex2_local, ex2_global, ex2_central, ex2_diff_rq1, ex2_diff_rq2 = mean(ex2_local_accuracies, ex2_global_accuracies, ex2_central_accuracies, ex2_local_demographic_parities, ex2_global_demographic_parities, ex2_central_demographic_parities, ex2_local_equalized_odds, ex2_global_equalized_odds, ex2_central_equalized_odds)

In [8]:
###########################################
# EXPERIMENT 3: Quantity (all)
###########################################

file_path = 'results_experiments_3.txt'
with open(file_path, 'r') as file:
    ex3_content = file.read()

# Extract values
( ex3_local_accuracies, ex3_local_equalized_odds, ex3_local_demographic_parities,
 ex3_global_accuracies, ex3_global_equalized_odds, ex3_global_demographic_parities, 
 ex3_central_accuracies, ex3_central_equalized_odds, ex3_central_demographic_parities, 
 ex3_train_samples, ex3_test_samples) = extract_values(ex3_content)

ex3_local, ex3_global, ex3_central, ex3_diff_rq1, ex3_diff_rq2 = mean(ex3_local_accuracies, ex3_global_accuracies, ex3_central_accuracies, ex3_local_demographic_parities, ex3_global_demographic_parities, ex3_central_demographic_parities, ex3_local_equalized_odds, ex3_global_equalized_odds, ex3_central_equalized_odds)

In [9]:
###########################################
# EXPERIMENT 4: Quantity (polarised)
###########################################

file_path = 'results_experiments_4.txt'
with open(file_path, 'r') as file:
    ex4_content = file.read()

# Extract values
( ex4_local_accuracies, ex4_local_equalized_odds, ex4_local_demographic_parities,
 ex4_global_accuracies, ex4_global_equalized_odds, ex4_global_demographic_parities, 
 ex4_central_accuracies, ex4_central_equalized_odds, ex4_central_demographic_parities, 
 ex4_train_samples, ex4_test_samples) = extract_values(ex4_content)

ex4_local, ex4_global, ex4_central, ex4_diff_rq1, ex4_diff_rq2 = mean(ex4_local_accuracies, ex4_global_accuracies, ex4_central_accuracies, ex4_local_demographic_parities, ex4_global_demographic_parities, ex4_central_demographic_parities, ex4_local_equalized_odds, ex4_global_equalized_odds, ex4_central_equalized_odds)

In [10]:
###########################################
# EXPERIMENT 5: Quantity (distributed)
###########################################

file_path = 'results_experiments_5.txt'
with open(file_path, 'r') as file:
    ex5_content = file.read()

# Extract values
( ex5_local_accuracies, ex5_local_equalized_odds, ex5_local_demographic_parities,
 ex5_global_accuracies, ex5_global_equalized_odds, ex5_global_demographic_parities, 
 ex5_central_accuracies, ex5_central_equalized_odds, ex5_central_demographic_parities, 
 ex5_train_samples, ex5_test_samples) = extract_values(ex5_content)

ex5_local, ex5_global, ex5_central, ex5_diff_rq1, ex5_diff_rq2 = mean(ex5_local_accuracies, ex5_global_accuracies, ex5_central_accuracies, ex5_local_demographic_parities, ex5_global_demographic_parities, ex5_central_demographic_parities, ex5_local_equalized_odds, ex5_global_equalized_odds, ex5_central_equalized_odds)

In [11]:
###########################################
# EXPERIMENT 6: Label (100-0)
###########################################

file_path = 'results_experiments_6.txt'
with open(file_path, 'r') as file:
    ex6_content = file.read()

# Extract values
( ex6_local_accuracies, ex6_local_equalized_odds, ex6_local_demographic_parities,
 ex6_global_accuracies, ex6_global_equalized_odds, ex6_global_demographic_parities, 
 ex6_central_accuracies, ex6_central_equalized_odds, ex6_central_demographic_parities, 
 ex6_train_samples, ex6_test_samples) = extract_values(ex6_content)

ex6_local, ex6_global, ex6_central, ex6_diff_rq1, ex6_diff_rq2 = mean(ex6_local_accuracies, ex6_global_accuracies, ex6_central_accuracies, ex6_local_demographic_parities, ex6_global_demographic_parities, ex6_central_demographic_parities, ex6_local_equalized_odds, ex6_global_equalized_odds, ex6_central_equalized_odds)

In [12]:
###########################################
# EXPERIMENT 7: Label (75-25)
###########################################

file_path = 'results_experiments_7.txt'
with open(file_path, 'r') as file:
    ex7_content = file.read()

# Extract values
( ex7_local_accuracies, ex7_local_equalized_odds, ex7_local_demographic_parities,
 ex7_global_accuracies, ex7_global_equalized_odds, ex7_global_demographic_parities, 
 ex7_central_accuracies, ex7_central_equalized_odds, ex7_central_demographic_parities, 
 ex7_train_samples, ex7_test_samples) = extract_values(ex7_content)

ex7_local, ex7_global, ex7_central, ex7_diff_rq1, ex7_diff_rq2 = mean(ex7_local_accuracies, ex7_global_accuracies, ex6_central_accuracies, ex7_local_demographic_parities, ex7_global_demographic_parities, ex7_central_demographic_parities, ex7_local_equalized_odds, ex7_global_equalized_odds, ex6_central_equalized_odds)

In [13]:
###########################################
# EXPERIMENT 8: Feature (Sex, 100-0)
###########################################

file_path = 'results_experiments_8.txt'
with open(file_path, 'r') as file:
    ex8_content = file.read()

# Extract values
( ex8_local_accuracies, ex8_local_equalized_odds, ex8_local_demographic_parities,
 ex8_global_accuracies, ex8_global_equalized_odds, ex8_global_demographic_parities, 
 ex8_central_accuracies, ex8_central_equalized_odds, ex8_central_demographic_parities, 
 ex8_train_samples, ex8_test_samples) = extract_values(ex8_content)

ex8_local, ex8_global, ex8_central, ex8_diff_rq1, ex8_diff_rq2 = mean(ex8_local_accuracies, ex8_global_accuracies, ex8_central_accuracies, ex8_local_demographic_parities, ex8_global_demographic_parities, ex8_central_demographic_parities, ex8_local_equalized_odds, ex8_global_equalized_odds, ex8_central_equalized_odds)

In [14]:
###########################################
# EXPERIMENT 9: Feature (Sex, 75-25)
###########################################

file_path = 'results_experiments_9.txt'
with open(file_path, 'r') as file:
    ex9_content = file.read()

# Extract values
( ex9_local_accuracies, ex9_local_equalized_odds, ex9_local_demographic_parities,
 ex9_global_accuracies, ex9_global_equalized_odds, ex9_global_demographic_parities, 
 ex9_central_accuracies, ex9_central_equalized_odds, ex9_central_demographic_parities, 
 ex9_train_samples, ex9_test_samples) = extract_values(ex9_content)

ex9_local, ex9_global, ex9_central, ex9_diff_rq1, ex9_diff_rq2 = mean(ex9_local_accuracies, ex9_global_accuracies, ex9_central_accuracies, ex9_local_demographic_parities, ex9_global_demographic_parities, ex9_central_demographic_parities, ex9_local_equalized_odds, ex9_global_equalized_odds, ex9_central_equalized_odds)

In [15]:
###########################################
# EXPERIMENT 10: Feature (Race)
###########################################

file_path = 'results_experiments_10.txt'
with open(file_path, 'r') as file:
    ex10_content = file.read()

# Extract values
( ex10_local_accuracies, ex10_local_equalized_odds, ex10_local_demographic_parities,
 ex10_global_accuracies, ex10_global_equalized_odds, ex10_global_demographic_parities, 
 ex10_central_accuracies, ex10_central_equalized_odds, ex10_central_demographic_parities, 
 ex10_train_samples, ex10_test_samples) = extract_values(ex10_content)

ex10_local, ex10_global, ex10_central, ex10_diff_rq1, ex10_diff_rq2 = mean(ex10_local_accuracies, ex10_global_accuracies, ex10_central_accuracies, ex10_local_demographic_parities, ex10_global_demographic_parities, ex10_central_demographic_parities, ex10_local_equalized_odds, ex10_global_equalized_odds, ex10_central_equalized_odds)

In [16]:
###########################################
# EXPERIMENT 11: Feature (MS)
###########################################

file_path = 'results_experiments_11.txt'
with open(file_path, 'r') as file:
    ex11_content = file.read()

# Extract values
( ex11_local_accuracies, ex11_local_equalized_odds, ex11_local_demographic_parities,
 ex11_global_accuracies, ex11_global_equalized_odds, ex11_global_demographic_parities, 
 ex11_central_accuracies, ex11_central_equalized_odds, ex11_central_demographic_parities, 
 ex11_train_samples, ex11_test_samples) = extract_values(ex11_content)

ex11_local, ex11_global, ex11_central, ex11_diff_rq1, ex11_diff_rq2 = mean(ex11_local_accuracies, ex11_global_accuracies, ex11_central_accuracies, ex11_local_demographic_parities, ex11_global_demographic_parities, ex11_central_demographic_parities, ex11_local_equalized_odds, ex11_global_equalized_odds, ex11_central_equalized_odds)


In [17]:
###########################################
# EXPERIMENT 12: Quantity + Label (100-0)
###########################################

file_path = 'results_experiments_12.txt'
with open(file_path, 'r') as file:
    ex12_content = file.read()

# Extract values
( ex12_local_accuracies, ex12_local_equalized_odds, ex12_local_demographic_parities,
 ex12_global_accuracies, ex12_global_equalized_odds, ex12_global_demographic_parities, 
 ex12_central_accuracies, ex12_central_equalized_odds, ex12_central_demographic_parities, 
 ex12_train_samples, ex12_test_samples) = extract_values(ex12_content)

ex12_local, ex12_global, ex12_central, ex12_diff_rq1, ex12_diff_rq2 = mean(ex12_local_accuracies, ex12_global_accuracies, ex12_central_accuracies, ex12_local_demographic_parities, ex12_global_demographic_parities, ex12_central_demographic_parities, ex12_local_equalized_odds, ex12_global_equalized_odds, ex12_central_equalized_odds)

In [18]:
###########################################
# EXPERIMENT 13: Quantity + Label (75-25)
###########################################

file_path = 'results_experiments_13.txt'
with open(file_path, 'r') as file:
    ex13_content = file.read()

# Extract values
( ex13_local_accuracies, ex13_local_equalized_odds, ex13_local_demographic_parities,
 ex13_global_accuracies, ex13_global_equalized_odds, ex13_global_demographic_parities, 
 ex13_central_accuracies, ex13_central_equalized_odds, ex13_central_demographic_parities, 
 ex13_train_samples, ex13_test_samples) = extract_values(ex13_content)

ex13_local, ex13_global, ex13_central, ex13_diff_rq1, ex13_diff_rq2 = mean(ex13_local_accuracies, ex13_global_accuracies, ex13_central_accuracies, ex13_local_demographic_parities, ex13_global_demographic_parities, ex13_central_demographic_parities, ex13_local_equalized_odds, ex13_global_equalized_odds, ex13_central_equalized_odds)

In [19]:
###########################################
# EXPERIMENT 14: Quantity + Feature (Sex, 100-0)
###########################################

file_path = 'results_experiments_14.txt'
with open(file_path, 'r') as file:
    ex14_content = file.read()

# Extract values
( ex14_local_accuracies, ex14_local_equalized_odds, ex14_local_demographic_parities,
 ex14_global_accuracies, ex14_global_equalized_odds, ex14_global_demographic_parities, 
 ex14_central_accuracies, ex14_central_equalized_odds, ex14_central_demographic_parities, 
 ex14_train_samples, ex14_test_samples) = extract_values(ex14_content)

ex14_local, ex14_global, ex14_central, ex14_diff_rq1, ex14_diff_rq2 = mean(ex14_local_accuracies, ex14_global_accuracies, ex14_central_accuracies, ex14_local_demographic_parities, ex14_global_demographic_parities, ex14_central_demographic_parities, ex14_local_equalized_odds, ex14_global_equalized_odds, ex14_central_equalized_odds)

In [20]:
###########################################
# EXPERIMENT 15: Quantity + Feature (Sex, 75-25)
###########################################

file_path = 'results_experiments_15.txt'
with open(file_path, 'r') as file:
    ex15_content = file.read()

# Extract values
( ex15_local_accuracies, ex15_local_equalized_odds, ex15_local_demographic_parities,
 ex15_global_accuracies, ex15_global_equalized_odds, ex15_global_demographic_parities, 
 ex15_central_accuracies, ex15_central_equalized_odds, ex15_central_demographic_parities, 
 ex15_train_samples, ex15_test_samples) = extract_values(ex15_content)

ex15_local, ex15_global, ex15_central, ex15_diff_rq1, ex15_diff_rq2 = mean(ex15_local_accuracies, ex15_global_accuracies, ex15_central_accuracies, ex15_local_demographic_parities, ex15_global_demographic_parities, ex15_central_demographic_parities, ex15_local_equalized_odds, ex15_global_equalized_odds, ex15_central_equalized_odds)

In [21]:
###########################################
# EXPERIMENT 16: Quantity + Feature (Race)
###########################################

file_path = 'results_experiments_16.txt'
with open(file_path, 'r') as file:
    ex16_content = file.read()

# Extract values
( ex16_local_accuracies, ex16_local_equalized_odds, ex16_local_demographic_parities,
 ex16_global_accuracies, ex16_global_equalized_odds, ex16_global_demographic_parities, 
 ex16_central_accuracies, ex16_central_equalized_odds, ex16_central_demographic_parities, 
 ex16_train_samples, ex16_test_samples) = extract_values(ex16_content)

ex16_local, ex16_global, ex16_central, ex16_diff_rq1, ex16_diff_rq2 = mean(ex16_local_accuracies, ex16_global_accuracies, ex16_central_accuracies, ex16_local_demographic_parities, ex16_global_demographic_parities, ex16_central_demographic_parities, ex16_local_equalized_odds, ex16_global_equalized_odds, ex16_central_equalized_odds)

In [22]:
###########################################
# EXPERIMENT 17: Quantity + Feature (MS)
###########################################

file_path = 'results_experiments_17.txt'
with open(file_path, 'r') as file:
    ex17_content = file.read()

# Extract values
( ex17_local_accuracies, ex17_local_equalized_odds, ex17_local_demographic_parities,
 ex17_global_accuracies, ex17_global_equalized_odds, ex17_global_demographic_parities, 
 ex17_central_accuracies, ex17_central_equalized_odds, ex17_central_demographic_parities, 
 ex17_train_samples, ex17_test_samples) = extract_values(ex17_content)

ex17_local, ex17_global, ex17_central, ex17_diff_rq1, ex17_diff_rq2 = mean(ex17_local_accuracies, ex17_global_accuracies, ex17_central_accuracies, ex17_local_demographic_parities, ex17_global_demographic_parities, ex17_central_demographic_parities, ex17_local_equalized_odds, ex17_global_equalized_odds, ex17_central_equalized_odds)

In [23]:
from scipy.stats import shapiro

shapiro_count = 0
all_count = 0

for i in range(1, 17):
    file_path = f'results_experiments_{i}.txt'
    with open(file_path, 'r') as file:
        content = file.read()
        
    for metric in extract_values_1(content):
        for lst_seed in metric:

            shapiro_score = shapiro(lst_seed)
            all_count += 1
            if shapiro_score.pvalue > 0.005:
                shapiro_count += 1
                
print('Shapiro count:', shapiro_count)
print('All count:', all_count)      

Shapiro count: 374
All count: 384


  shapiro_score = shapiro(lst_seed)


In [24]:
from scipy import stats  

In [25]:
# Equalized Odds, Local vs. Global
lst_locals_eo = [ex1_local_equalized_odds, ex2_local_equalized_odds, ex3_local_equalized_odds, ex4_local_equalized_odds, ex5_local_equalized_odds, ex6_local_equalized_odds, ex7_local_equalized_odds, ex8_local_equalized_odds, ex9_local_equalized_odds, ex10_local_equalized_odds, ex11_local_equalized_odds, ex12_local_equalized_odds, ex13_local_equalized_odds, ex14_local_equalized_odds, ex15_local_equalized_odds, ex16_local_equalized_odds, ex17_local_equalized_odds]
lst_globals_eo = [ex1_global_equalized_odds, ex2_global_equalized_odds, ex3_global_equalized_odds, ex4_global_equalized_odds, ex5_global_equalized_odds, ex6_global_equalized_odds, ex7_global_equalized_odds, ex8_global_equalized_odds, ex9_global_equalized_odds, ex10_global_equalized_odds, ex11_global_equalized_odds, ex12_global_equalized_odds, ex13_global_equalized_odds, ex14_global_equalized_odds, ex15_global_equalized_odds, ex16_global_equalized_odds, ex17_global_equalized_odds]
i = 0
for lst_local, lst_global in zip(lst_locals_eo, lst_globals_eo):
    print('Experimental Partition: ', i)
    
    x_eo_sex, y_eo_sex = lst_local[0], lst_global[0]
    t_stat_eo_sex, p_val_eo_sex = stats.ttest_ind(x_eo_sex, y_eo_sex)  
    print('EO sex local vs. global:', p_val_eo_sex)

    x_eo_race, y_eo_race = lst_local[1], lst_global[1]
    t_stat_eo_race, p_val_eo_race = stats.ttest_ind(x_eo_race, y_eo_race)  
    print('EO race local vs. global:', p_val_eo_race)

    x_eo_white, y_eo_white = lst_local[2], lst_global[2]
    t_stat_eo_white, p_val_eo_white = stats.ttest_ind(x_eo_white, y_eo_white) 
    print('EO white local vs. global:', p_val_eo_white) 

    x_eo_black, y_eo_black = lst_local[3], lst_global[3]
    t_stat_eo_black, p_val_eo_black = stats.ttest_ind(x_eo_black, y_eo_black)  
    print('EO black local vs. global:', p_val_eo_black)

    print('-'*20)
    i += 1

Experimental Partition:  0
EO sex local vs. global: 0.9498060350778585
EO race local vs. global: 0.0003126085772704813
EO white local vs. global: 0.8957804421613713
EO black local vs. global: 0.0016551438289786953
--------------------
Experimental Partition:  1
EO sex local vs. global: 0.9438911802690364
EO race local vs. global: 3.5073695210309757e-08
EO white local vs. global: 0.9562686702036487
EO black local vs. global: 0.002551975371248984
--------------------
Experimental Partition:  2
EO sex local vs. global: 0.6456520726089054
EO race local vs. global: 0.00031126453767262827
EO white local vs. global: 0.9129359842825256
EO black local vs. global: 1.1915028914995224e-05
--------------------
Experimental Partition:  3
EO sex local vs. global: 0.49910432275110317
EO race local vs. global: 0.00015121582296867369
EO white local vs. global: 0.7779777249097675
EO black local vs. global: 5.480287038098872e-06
--------------------
Experimental Partition:  4
EO sex local vs. global: 0.56

  res = hypotest_fun_out(*samples, **kwds)


In [26]:
# Demographic Parity Local vs. Global
lst_locals_dp = [ex1_local_demographic_parities, ex2_local_demographic_parities, ex3_local_demographic_parities, ex4_local_demographic_parities, ex5_local_demographic_parities, ex6_local_demographic_parities, ex7_local_demographic_parities, ex8_local_demographic_parities, ex9_local_demographic_parities, ex10_local_demographic_parities, ex11_local_demographic_parities, ex12_local_demographic_parities, ex13_local_demographic_parities, ex14_local_demographic_parities, ex15_local_demographic_parities, ex16_local_demographic_parities, ex17_local_demographic_parities]
lst_globals_dp = [ex1_global_demographic_parities, ex2_global_demographic_parities, ex3_global_demographic_parities, ex4_global_demographic_parities, ex5_global_demographic_parities, ex6_global_demographic_parities, ex7_global_demographic_parities, ex8_global_demographic_parities, ex9_global_demographic_parities, ex10_global_demographic_parities, ex11_global_demographic_parities, ex12_global_demographic_parities, ex13_global_demographic_parities, ex14_global_demographic_parities, ex15_global_demographic_parities, ex16_global_demographic_parities, ex17_global_demographic_parities]
i = 0
for lst_local, lst_global in zip(lst_locals_dp, lst_globals_dp):
    print('Experimental Partition: ', i)
    
    x_dp_sex, y_dp_sex = lst_local[0], lst_global[0]
    t_stat_dp_sex, p_val_dp_sex = stats.ttest_ind(x_dp_sex, y_dp_sex)  
    print('DP sex local vs. global:', p_val_dp_sex)

    x_dp_race, y_dp_race = lst_local[1], lst_global[1]
    t_stat_dp_race, p_val_dp_race = stats.ttest_ind(x_dp_race, y_dp_race) 
    print('DP race local vs. global:', p_val_dp_race)

    x_dp_white, y_dp_white = lst_local[2], lst_global[2]
    t_stat_dp_white, p_val_dp_white = stats.ttest_ind(x_dp_white, y_dp_white) 
    print('DP white local vs. global:', p_val_dp_white)

    x_dp_black, y_dp_black = lst_local[3], lst_global[3]
    t_stat_dp_black, p_val_dp_black = stats.ttest_ind(x_dp_black, y_dp_black)  
    print('DP black local vs. global:', p_val_dp_black)

    print('-'*20)
    i += 1

Experimental Partition:  0
DP sex local vs. global: 0.9885189307539684
DP race local vs. global: 9.028889264970163e-10
DP white local vs. global: 0.9386270236195402
DP black local vs. global: 0.9918805498625757
--------------------
Experimental Partition:  1
DP sex local vs. global: 0.9694346280529432
DP race local vs. global: 2.11865334777529e-06
DP white local vs. global: 0.9865404134492942
DP black local vs. global: 0.963299595221623
--------------------
Experimental Partition:  2
DP sex local vs. global: 0.9648426213241068
DP race local vs. global: 6.934966907292985e-05
DP white local vs. global: 0.799942834797672
DP black local vs. global: 0.1183062606588603
--------------------
Experimental Partition:  3
DP sex local vs. global: 0.9438354731461607
DP race local vs. global: 0.00022864244136153164
DP white local vs. global: 0.8679044154620226
DP black local vs. global: 0.6925441180911291
--------------------
Experimental Partition:  4
DP sex local vs. global: 0.9947801284152307
DP 

In [27]:
# Equalized Odds Global vs. Central
lst_globals = [ex5_global_equalized_odds, ex6_global_equalized_odds, ex7_global_equalized_odds, ex8_global_equalized_odds, ex9_global_equalized_odds, ex10_global_equalized_odds, ex11_global_equalized_odds, ex12_global_equalized_odds, ex13_global_equalized_odds, ex14_global_equalized_odds, ex15_global_equalized_odds, ex16_global_equalized_odds, ex17_global_equalized_odds]
lst_centrals = [ex5_central_equalized_odds, ex6_central_equalized_odds, ex7_central_equalized_odds, ex8_central_equalized_odds, ex9_central_equalized_odds, ex10_central_equalized_odds, ex11_central_equalized_odds, ex12_central_equalized_odds, ex13_central_equalized_odds, ex14_central_equalized_odds, ex15_central_equalized_odds, ex16_central_equalized_odds, ex17_central_equalized_odds]
i = 0
for lst_global, lst_central in zip(lst_globals, lst_centrals):
    print('Experimental Partition: ', i)

    x_eo_sex, y_eo_sex = lst_global[0], lst_central[0]
    t_stat_eo_sex, p_val_eo_sex = stats.ttest_ind(x_eo_sex, y_eo_sex)  
    print('EO sex global vs. central:', p_val_eo_sex)

    x_eo_race, y_eo_race = lst_global[1], lst_central[1]
    t_stat_eo_race, p_val_eo_race = stats.ttest_ind(x_eo_race, y_eo_race)  
    print('EO race global vs. central:', p_val_eo_race)

    x_eo_white, y_eo_white = lst_global[2], lst_central[2]
    t_stat_eo_white, p_val_eo_white = stats.ttest_ind(x_eo_white, y_eo_white) 
    print('EO white global vs. central:', p_val_eo_white) 

    x_eo_black, y_eo_black = lst_global[3], lst_central[3]
    t_stat_eo_black, p_val_eo_black = stats.ttest_ind(x_eo_black, y_eo_black)  
    print('EO black global vs. central:', p_val_eo_black)

    print('------------------------------')
    i += 1

Experimental Partition:  0
EO sex global vs. central: 0.0006943595263866969
EO race global vs. central: 0.9454838275258712
EO white global vs. central: 0.022140827770866088
EO black global vs. central: 0.7857865148255778
------------------------------
Experimental Partition:  1
EO sex global vs. central: 0.016160013863999147
EO race global vs. central: 0.626886786382136
EO white global vs. central: 0.5082797914841419
EO black global vs. central: 0.12428710864021085
------------------------------
Experimental Partition:  2
EO sex global vs. central: 0.7931295488607606
EO race global vs. central: 0.050887459646863774
EO white global vs. central: 0.013172658222212974
EO black global vs. central: 0.7524264142456049
------------------------------
Experimental Partition:  3
EO sex global vs. central: 0.005500884339979406
EO race global vs. central: 0.12699562728809385
EO white global vs. central: 0.008536967603265444
EO black global vs. central: 0.04860273989119754
--------------------------

In [28]:
# Demographic Parity Global vs. Central
lst_globals = [ex1_global_demographic_parities, ex2_global_demographic_parities, ex3_global_demographic_parities, ex4_global_demographic_parities, ex5_global_demographic_parities, ex6_global_demographic_parities, ex7_global_demographic_parities, ex8_global_demographic_parities, ex9_global_demographic_parities, ex10_global_demographic_parities, ex11_global_demographic_parities, ex12_global_demographic_parities, ex13_global_demographic_parities, ex14_global_demographic_parities, ex15_global_demographic_parities, ex16_global_demographic_parities, ex17_global_demographic_parities]
lst_centrals = [ex1_central_demographic_parities, ex2_central_demographic_parities, ex3_central_demographic_parities, ex4_central_demographic_parities, ex5_central_demographic_parities, ex6_central_demographic_parities, ex7_central_demographic_parities, ex8_central_demographic_parities, ex9_central_demographic_parities, ex10_central_demographic_parities, ex11_central_demographic_parities, ex12_central_demographic_parities, ex13_central_demographic_parities, ex14_central_demographic_parities, ex15_central_demographic_parities, ex16_central_demographic_parities, ex17_central_demographic_parities]
i = 0
for lst_global, lst_central in zip(lst_globals, lst_centrals):
    print('Experimental Partition: ', i)
    
    x_dp_sex, y_dp_sex = lst_global[0], lst_central[0]
    t_stat_dp_sex, p_val_dp_sex = stats.ttest_ind(x_dp_sex, y_dp_sex)  
    print('DP sex global vs. central:', p_val_dp_sex)

    x_dp_race, y_dp_race = lst_global[1], lst_central[1]
    t_stat_dp_race, p_val_dp_race = stats.ttest_ind(x_dp_race, y_dp_race) 
    print('DP race global vs. central:', p_val_dp_race)

    x_dp_white, y_dp_white = lst_global[2], lst_central[2]
    t_stat_dp_white, p_val_dp_white = stats.ttest_ind(x_dp_white, y_dp_white) 
    print('DP white global vs. central:', p_val_dp_white)

    x_dp_black, y_dp_black = lst_global[3], lst_central[3]
    t_stat_dp_black, p_val_dp_black = stats.ttest_ind(x_dp_black, y_dp_black)  
    print('DP black global vs. central:', p_val_dp_black)

    print('-'*20)
    i += 1

Experimental Partition:  0
DP sex global vs. central: 0.2444985979793578
DP race global vs. central: 0.938518568916681
DP white global vs. central: 0.01587405744598924
DP black global vs. central: 0.8087216393403481
--------------------
Experimental Partition:  1
DP sex global vs. central: 0.8317770377415115
DP race global vs. central: 0.14000235100402766
DP white global vs. central: 0.871503732472893
DP black global vs. central: 0.6016250882375005
--------------------
Experimental Partition:  2
DP sex global vs. central: 0.04243460768979023
DP race global vs. central: 0.45851278707394827
DP white global vs. central: 0.032784154447700664
DP black global vs. central: 0.3470771424230713
--------------------
Experimental Partition:  3
DP sex global vs. central: 0.11822475340367954
DP race global vs. central: 0.478217844116653
DP white global vs. central: 0.04392940261264918
DP black global vs. central: 0.23493133869326063
--------------------
Experimental Partition:  4
DP sex global vs. c

In [29]:
import itertools

# combine all 
merged_baseline = list(itertools.chain(*ex1_local_equalized_odds))
lst_locals_eo = [ex1_local_equalized_odds, ex2_local_equalized_odds, ex3_local_equalized_odds, ex4_local_equalized_odds, ex5_local_equalized_odds, ex6_local_equalized_odds, ex7_local_equalized_odds, ex8_local_equalized_odds, ex9_local_equalized_odds, ex10_local_equalized_odds, ex11_local_equalized_odds, ex12_local_equalized_odds, ex13_local_equalized_odds, ex14_local_equalized_odds, ex15_local_equalized_odds, ex16_local_equalized_odds, ex17_local_equalized_odds]
lst_globals_eo = [ex1_global_equalized_odds, ex2_global_equalized_odds, ex3_global_equalized_odds, ex4_global_equalized_odds, ex5_global_equalized_odds, ex6_global_equalized_odds, ex7_global_equalized_odds, ex8_global_equalized_odds, ex9_global_equalized_odds, ex10_global_equalized_odds, ex11_global_equalized_odds, ex12_global_equalized_odds, ex13_global_equalized_odds, ex14_global_equalized_odds, ex15_global_equalized_odds, ex16_global_equalized_odds, ex17_global_equalized_odds]
lst_centrals_eo = [ex1_central_equalized_odds, ex2_central_equalized_odds, ex3_central_equalized_odds, ex4_central_equalized_odds, ex5_central_equalized_odds, ex6_central_equalized_odds, ex7_central_equalized_odds, ex8_central_equalized_odds, ex9_central_equalized_odds, ex10_central_equalized_odds, ex11_central_equalized_odds, ex12_central_equalized_odds, ex13_central_equalized_odds, ex14_central_equalized_odds, ex15_central_equalized_odds, ex16_central_equalized_odds, ex17_central_equalized_odds]
lst_locals_dp = [ex1_local_demographic_parities, ex2_local_demographic_parities, ex3_local_demographic_parities, ex4_local_demographic_parities, ex5_local_demographic_parities, ex6_local_demographic_parities, ex7_local_demographic_parities, ex8_local_demographic_parities, ex9_local_demographic_parities, ex10_local_demographic_parities, ex11_local_demographic_parities, ex12_local_demographic_parities, ex13_local_demographic_parities, ex14_local_demographic_parities, ex15_local_demographic_parities, ex16_local_demographic_parities, ex17_local_demographic_parities]
lst_globals_dp = [ex1_global_demographic_parities, ex2_global_demographic_parities, ex3_global_demographic_parities, ex4_global_demographic_parities, ex5_global_demographic_parities, ex6_global_demographic_parities, ex7_global_demographic_parities, ex8_global_demographic_parities, ex9_global_demographic_parities, ex10_global_demographic_parities, ex11_global_demographic_parities, ex12_global_demographic_parities, ex13_global_demographic_parities, ex14_global_demographic_parities, ex15_global_demographic_parities, ex16_global_demographic_parities, ex17_global_demographic_parities]
lst_centrals_dp = [ex1_central_demographic_parities, ex2_central_demographic_parities, ex3_central_demographic_parities, ex4_central_demographic_parities, ex5_central_demographic_parities, ex6_central_demographic_parities, ex7_central_demographic_parities, ex8_central_demographic_parities, ex9_central_demographic_parities, ex10_central_demographic_parities, ex11_central_demographic_parities, ex12_central_demographic_parities, ex13_central_demographic_parities, ex14_central_demographic_parities, ex15_central_demographic_parities, ex16_central_demographic_parities, ex17_central_demographic_parities]


shapiro_count = 0
all_count = 0

for partition in lst_locals_eo + lst_globals_eo + lst_centrals_eo + lst_locals_dp + lst_globals_dp + lst_centrals_dp:
    merged_metric = list(itertools.chain(*partition))
    shapiro_score = shapiro(merged_metric)
    all_count += 1
    if shapiro_score.pvalue > 0.005:
        shapiro_count += 1
    
print('Shapiro count:', shapiro_count)
print('All count:', all_count)

Shapiro count: 70
All count: 102


In [30]:
# Equalized Odds
# cross t-test between baseline and partitions with heterogeneity
# scores of all sensitive attributes are aggregated

# local
merged_local_baseline_eo = list(itertools.chain(*ex1_local_equalized_odds))
lst_locals_eo = [ex2_local_equalized_odds, ex3_local_equalized_odds, ex4_local_equalized_odds, ex5_local_equalized_odds, ex6_local_equalized_odds, ex7_local_equalized_odds, ex8_local_equalized_odds, ex9_local_equalized_odds, ex10_local_equalized_odds, ex11_local_equalized_odds, ex12_local_equalized_odds, ex13_local_equalized_odds, ex14_local_equalized_odds, ex15_local_equalized_odds, ex16_local_equalized_odds, ex17_local_equalized_odds]

for i, partition in enumerate(lst_locals_eo):
    merged_partition_local = list(itertools.chain(*partition))
    t_stat_local, p_val_local = stats.ttest_ind(merged_local_baseline_eo, merged_partition_local)  
    print(f'EO local baseline vs. exp. partition {i}: ')
    print('P-value:', p_val_local)
    print('T-stat:', t_stat_local)
    print('-'*20)
    
print('-'*20)

# global
merged_global_baseline_eo = list(itertools.chain(*ex1_global_equalized_odds))
lst_globals_eo = [ex2_global_equalized_odds, ex3_global_equalized_odds, ex4_global_equalized_odds, ex5_global_equalized_odds, ex6_global_equalized_odds, ex7_global_equalized_odds, ex8_global_equalized_odds, ex9_local_equalized_odds, ex10_global_equalized_odds, ex11_global_equalized_odds, ex12_local_equalized_odds, ex13_global_equalized_odds, ex14_global_equalized_odds, ex15_global_equalized_odds, ex16_global_equalized_odds, ex17_global_equalized_odds]

for i, partition in enumerate(lst_globals_eo):
    merged_partition_global = list(itertools.chain(*partition))
    t_stat_global, p_val_global = stats.ttest_ind(merged_global_baseline_eo, merged_partition_global)  
    print(f'EO global baseline vs. exp. partition {i}: ')
    print('P-value:', p_val_global)
    print('T-stat:', t_stat_global)
    print('-'*20)
    
print('-'*40)

# central
merged_central_baseline_eo = list(itertools.chain(*ex1_local_equalized_odds))
lst_locals_eo = [ex2_local_equalized_odds, ex3_local_equalized_odds, ex4_local_equalized_odds, ex5_local_equalized_odds, ex6_local_equalized_odds, ex7_local_equalized_odds, ex8_local_equalized_odds, ex9_local_equalized_odds, ex10_local_equalized_odds, ex11_local_equalized_odds, ex12_local_equalized_odds, ex13_local_equalized_odds, ex14_local_equalized_odds, ex15_local_equalized_odds, ex16_local_equalized_odds, ex17_local_equalized_odds]

for i, partition in enumerate(lst_locals_eo):
    merged_partition_central = list(itertools.chain(*partition))
    t_stat_central, p_val_central = stats.ttest_ind(merged_central_baseline_eo, merged_partition_central)  
    print(f'EO central baseline vs. exp. partition {i}: ')
    print('P-value:', p_val_central)
    print('T-stat:', t_stat_central)
    print('-'*20)

EO local baseline vs. exp. partition 0: 
P-value: 0.7363387421255947
T-stat: 0.3391836784291654
--------------------
EO local baseline vs. exp. partition 1: 
P-value: 0.4827159912292436
T-stat: 0.7088955791724336
--------------------
EO local baseline vs. exp. partition 2: 
P-value: 0.5810099145738401
T-stat: 0.5566801356519728
--------------------
EO local baseline vs. exp. partition 3: 
P-value: 0.4016668986102696
T-stat: 0.8481474155381252
--------------------
EO local baseline vs. exp. partition 4: 
P-value: 0.4256304619204544
T-stat: -0.8053466091052812
--------------------
EO local baseline vs. exp. partition 5: 
P-value: 0.7555934444708702
T-stat: -0.3135289245364439
--------------------
EO local baseline vs. exp. partition 6: 
P-value: 0.5447504541785853
T-stat: -0.6111326465711467
--------------------
EO local baseline vs. exp. partition 7: 
P-value: 0.9208557879187962
T-stat: -0.10001808688217144
--------------------
EO local baseline vs. exp. partition 8: 
P-value: 0.0698012

In [31]:
# Demographic Parities
# cross t-test between baseline and partitions with heterogeneity
# scores of all sensitive attributes are aggregated

# local
merged_local_baseline_dp = list(itertools.chain(*ex1_local_demographic_parities))
lst_locals_dp = [ex2_local_demographic_parities, ex3_local_demographic_parities, ex4_local_demographic_parities, ex5_local_demographic_parities, ex6_local_demographic_parities, ex7_local_demographic_parities, ex8_local_demographic_parities, ex9_local_demographic_parities, ex10_local_demographic_parities, ex11_local_demographic_parities, ex12_local_demographic_parities, ex13_local_demographic_parities, ex14_local_demographic_parities, ex15_local_demographic_parities, ex16_local_demographic_parities, ex17_local_demographic_parities]

for i, partition in enumerate(lst_locals_dp):
    merged_partition_local = list(itertools.chain(*partition))
    t_stat_local, p_val_local = stats.ttest_ind(merged_local_baseline_dp, merged_partition_local)  
    print(f'DP local baseline vs. exp. partition {i}: ')
    print('P-value:', p_val_local)
    print('T-stat:', t_stat_local)
    print('-'*20)
    
print('-'*20)

# global
merged_global_baseline_dp = list(itertools.chain(*ex1_global_demographic_parities))
lst_globals_dp = [ex2_global_demographic_parities, ex3_global_demographic_parities, ex4_global_demographic_parities, ex5_global_demographic_parities, ex6_global_demographic_parities, ex7_global_demographic_parities, ex8_global_equalized_odds, ex9_local_demographic_parities, ex10_global_demographic_parities, ex11_global_demographic_parities, ex12_local_demographic_parities, ex13_global_demographic_parities, ex14_global_demographic_parities, ex15_global_demographic_parities, ex16_global_demographic_parities, ex17_global_demographic_parities]

for i, partition in enumerate(lst_globals_dp):
    merged_partition_global = list(itertools.chain(*partition))
    t_stat_global, p_val_global = stats.ttest_ind(merged_global_baseline_dp, merged_partition_global)  
    print(f'DP global baseline vs. exp. partition {i}: ')
    print('P-value:', p_val_global)
    print('T-stat:', t_stat_global)
    print('-'*20)
    
print('-'*40)

# central
merged_central_baseline_dp = list(itertools.chain(*ex1_local_demographic_parities))
lst_locals_dp = [ex2_local_demographic_parities, ex3_local_demographic_parities, ex4_local_demographic_parities, ex5_local_demographic_parities, ex6_local_demographic_parities, ex7_local_demographic_parities, ex8_local_demographic_parities, ex9_local_demographic_parities, ex10_local_demographic_parities, ex11_local_demographic_parities, ex12_local_demographic_parities, ex13_local_demographic_parities, ex14_local_demographic_parities, ex15_local_demographic_parities, ex16_local_demographic_parities, ex17_local_demographic_parities]

for i, partition in enumerate(lst_locals_dp):
    merged_partition_central = list(itertools.chain(*partition))
    t_stat_central, p_val_central = stats.ttest_ind(merged_central_baseline_dp, merged_partition_central)  
    print(f'DP central baseline vs. exp. partition {i}: ')
    print('P-value:', p_val_central)
    print('T-stat:', t_stat_central)
    print('-'*20)


DP local baseline vs. exp. partition 0: 
P-value: 0.8656102841226987
T-stat: 0.17038632796070244
--------------------
DP local baseline vs. exp. partition 1: 
P-value: 0.8362336127485084
T-stat: 0.2081369359948047
--------------------
DP local baseline vs. exp. partition 2: 
P-value: 0.866060442536285
T-stat: -0.169809922784167
--------------------
DP local baseline vs. exp. partition 3: 
P-value: 0.4763995258354966
T-stat: 0.7192296182996462
--------------------
DP local baseline vs. exp. partition 4: 
P-value: 0.46694601880890696
T-stat: -0.7348431286384811
--------------------
DP local baseline vs. exp. partition 5: 
P-value: 0.40796171516343105
T-stat: -0.8367551046627857
--------------------
DP local baseline vs. exp. partition 6: 
P-value: 0.4155461716852382
T-stat: -0.8231724910524568
--------------------
DP local baseline vs. exp. partition 7: 
P-value: 0.9195996539596989
T-stat: -0.10161109350929443
--------------------
DP local baseline vs. exp. partition 8: 
P-value: 0.06433