In [1]:
import os
import numpy as np
import pandas as pd
from scipy.stats import ks_2samp, wilcoxon

from data_processing.functions import combine_summary_dataframes as combine_summary_dataframes

In [2]:
def create_excel_file(dataframes_dict, file_name):
    """Write several DataFrames into one Excel workbook under ``./results``.

    Parameters
    ----------
    dataframes_dict : dict
        Mapping of sheet name -> ``pandas.DataFrame``. Every entry becomes one
        sheet; the key is passed to ``DataFrame.to_excel`` as ``sheet_name``.
    file_name : str
        Name of the workbook file. It is joined onto the ``./results``
        directory, which is created when it does not exist yet.
    """
    results_dir = "./results"
    # A single makedirs(exist_ok=True) replaces the isdir()/mkdir() pair, which
    # could fail if the directory appeared between the check and the creation.
    os.makedirs(results_dir, exist_ok=True)
    workbook_path = os.path.join(results_dir, file_name)
    with pd.ExcelWriter(workbook_path) as writer:
        for sheet_name, dataframe in dataframes_dict.items():
            dataframe.to_excel(writer, sheet_name=sheet_name)

### Kolmogorov-Smirnov test

In [3]:
def check_different_pairs(results_ks, p_value_threshold, file_name):
    """Write a text report of the table cells whose p-value exceeds a threshold.

    For every ``algorithm -> DataFrame`` entry of ``results_ks`` one section is
    written to ``./results/<file_name>``. A section lists each ``(row, column)``
    cell whose value is not missing, is strictly greater than
    ``p_value_threshold`` and is not exactly ``1``, followed by a separator
    line of 40 dashes.

    Parameters
    ----------
    results_ks : dict
        Mapping of algorithm name -> ``pandas.DataFrame`` of p-values.
    p_value_threshold : float
        Cells are reported only when their value is greater than this number.
    file_name : str
        Name of the report file; ``.txt`` is appended when it is missing.

    Notes
    -----
    NOTE(review): the function name suggests it reports pairs that *differ*,
    which would normally mean p-values *below* the threshold, yet the filter
    keeps values *above* it. The comparison is left unchanged here; confirm
    the intended direction before relying on the report.
    """
    results_dir = "./results"
    # Test the actual extension; a substring test would also accept names such
    # as "notes.txt_v2" and leave them without a ".txt" suffix.
    if not file_name.endswith(".txt"):
        file_name += ".txt"
    os.makedirs(results_dir, exist_ok=True)
    report_path = os.path.join(results_dir, file_name)

    with open(report_path, "w") as report:
        for algorithm, df in results_ks.items():
            report.write(f"Algorithm: {algorithm}\n")
            for row in df.index:
                for col in df.columns:
                    p_value = df.at[row, col]
                    # pd.isna also copes with None / pd.NA, which appear in
                    # object-dtype tables and make np.isnan raise TypeError.
                    if pd.isna(p_value):
                        continue
                    # Values equal to 1 are skipped -- presumably to drop the
                    # self-comparison diagonal; verify against the caller.
                    if p_value > p_value_threshold and p_value != 1:
                        report.write(f"{row} vs {col}: {p_value}\n")
            report.write('-' * 40 + "\n")

def ks_test(input_df, outer_keys, inner_keys, table_title, p_value_threshold=0.005):
    """Run pairwise two-sample Kolmogorov-Smirnov tests between modes, per algorithm.

    For each algorithm in ``inner_keys`` the ``"Req/s"`` values of every pair of
    modes from ``outer_keys`` are compared with ``scipy.stats.ks_2samp``. The
    p-values fill the upper triangle (diagonal included) of a square table
    indexed by ``outer_keys``; all other cells stay missing.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must provide the columns ``"mode"``, ``"algorithm"`` and ``"Req/s"``.
    outer_keys : list
        Values of the ``"mode"`` column to compare; also the table labels.
    inner_keys : list
        Values of the ``"algorithm"`` column; one table is built per entry.
    table_title : str
        File name handed to ``create_excel_file`` for the resulting workbook.
    p_value_threshold : float, optional
        Accepted for interface compatibility but not used by this function.

    Returns
    -------
    dict
        Mapping of algorithm -> ``pandas.DataFrame`` of p-values.

    Notes
    -----
    A pair is skipped (its cell stays missing) when either mode has no rows for
    the algorithm, because ``ks_2samp`` rejects empty samples. The workbook is
    written once after all tables are built, and only if there is at least one
    table to write.
    """
    results_tables = dict()

    for inner_key in inner_keys:
        results_df = pd.DataFrame(columns=outer_keys, index=outer_keys)

        # Extract every mode's sample once per algorithm rather than once per pair.
        algorithm_rows = input_df[input_df["algorithm"] == inner_key]
        samples = [
            algorithm_rows[algorithm_rows["mode"] == outer_key]["Req/s"].tolist()
            for outer_key in outer_keys
        ]

        for i, sample_i in enumerate(samples):
            # j starts at i so the diagonal (a mode against itself) is included.
            for j in range(i, len(samples)):
                sample_j = samples[j]
                if not sample_i or not sample_j:
                    continue
                _, p_value = ks_2samp(sample_i, sample_j)
                results_df.iloc[i, j] = p_value

        results_tables[inner_key] = results_df

    # Writing inside the loop would rewrite the whole workbook per algorithm.
    if results_tables:
        create_excel_file(results_tables, table_title)
    return results_tables

### Wilcoxon test

In [4]:
def wilcoxon_test(input_df, outer_keys, inner_keys, table_title, p_value_threshold=0.005):
    """Run pairwise Wilcoxon signed-rank tests between modes, per algorithm.

    For each algorithm in ``inner_keys`` the ``"Req/s"`` values of every pair of
    distinct modes from ``outer_keys`` are compared with
    ``scipy.stats.wilcoxon``. The p-values fill the strict upper triangle of a
    square table indexed by ``outer_keys``; all other cells stay missing.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must provide the columns ``"mode"``, ``"algorithm"`` and ``"Req/s"``.
    outer_keys : list
        Values of the ``"mode"`` column to compare; also the table labels.
    inner_keys : list
        Values of the ``"algorithm"`` column; one table is built per entry.
    table_title : str
        File name handed to ``create_excel_file`` for the resulting workbook.
    p_value_threshold : float, optional
        Accepted for interface compatibility but not used by this function.

    Returns
    -------
    dict
        Mapping of algorithm -> ``pandas.DataFrame`` of p-values.

    Raises
    ------
    ValueError
        If two non-empty groups being compared have different lengths.

    Notes
    -----
    NOTE(review): the signed-rank test pairs the two samples element by
    element, in the row order of ``input_df``. Confirm that the rows of the two
    modes really are matched observations; if they are independent runs, an
    unpaired test may be the appropriate choice.
    """
    results_tables = dict()
    modes = list(outer_keys)

    for inner_key in inner_keys:
        results_df = pd.DataFrame(columns=modes, index=modes)

        # Extract every mode's sample once per algorithm rather than once per pair.
        algorithm_rows = input_df[input_df["algorithm"] == inner_key]
        samples = [
            algorithm_rows[algorithm_rows["mode"] == mode]["Req/s"].tolist()
            for mode in modes
        ]

        for i in range(len(modes)):
            for j in range(i + 1, len(modes)):
                group_i, group_j = samples[i], samples[j]
                # Pairs without data on either side are left as missing cells.
                if not group_i or not group_j:
                    continue
                # Fail with the offending algorithm and modes named, instead
                # of letting scipy report a bare length mismatch.
                if len(group_i) != len(group_j):
                    raise ValueError(
                        "Wilcoxon signed-rank test needs paired samples of equal "
                        f"length, but algorithm {inner_key!r} has {len(group_i)} "
                        f"values for mode {modes[i]!r} and {len(group_j)} for "
                        f"mode {modes[j]!r}."
                    )
                _, p_value = wilcoxon(group_i, group_j)
                results_df.at[modes[i], modes[j]] = p_value

        results_tables[inner_key] = results_df

    # Write the workbook once, and only when there is at least one sheet.
    if results_tables:
        create_excel_file(results_tables, table_title)

    return results_tables

### Parameters

In [5]:
# Threshold handed to ks_test / wilcoxon_test by the figure cells.
p_value_threshold = 5e-4

# Both values are passed on to combine_summary_dataframes.
n_clients = 1_000
n_requests = 1_000

# Values matched against the "algorithm" column; one result table is built per entry.
inner_keys = [
    "P-256",
    "P-384",
    "P-521",
    "X25519",
    "X448",
    "kyber1024",
    "kyber512",
    "kyber768",
    "p256_kyber512",
    "p384_kyber768",
    "p521_kyber1024",
]

### Figure 5

In [6]:
# Modes compared for Figure 5: passed to combine_summary_dataframes and used
# as the row/column labels of the p-value tables.
outer_keys = [
    "qrng-extraction-qrandom-xor",
    "qrng-extraction-rngtools_reseed1",
    "no-qrng",
]
# Only the first element returned by combine_summary_dataframes is used.
modes_df, _ = combine_summary_dataframes(n_clients, n_requests, outer_keys, n_runs=100)

# Workbook names for the two test families.
title_ks = 'ks_test_by_algorithm_fig_5.xlsx'
title_wilcoxon = 'wilcoxon_test_by_algorithm_fig_5.xlsx'

results_ks = ks_test(
    input_df=modes_df,
    outer_keys=outer_keys,
    inner_keys=inner_keys,
    table_title=title_ks,
    p_value_threshold=p_value_threshold,
)
results_wilcoxon = wilcoxon_test(
    input_df=modes_df,
    outer_keys=outer_keys,
    inner_keys=inner_keys,
    table_title=title_wilcoxon,
    p_value_threshold=p_value_threshold,
)

### Figure 4

In [7]:
# Modes compared for Figure 4: passed to combine_summary_dataframes and used
# as the row/column labels of the p-value tables.
outer_keys = [
    "qrng-extraction-qrandom",
    "qrng-extraction-qrandom-xor",
    "qrng-extraction-qlib-xor",
    "qrng-extraction-qlib",
]
# Only the first element returned by combine_summary_dataframes is used.
modes_df, _ = combine_summary_dataframes(n_clients, n_requests, outer_keys, n_runs=100)

# Workbook names for the two test families.
title_ks = 'ks_test_by_algorithm_fig_4.xlsx'
title_wilcoxon = 'wilcoxon_test_by_algorithm_fig_4.xlsx'

results_ks = ks_test(
    input_df=modes_df,
    outer_keys=outer_keys,
    inner_keys=inner_keys,
    table_title=title_ks,
    p_value_threshold=p_value_threshold,
)
results_wilcoxon = wilcoxon_test(
    input_df=modes_df,
    outer_keys=outer_keys,
    inner_keys=inner_keys,
    table_title=title_wilcoxon,
    p_value_threshold=p_value_threshold,
)