This file constructs pairwise paired-t and Welch confidence-interval tables for comparing the systems of a simulation problem.


In [None]:
#Just imports

from scipy import stats
import pandas as pd
import numpy as np
import math

In [None]:
def confidence_intervals(x, y, alpha=0.0076923):
    """Compute Welch and paired-t confidence intervals for the mean difference ``x - y``.

    ``x`` is treated as the baseline and ``y`` as the new system, so every
    interval is an interval for ``mean(x) - mean(y)``.

    Args:
        x: pandas Series with the observations of the baseline system.
        y: pandas Series with the observations of the new system. For the
            paired-t interval the observations are paired by index label;
            pairs with a missing partner are ignored.
        alpha: Significance level of each two-sided interval (confidence
            level ``1 - alpha``).
            NOTE(review): the default looks like a Bonferroni-adjusted level
            (about 0.1 / 13) -- confirm against the experiment design.

    Returns:
        A list ``[welch, paired_t]`` in which each entry is a
        ``[lower, upper]`` list holding the bounds of the interval.
    """
    quantile_level = 1 - alpha / 2

    # WELCH
    # Per-sample contribution to the variance of the difference of the means.
    x_term = x.var() / x.size
    y_term = y.var() / y.size
    # Welch-Satterthwaite degrees of freedom.
    dof = (x_term + y_term) ** 2 / (
        x_term ** 2 / (x.size - 1) + y_term ** 2 / (y.size - 1)
    )
    mean_difference = x.mean() - y.mean()
    welch_half_width = stats.t.ppf(quantile_level, dof) * math.sqrt(x_term + y_term)
    results_welch = [
        mean_difference - welch_half_width,
        mean_difference + welch_half_width,
    ]

    # PAIRED-T
    # baseline - new system, observation by observation; dropping NaN keeps the
    # pair count consistent with what mean() and std() actually use.
    paired_t_data = (x - y).dropna()
    pairs = paired_t_data.size
    # Half-width is t_{n-1, 1-alpha/2} * S / sqrt(n), with S the sample
    # standard deviation (not the variance) of the differences and n - 1
    # degrees of freedom.
    paired_half_width = (
        stats.t.ppf(quantile_level, pairs - 1) * paired_t_data.std() / np.sqrt(pairs)
    )
    paired_mean = paired_t_data.mean()
    results_paired_t = [
        paired_mean - paired_half_width,
        paired_mean + paired_half_width,
    ]

    # return the two results arrays
    return [results_welch, results_paired_t]




In [None]:
# Names of all systems under investigation. Each name is also the name of the
# folder that holds that system's output, so the list is used later on to
# locate the files to read.
systems = [
    f"({first},{second})Comparative {variant}"
    for first in (3, 4, 5)
    for second in (2, 3)
    for variant in ("new", "upgrade")
]


In [None]:
# Build two empty square tables with the systems as both rows and columns.
# Row = baseline system (x), column = new system (y).
pairwise_welch = pd.DataFrame(columns=systems, index=systems)
pairwise_paired_t = pd.DataFrame(columns=systems, index=systems)

# Read the relevant performance measure of every system exactly once, instead
# of re-reading the same CSV files for every pair.
objective_by_system = {
    name: pd.read_csv("output/" + name + "/performance.csv")["objective function"]
    for name in systems
}

# Only the cells above the diagonal are filled: the (y, x) comparison is
# redundant with the (x, y) one and the diagonal would compare a system with
# itself, so those cells simply keep their initial NaN. Filling only the
# needed cells also avoids erasing the lower half through `DataFrame.values`,
# which is a read-only array when pandas Copy-on-Write is active.
for position, x_name in enumerate(systems):
    x = objective_by_system[x_name]
    for y_name in systems[position + 1:]:
        y = objective_by_system[y_name]
        # results[0] is the Welch interval, results[1] the paired-t interval
        results = confidence_intervals(x, y)
        pairwise_welch.at[x_name, y_name] = results[0]
        pairwise_paired_t.at[x_name, y_name] = results[1]

# export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/pairwise welch.csv")
pairwise_paired_t.to_csv("comparative/pairwise paired-t.csv")

