This file aims to construct a pairwise paired-t and welch table for comparison of systems of a simulation problem


In [9]:
#Just imports

from scipy import stats
import pandas as pd
import numpy as np
import math

In [2]:
#First iteration

In [3]:
# k = 14 : c = 91 : overall confidence = 90% : individual confidence = 0.0015151515

In [10]:
def confidence_intervals(x,y, alpha = 0.0010989010989011):
    """In this function we compute both the Welch and Paired T confidence intervals of significant differences of means
    of the supplied x and Y Series. Do note that we see x as the baseline and y as the new system, as the calculation
    below goes with x.mean - y.mean.
    The function returns an array of length two with as first entry the confidence interval according to welch and as 
    the second entry the confidence interval according to the paired-t test"""

    #WELCH
    ## Welch-Satterthwaite Degrees of Freedom ##
    dof = (x.var()/x.size + y.var()/y.size)**2 / ((x.var()/x.size)**2 / (x.size-1) + (y.var()/y.size)**2 / (y.size-1))
    lower_welch =x.mean()-y.mean()-stats.t.ppf(1-alpha/2, dof)*math.sqrt((x.var()/x.size) +(y.var()/y.size))
    upper_welch =x.mean()-y.mean()+stats.t.ppf(1-alpha/2, dof)*math.sqrt((x.var()/x.size) +(y.var()/y.size))

    results_welch = [lower_welch, upper_welch]

    #PAIRED - T
    paired_t_data = x - y #simulation - baseline
    half_width = (stats.t.ppf(1-alpha/2, paired_t_data.size)*paired_t_data.var())/np.sqrt(paired_t_data.size)
    barbar = x.mean() - y.mean()
    lower_paired_t = barbar - half_width
    upper_paired_t = barbar + half_width

    results_paired_t = [lower_paired_t, upper_paired_t]

    #return the two results arrays
    return [results_welch, results_paired_t]




In [12]:
#We read in all the systems to investigate in order to quickly access the folders relating to them in the next step
systems = ["(3,2)Comparative new", "(3,2)Comparative upgrade", "(3,3)Comparative new", "(3,3)Comparative upgrade",

               "(4,2)Comparative new","(4,2)Comparative upgrade","(4,3)Comparative new","(4,3)Comparative upgrade",
               "(5,2)Comparative new","(5,2)Comparative upgrade","(5,3)Comparative new","(5,3)Comparative upgrade",
                "(2,2) new","(2,2)upgrade"]


In [14]:
#We first construct two symetrical empty dataframes with the systems as columns and rows
pairwise_welch = pd.DataFrame(columns = systems, index= systems)
pairwise_paired_t = pd.DataFrame(columns = systems, index = systems)

#We loop through all the systems to select a system to compare with the others
for x_name in systems:
    #make a copy of the systems in order to be able to remove the current system(x_name) from the system list 
    comp_systems = systems.copy()
    comp_systems.remove(x_name)
    #read in our current system (x) and select the relevant performance measure making it a series object
    x = pd.read_csv("output/"+x_name+"/performance.csv")
    x = x['objective function']
    
    #loop through the systems - x list to compare x with all the other systems(comp_systems)
    for y_name in comp_systems:
        #read in the y data and select the relevant performance measure
        y = pd.read_csv("output/"+y_name+"/performance.csv")
        y = y["objective function"]
        #apply the confidence intervals function, explained above
        results = confidence_intervals(x,y)

        #at row = baseline system (x) and column = new system (y) we insert the intervals according to both methods
        pairwise_welch.at[x_name, y_name] = [round(result, 5) for result in results[0]]
        pairwise_paired_t.at[x_name, y_name] = [round(result,5) for result in results[1]]

#to aid in readability, we delete the lower half of our symmetrical dataframe and replace it with NaN as these values
# are redundant
pairwise_welch.values[np.tril_indices_from(pairwise_welch, 0)] = np.nan
pairwise_paired_t.values[np.tril_indices_from(pairwise_paired_t, 0)] = np.nan

#export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/pairwise welch round.csv")
pairwise_paired_t.to_csv("comparative/pairwise paired-t round.csv")



In [7]:
#(3,2) is better than (5,3) : looking at the output of these runs

#this means that in order for the system at the row, to be better than the system at the column, the values should be negative

#example
#all values at the second row are negative and significant: which means that (3,2) upgrade is better than all other systems!
#and (3,2) new is worse than (3,2) upgrade => overall: (3,2) upgrade is performing better than all other systems + (3,2) new!

In [8]:
pairwise_paired_t

Unnamed: 0,"(3,2)Comparative new","(3,2)Comparative upgrade","(3,3)Comparative new","(3,3)Comparative upgrade","(4,2)Comparative new","(4,2)Comparative upgrade","(4,3)Comparative new","(4,3)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade","(5,3)Comparative new","(5,3)Comparative upgrade"
"(3,2)Comparative new",,"[0.16097963854902722, 0.17390185431084412]","[-0.35958319105396347, -0.35336787860640767]","[-0.2137287442889266, -0.20435802181051302]","[-0.19870841090454455, -0.1925674181438723]","[-0.03165735100447219, -0.026787100451625818]","[-0.5327500064801775, -0.5254212839725664]","[-0.413420220206594, -0.4071344706025874]","[-0.3257425785451546, -0.3160488184067483]","[-0.16213064356546653, -0.1537518782024097]","[-0.65368060735157, -0.647118614553484]","[-0.5281898070704906, -0.5196662867602094]"
"(3,2)Comparative upgrade",,,"[-0.529221766603788, -0.5186107959164545]","[-0.3821581850465451, -0.3708100739127659]","[-0.36773121948458537, -0.3584261024237028]","[-0.20241611303840526, -0.19090983127756408]","[-0.7021527405660691, -0.690900042746546]","[-0.5811828415106044, -0.5742533421584484]","[-0.4932867927348503, -0.4833860970769239]","[-0.3325146048514914, -0.31824940977625615]","[-0.8232603830050648, -0.8124203317598605]","[-0.6950432812635029, -0.6876943054270684]"
"(3,3)Comparative new",,,,"[0.144133404126944, 0.15073089943398751]","[0.15757514887895271, 0.16410009173300158]","[0.3241036077439127, 0.3304030104603604]","[-0.1753263586270419, -0.16989386216533078]","[-0.05632207767243807, -0.05128154347637221]","[0.032914202993322345, 0.03824546971514589]","[0.19471835610120802, 0.20235019179128688]","[-0.29642074284251824, -0.29142740940216455]","[-0.1701011611444627, -0.16480386302586617]"
"(3,3)Comparative upgrade",,,,,"[0.010881414749742899, 0.015929522301279878]","[0.17528294195438915, 0.18435937268895247]","[-0.32583383555005785, -0.31425068880324636]","[-0.20414093406917555, -0.19832699064056625]","[-0.11613581784373668, -0.1075688130087266]","[0.04510050526575253, 0.05710373906581086]","[-0.44592797484991814, -0.43678448095569616]","[-0.31945456941551376, -0.3103147583157466]"
"(4,2)Comparative new",,,,,,"[0.1619116346665649, 0.17091974292575393]","[-0.33807133605436784, -0.32882412534995914]","[-0.21784341391859238, -0.2114354478421722]","[-0.13014604969787874, -0.12036951820560732]","[0.03283496514090776, 0.04255834213963285]","[-0.45790608138825617, -0.4516173114683809]","[-0.33181377602488654, -0.3247664887573966]"
"(4,2)Comparative upgrade",,,,,,,"[-0.5019955451887427, -0.49773129380790315]","[-0.38370544818017716, -0.37840479117290626]","[-0.29518715420055974, -0.28815979129524516]","[-0.13316366098223087, -0.12427440932954735]","[-0.6253553562991081, -0.6169994141498478]","[-0.4988665855663478, -0.4905450568082542]"
"(4,3)Comparative new",,,,,,,,"[0.11598690251952792, 0.12162969712403449]","[0.2044723087846284, 0.21190758471621252]","[0.3674471484387434, 0.3748416202461242]","[-0.12423859461877376, -0.11838933683353633]","[0.0015340310426679138, 0.008781165579375918]"
"(4,3)Comparative upgrade",,,,,,,,,"[0.08616578058273908, 0.09259751327453944]","[0.2470652843048792, 0.25760688473642596]","[-0.24322928918423545, -0.23701524191163706]","[-0.11722230924057986, -0.11007909378093872]"
"(5,2)Comparative new",,,,,,,,,,"[0.15761672463949314, 0.16829215054453353]","[-0.3347086353857417, -0.32429918956740933]","[-0.20749982644656856, -0.19856487043222854]"
"(5,2)Comparative upgrade",,,,,,,,,,,"[-0.49675413548446956, -0.48816256465270813]","[-0.370465185789244, -0.3615083862735798]"


In [9]:
#overall conclusion pairwise_paired_t

#(5,3) is perfroming worse than all other systems and should be excluded out of our analysis


In [10]:
#Second iteration

In [11]:
# k = 10 : c = 45 : overall confidence = 90% : individual confidence = 0.002222222222 

In [12]:
#We read in all the systems to investigate in order to quickly access the folders relating to them in the next step
systems = ["(3,2)Comparative new", "(3,2)Comparative upgrade", "(3,3)Comparative new", "(3,3)Comparative upgrade",
               "(4,2)Comparative new","(4,2)Comparative upgrade","(4,3)Comparative new","(4,3)Comparative upgrade",
               "(5,2)Comparative new","(5,2)Comparative upgrade"]


In [13]:
#We first construct two symetrical empty dataframes with the systems as columns and rows
pairwise_welch = pd.DataFrame(columns = systems, index= systems)
pairwise_paired_t = pd.DataFrame(columns = systems, index = systems)

#We loop through all the systems to select a system to compare with the others
for x_name in systems:
    #make a copy of the systems in order to be able to remove the current system(x_name) from the system list 
    comp_systems = systems.copy()
    comp_systems.remove(x_name)
    #read in our current system (x) and select the relevant performance measure making it a series object
    x = pd.read_csv("output/"+x_name+"/performance.csv")
    x = x['objective function']
    
    #loop through the systems - x list to compare x with all the other systems(comp_systems)
    for y_name in comp_systems:
        #read in the y data and select the relevant performance measure
        y = pd.read_csv("output/"+y_name+"/performance.csv")
        y = y["objective function"]
        #apply the confidence intervals function, explained above
        results = confidence_intervals(x,y, alpha = 0.0022222222222)

        #at row = baseline system (x) and column = new system (y) we insert the intervals according to both methods
        pairwise_welch.at[x_name, y_name] = results[0]
        pairwise_paired_t.at[x_name, y_name] = results[1]

#to aid in readability, we delete the lower half of our symmetrical dataframe and replace it with NaN as these values
# are redundant
pairwise_welch.values[np.tril_indices_from(pairwise_welch, 0)] = np.nan
pairwise_paired_t.values[np.tril_indices_from(pairwise_paired_t, 0)] = np.nan

#export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/stage 2 pairwise welch.csv")
pairwise_paired_t.to_csv("comparative/stage 2 pairwise paired-t.csv")


In [14]:
pairwise_paired_t

Unnamed: 0,"(3,2)Comparative new","(3,2)Comparative upgrade","(3,3)Comparative new","(3,3)Comparative upgrade","(4,2)Comparative new","(4,2)Comparative upgrade","(4,3)Comparative new","(4,3)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade"
"(3,2)Comparative new",,"[0.16124610578719606, 0.17363538707267528]","[-0.35945502594878465, -0.3534960437115865]","[-0.21353551190645487, -0.20455125419298475]","[-0.19858177833543159, -0.19269405071298526]","[-0.0315569222360684, -0.026887529220029602]","[-0.532598881898921, -0.5255724085538228]","[-0.4132906026264767, -0.4072640881827047]","[-0.3255426848474201, -0.3162487121044828]","[-0.16195786619456426, -0.15392465557331197]"
"(3,2)Comparative upgrade",,,"[-0.5290029592331744, -0.5188296032870681]","[-0.3819241772039283, -0.37104408175538267]","[-0.3675393399424268, -0.35861798196586137]","[-0.20217884358099042, -0.19114710073497893]","[-0.701920700228314, -0.6911320830843012]","[-0.5810399492552593, -0.5743962344137935]","[-0.4930826318481614, -0.4835902579636128]","[-0.33222044422117064, -0.31854357040657694]"
"(3,3)Comparative new",,,,"[0.14426945017242906, 0.15059485338850245]","[0.15770969883025945, 0.16396554178169484]","[0.32423350686298835, 0.3302731113412848]","[-0.1752143358628971, -0.17000588492947558]","[-0.05621813750839205, -0.05138548364041823]","[0.03302413831289064, 0.0381355343955776]","[0.19487573113780995, 0.20219281675468495]"
"(3,3)Comparative upgrade",,,,,"[0.010985511082912947, 0.01582542596810983]","[0.1754701057871907, 0.18417220885615093]","[-0.3255949810703139, -0.3144895432829903]","[-0.20402104553946465, -0.19844687917027715]","[-0.11595915881251158, -0.1077454720399517]","[0.04534802230298135, 0.05685622202858204]"
"(4,2)Comparative new",,,,,,"[0.16209738963097625, 0.1707339879613426]","[-0.33788065059124084, -0.32901481081308614]","[-0.21771127612936675, -0.21156758563139783]","[-0.12994444918341325, -0.1205711187200728]","[0.03303546956385514, 0.04235783771668547]"
"(4,2)Comparative upgrade",,,,,,,"[-0.5019076126461158, -0.49781922635053005]","[-0.3835961440593327, -0.37851409529375074]","[-0.29504224391431283, -0.2883047015814921]","[-0.13298035694359397, -0.12445771336818424]"
"(4,3)Comparative new",,,,,,,,"[0.11610326181282031, 0.1215133378307421]","[0.20462563058942035, 0.21175426291142058]","[0.3675996288272888, 0.3746891398575788]"
"(4,3)Comparative upgrade",,,,,,,,,"[0.08629840846007947, 0.09246488539719905]","[0.24728266120096648, 0.2573895078403387]"
"(5,2)Comparative new",,,,,,,,,,"[0.15783686113230716, 0.1680720140517195]"
"(5,2)Comparative upgrade",,,,,,,,,,


In [15]:
#overall conclusion pairwise_paired_t

#(4,3) is performing worse that all other systems and should be excluded from the analysis!

In [16]:
#third iteration

In [17]:
# k = 8 : c = 28 : overall confidence = 90% : individual confidence = 0.0035714286 

In [18]:
#We read in all the systems to investigate in order to quickly access the folders relating to them in the next step
systems = ["(3,2)Comparative new", "(3,2)Comparative upgrade", "(3,3)Comparative new", "(3,3)Comparative upgrade",
               "(4,2)Comparative new","(4,2)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade"]


In [19]:
#We first construct two symetrical empty dataframes with the systems as columns and rows
pairwise_welch = pd.DataFrame(columns = systems, index= systems)
pairwise_paired_t = pd.DataFrame(columns = systems, index = systems)

#We loop through all the systems to select a system to compare with the others
for x_name in systems:
    #make a copy of the systems in order to be able to remove the current system(x_name) from the system list 
    comp_systems = systems.copy()
    comp_systems.remove(x_name)
    #read in our current system (x) and select the relevant performance measure making it a series object
    x = pd.read_csv("output/"+x_name+"/performance.csv")
    x = x['objective function']
    
    #loop through the systems - x list to compare x with all the other systems(comp_systems)
    for y_name in comp_systems:
        #read in the y data and select the relevant performance measure
        y = pd.read_csv("output/"+y_name+"/performance.csv")
        y = y["objective function"]
        #apply the confidence intervals function, explained above
        results = confidence_intervals(x,y, alpha = 0.0035714286)

        #at row = baseline system (x) and column = new system (y) we insert the intervals according to both methods
        pairwise_welch.at[x_name, y_name] = results[0]
        pairwise_paired_t.at[x_name, y_name] = results[1]

#to aid in readability, we delete the lower half of our symmetrical dataframe and replace it with NaN as these values
# are redundant
pairwise_welch.values[np.tril_indices_from(pairwise_welch, 0)] = np.nan
pairwise_paired_t.values[np.tril_indices_from(pairwise_paired_t, 0)] = np.nan

#export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/stage 3 pairwise welch.csv")
pairwise_paired_t.to_csv("comparative/stage 3 pairwise paired-t.csv")


In [20]:
pairwise_paired_t

Unnamed: 0,"(3,2)Comparative new","(3,2)Comparative upgrade","(3,3)Comparative new","(3,3)Comparative upgrade","(4,2)Comparative new","(4,2)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade"
"(3,2)Comparative new",,"[0.16158211140485254, 0.1732993814550188]","[-0.35929341435396517, -0.35365765530640597]","[-0.21329185281173904, -0.20479491328770058]","[-0.19842209921355292, -0.19285372983486393]","[-0.03143028516582335, -0.02701416629027465]","[-0.32529062607338666, -0.31650077087851625]","[-0.16174000013483392, -0.15414252163304232]"
"(3,2)Comparative upgrade",,,"[-0.5287270509968898, -0.5191055115233527]","[-0.3816291017183443, -0.37133915724096667]","[-0.36729738673076123, -0.35885993517752696]","[-0.20187965531628083, -0.19144628899968852]","[-0.49282519230210464, -0.4838476975096696]","[-0.3318495182305671, -0.3189144963971805]"
"(3,3)Comparative new",,,,"[0.14444099934994545, 0.15042330421098607]","[0.15787936148674644, 0.16379587912520785]","[0.3243973049870578, 0.3301093132172153]","[0.03316276280274406, 0.03799690990572418]","[0.1950741754071337, 0.2019943724853612]"
"(3,3)Comparative upgrade",,,,,"[0.011116772820614499, 0.01569416423040828]","[0.17570611265871303, 0.1839362019846286]","[-0.11573639811851673, -0.10796823273394655]","[0.04566013239781361, 0.05654411193374978]"
"(4,2)Comparative new",,,,,,"[0.1623316199700555, 0.17049975762226335]","[-0.12969023817477918, -0.12082532972870687]","[0.03328829844044898, 0.042105008840091634]"
"(4,2)Comparative upgrade",,,,,,,"[-0.29485951724753845, -0.28848742824826645]","[-0.13274921713389592, -0.12468885317788231]"
"(5,2)Comparative new",,,,,,,,"[0.15811444534395358, 0.1677944298400731]"
"(5,2)Comparative upgrade",,,,,,,,


In [21]:
#overall conclusion pairwise_paired_t
"""we can safely delete (3,3) new and (5,2) new as they are only significantly better between themselves but not compared to the other systems"""

In [None]:
# k = 6 : c = 15 : overall confidence = 90% : individual confidence = 0,0066666666666667

In [22]:
#We read in all the systems to investigate in order to quickly access the folders relating to them in the next step
systems = ["(3,2)Comparative new", "(3,2)Comparative upgrade",  "(3,3)Comparative upgrade",
               "(4,2)Comparative new","(4,2)Comparative upgrade","(5,2)Comparative upgrade"]


In [23]:
#We first construct two symetrical empty dataframes with the systems as columns and rows
pairwise_welch = pd.DataFrame(columns = systems, index= systems)
pairwise_paired_t = pd.DataFrame(columns = systems, index = systems)

#We loop through all the systems to select a system to compare with the others
for x_name in systems:
    #make a copy of the systems in order to be able to remove the current system(x_name) from the system list
    comp_systems = systems.copy()
    comp_systems.remove(x_name)
    #read in our current system (x) and select the relevant performance measure making it a series object
    x = pd.read_csv("output/"+x_name+"/performance.csv")
    x = x['objective function']

    #loop through the systems - x list to compare x with all the other systems(comp_systems)
    for y_name in comp_systems:
        #read in the y data and select the relevant performance measure
        y = pd.read_csv("output/"+y_name+"/performance.csv")
        y = y["objective function"]
        #apply the confidence intervals function, explained above
        results = confidence_intervals(x,y, alpha = 0.0066666666666667)

        #at row = baseline system (x) and column = new system (y) we insert the intervals according to both methods
        pairwise_welch.at[x_name, y_name] = results[0]
        pairwise_paired_t.at[x_name, y_name] = results[1]

#to aid in readability, we delete the lower half of our symmetrical dataframe and replace it with NaN as these values
# are redundant
pairwise_welch.values[np.tril_indices_from(pairwise_welch, 0)] = np.nan
pairwise_paired_t.values[np.tril_indices_from(pairwise_paired_t, 0)] = np.nan

#export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/stage 4 pairwise welch.csv")
pairwise_paired_t.to_csv("comparative/stage 4 pairwise paired-t.csv")


In [24]:
pairwise_paired_t

Unnamed: 0,"(3,2)Comparative new","(3,2)Comparative upgrade","(3,3)Comparative upgrade","(4,2)Comparative new","(4,2)Comparative upgrade","(5,2)Comparative upgrade"
"(3,2)Comparative new",,"[0.16203613650858456, 0.17284535635128678]","[-0.21296261026357813, -0.2051241558358615]","[-0.19820633398206525, -0.1930694950663516]","[-0.031259167762027106, -0.0271852836940709]","[-0.16144561023427606, -0.15443691153360017]"
"(3,2)Comparative upgrade",,,"[-0.38123038315260876, -0.3717378758067022]","[-0.3669704492444449, -0.3591868726638433]","[-0.20147537938819485, -0.1918505649277745]","[-0.33134830723137226, -0.3194157073963753]"
"(3,3)Comparative upgrade",,,,"[0.01129413927207938, 0.015516797778943396]","[0.17602501519692582, 0.1836172994464158]","[0.04608186885320499, 0.0561223754783584]"
"(4,2)Comparative new",,,,,"[0.16264812198195444, 0.1701832556103644]","[0.0336299315884975, 0.04176337569204311]"
"(4,2)Comparative upgrade",,,,,,"[-0.132436891176947, -0.12500117913483122]"
"(5,2)Comparative upgrade",,,,,,


In [7]:
pairwise_paired_t

In [8]:
#We first construct two symetrical empty dataframes with the systems as columns and rows
pairwise_welch = pd.DataFrame(columns = systems, index= systems)
pairwise_paired_t = pd.DataFrame(columns = systems, index = systems)

#We loop through all the systems to select a system to compare with the others
for x_name in systems:
    #make a copy of the systems in order to be able to remove the current system(x_name) from the system list 
    comp_systems = systems.copy()
    comp_systems.remove(x_name)
    #read in our current system (x) and select the relevant performance measure making it a series object
    x = pd.read_csv("output/"+x_name+"/performance.csv")
    x = x['objective function']
    
    #loop through the systems - x list to compare x with all the other systems(comp_systems)
    for y_name in comp_systems:
        #read in the y data and select the relevant performance measure
        y = pd.read_csv("output/"+y_name+"/performance.csv")
        y = y["objective function"]
        #apply the confidence intervals function, explained above
        results = confidence_intervals(x,y)

        #at row = baseline system (x) and column = new system (y) we insert the intervals according to both methods
        pairwise_welch.at[x_name, y_name] = results[0]
        pairwise_paired_t.at[x_name, y_name] = results[1]

#to aid in readability, we delete the lower half of our symmetrical dataframe and replace it with NaN as these values
# are redundant
pairwise_welch.values[np.tril_indices_from(pairwise_welch, 0)] = np.nan
pairwise_paired_t.values[np.tril_indices_from(pairwise_paired_t, 0)] = np.nan

#export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/pairwise welch.csv")
pairwise_paired_t.to_csv("comparative/pairwise paired-t.csv")



In [None]:
#(3,2) is better than (5,3) : looking at the output of these runs

#this means that in order for the system at the row, to be better than the system at the column, the values should be negative

#example
#all values at the second row are negative and significant: which means that (3,2) upgrade is better than all other systems!
#and (3,2) new is worse than (3,2) upgrade => overall: (3,2) upgrade is performing better than all other systems + (3,2) new!

In [10]:
pairwise_paired_t

Unnamed: 0,"(3,2)Comparative new","(3,2)Comparative upgrade","(3,3)Comparative new","(3,3)Comparative upgrade","(4,2)Comparative new","(4,2)Comparative upgrade","(4,3)Comparative new","(4,3)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade","(5,3)Comparative new","(5,3)Comparative upgrade"
"(3,2)Comparative new",,"[0.16097963854902633, 0.17390185431084323]","[-0.3595831910539639, -0.3533678786064081]","[-0.21372874428892705, -0.20435802181051346]","[-0.19870841090454588, -0.19256741814387363]","[-0.03165735100447352, -0.02678710045162715]","[-0.5327500064801777, -0.5254212839725666]","[-0.4134202202065947, -0.40713447060258806]","[-0.3257425785451548, -0.31604881840674853]","[-0.16213064356546697, -0.15375187820241015]","[-0.6536806073515709, -0.6471186145534848]","[-0.528189807070491, -0.5196662867602099]"
"(3,2)Comparative upgrade",,,"[-0.5292217666037875, -0.5186107959164541]","[-0.38215818504654464, -0.37081007391276544]","[-0.3677312194845858, -0.35842610242370326]","[-0.2024161130384057, -0.19090983127756453]","[-0.7021527405660685, -0.6909000427465454]","[-0.5811828415106042, -0.5742533421584481]","[-0.49328679273484966, -0.48338609707692326]","[-0.332514604851491, -0.3182494097762557]","[-0.8232603830050648, -0.8124203317598605]","[-0.6950432812635025, -0.687694305427068]"
"(3,3)Comparative new",,,,"[0.144133404126944, 0.15073089943398751]","[0.15757514887895183, 0.1641000917330007]","[0.32410360774391184, 0.3304030104603595]","[-0.17532635862704168, -0.16989386216533056]","[-0.05632207767243829, -0.05128154347637243]","[0.03291420299332257, 0.03824546971514611]","[0.19471835610120802, 0.20235019179128688]","[-0.2964207428425187, -0.291427409402165]","[-0.1701011611444627, -0.16480386302586617]"
"(3,3)Comparative upgrade",,,,,"[0.01088141474974201, 0.01592952230127899]","[0.17528294195438826, 0.18435937268895158]","[-0.3258338355500576, -0.31425068880324614]","[-0.20414093406917577, -0.19832699064056647]","[-0.11613581784373646, -0.10756881300872638]","[0.04510050526575253, 0.05710373906581086]","[-0.4459279748499186, -0.4367844809556966]","[-0.31945456941551376, -0.3103147583157466]"
"(4,2)Comparative new",,,,,,"[0.1619116346665649, 0.17091974292575393]","[-0.33807133605436673, -0.32882412534995803]","[-0.21784341391859172, -0.21143544784217153]","[-0.13014604969787763, -0.12036951820560621]","[0.03283496514090865, 0.04255834213963374]","[-0.4579060813882557, -0.45161731146838047]","[-0.33181377602488565, -0.3247664887573957]"
"(4,2)Comparative upgrade",,,,,,,"[-0.5019955451887416, -0.49773129380790204]","[-0.3837054481801765, -0.3784047911729056]","[-0.29518715420055863, -0.28815979129524405]","[-0.13316366098222998, -0.12427440932954646]","[-0.6253553562991077, -0.6169994141498474]","[-0.4988665855663469, -0.49054505680825333]"
"(4,3)Comparative new",,,,,,,,"[0.11598690251952748, 0.12162969712403404]","[0.2044723087846284, 0.21190758471621252]","[0.36744714843874315, 0.374841620246124]","[-0.12423859461877443, -0.118389336833537]","[0.0015340310426676918, 0.008781165579375695]"
"(4,3)Comparative upgrade",,,,,,,,,"[0.08616578058273952, 0.09259751327453988]","[0.24706528430487942, 0.2576068847364262]","[-0.24322928918423567, -0.23701524191163728]","[-0.11722230924057964, -0.1100790937809385]"
"(5,2)Comparative new",,,,,,,,,,"[0.15761672463949292, 0.1682921505445333]","[-0.33470863538574236, -0.32429918956741]","[-0.20749982644656878, -0.19856487043222876]"
"(5,2)Comparative upgrade",,,,,,,,,,,"[-0.49675413548447, -0.4881625646527086]","[-0.370465185789244, -0.3615083862735798]"


In [None]:
#overall conclusion pairwise_paired_t

#(5,3) is perfroming worse than all other systems and should be excluded out of our analysis


In [None]:
#Second iteration

In [None]:
# k = 10 : c = 45 : overall confidence = 90% : individual confidence = 0.002222222222 

In [11]:
#We read in all the systems to investigate in order to quickly access the folders relating to them in the next step
systems = ["(3,2)Comparative new", "(3,2)Comparative upgrade", "(3,3)Comparative new", "(3,3)Comparative upgrade",
               "(4,2)Comparative new","(4,2)Comparative upgrade","(4,3)Comparative new","(4,3)Comparative upgrade",
               "(5,2)Comparative new","(5,2)Comparative upgrade"]


In [12]:
#We first construct two symetrical empty dataframes with the systems as columns and rows
pairwise_welch = pd.DataFrame(columns = systems, index= systems)
pairwise_paired_t = pd.DataFrame(columns = systems, index = systems)

#We loop through all the systems to select a system to compare with the others
for x_name in systems:
    #make a copy of the systems in order to be able to remove the current system(x_name) from the system list 
    comp_systems = systems.copy()
    comp_systems.remove(x_name)
    #read in our current system (x) and select the relevant performance measure making it a series object
    x = pd.read_csv("output/"+x_name+"/performance.csv")
    x = x['objective function']
    
    #loop through the systems - x list to compare x with all the other systems(comp_systems)
    for y_name in comp_systems:
        #read in the y data and select the relevant performance measure
        y = pd.read_csv("output/"+y_name+"/performance.csv")
        y = y["objective function"]
        #apply the confidence intervals function, explained above
        results = confidence_intervals(x,y, alpha = 0.0022222222222)

        #at row = baseline system (x) and column = new system (y) we insert the intervals according to both methods
        pairwise_welch.at[x_name, y_name] = results[0]
        pairwise_paired_t.at[x_name, y_name] = results[1]

#to aid in readability, we delete the lower half of our symmetrical dataframe and replace it with NaN as these values
# are redundant
pairwise_welch.values[np.tril_indices_from(pairwise_welch, 0)] = np.nan
pairwise_paired_t.values[np.tril_indices_from(pairwise_paired_t, 0)] = np.nan

#export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/pairwise welch.csv")
pairwise_paired_t.to_csv("comparative/pairwise paired-t.csv")


In [13]:
pairwise_paired_t

Unnamed: 0,"(3,2)Comparative new","(3,2)Comparative upgrade","(3,3)Comparative new","(3,3)Comparative upgrade","(4,2)Comparative new","(4,2)Comparative upgrade","(4,3)Comparative new","(4,3)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade"
"(3,2)Comparative new",,"[0.16124610578719517, 0.1736353870726744]","[-0.3594550259487851, -0.35349604371158694]","[-0.2135355119064553, -0.2045512541929852]","[-0.19858177833543292, -0.1926940507129866]","[-0.031556922236069734, -0.026887529220030935]","[-0.5325988818989212, -0.525572408553823]","[-0.4132906026264774, -0.4072640881827054]","[-0.3255426848474203, -0.316248712104483]","[-0.1619578661945647, -0.15392465557331242]"
"(3,2)Comparative upgrade",,,"[-0.529002959233174, -0.5188296032870676]","[-0.38192417720392785, -0.3710440817553822]","[-0.36753933994242727, -0.3586179819658618]","[-0.20217884358099086, -0.19114710073497937]","[-0.7019207002283133, -0.6911320830843005]","[-0.581039949255259, -0.5743962344137933]","[-0.49308263184816076, -0.48359025796361216]","[-0.3322204442211702, -0.3185435704065765]"
"(3,3)Comparative new",,,,"[0.14426945017242906, 0.15059485338850245]","[0.15770969883025857, 0.16396554178169395]","[0.32423350686298746, 0.3302731113412839]","[-0.1752143358628969, -0.17000588492947535]","[-0.056218137508392274, -0.05138548364041845]","[0.03302413831289086, 0.03813553439557782]","[0.19487573113780995, 0.20219281675468495]"
"(3,3)Comparative upgrade",,,,,"[0.010985511082912059, 0.01582542596810894]","[0.1754701057871898, 0.18417220885615004]","[-0.3255949810703137, -0.31448954328299006]","[-0.20402104553946487, -0.19844687917027737]","[-0.11595915881251136, -0.10774547203995148]","[0.04534802230298135, 0.05685622202858204]"
"(4,2)Comparative new",,,,,,"[0.16209738963097625, 0.1707339879613426]","[-0.33788065059123973, -0.32901481081308503]","[-0.21771127612936608, -0.21156758563139716]","[-0.12994444918341214, -0.12057111872007169]","[0.03303546956385603, 0.04235783771668636]"
"(4,2)Comparative upgrade",,,,,,,"[-0.5019076126461147, -0.49781922635052894]","[-0.383596144059332, -0.3785140952937501]","[-0.2950422439143117, -0.28830470158149096]","[-0.1329803569435931, -0.12445771336818336]"
"(4,3)Comparative new",,,,,,,,"[0.11610326181281987, 0.12151333783074166]","[0.20462563058942035, 0.21175426291142058]","[0.3675996288272886, 0.37468913985757857]"
"(4,3)Comparative upgrade",,,,,,,,,"[0.08629840846007991, 0.09246488539719949]","[0.2472826612009667, 0.2573895078403389]"
"(5,2)Comparative new",,,,,,,,,,"[0.15783686113230694, 0.1680720140517193]"
"(5,2)Comparative upgrade",,,,,,,,,,


In [None]:
#overall conclusion pairwise_paired_t

#(4,3) is performing worse that all other systems and should be excluded from the analysis!

In [None]:
#third iteration

In [None]:
# k = 8 : c = 28 : overall confidence = 90% : individual confidence = 0.0035714286 

In [14]:
#We read in all the systems to investigate in order to quickly access the folders relating to them in the next step
systems = ["(3,2)Comparative new", "(3,2)Comparative upgrade", "(3,3)Comparative new", "(3,3)Comparative upgrade",
               "(4,2)Comparative new","(4,2)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade"]


In [15]:
#We first construct two symetrical empty dataframes with the systems as columns and rows
pairwise_welch = pd.DataFrame(columns = systems, index= systems)
pairwise_paired_t = pd.DataFrame(columns = systems, index = systems)

#We loop through all the systems to select a system to compare with the others
for x_name in systems:
    #make a copy of the systems in order to be able to remove the current system(x_name) from the system list 
    comp_systems = systems.copy()
    comp_systems.remove(x_name)
    #read in our current system (x) and select the relevant performance measure making it a series object
    x = pd.read_csv("output/"+x_name+"/performance.csv")
    x = x['objective function']
    
    #loop through the systems - x list to compare x with all the other systems(comp_systems)
    for y_name in comp_systems:
        #read in the y data and select the relevant performance measure
        y = pd.read_csv("output/"+y_name+"/performance.csv")
        y = y["objective function"]
        #apply the confidence intervals function, explained above
        results = confidence_intervals(x,y, alpha = 0.0035714286)

        #at row = baseline system (x) and column = new system (y) we insert the intervals according to both methods
        pairwise_welch.at[x_name, y_name] = results[0]
        pairwise_paired_t.at[x_name, y_name] = results[1]

#to aid in readability, we delete the lower half of our symmetrical dataframe and replace it with NaN as these values
# are redundant
pairwise_welch.values[np.tril_indices_from(pairwise_welch, 0)] = np.nan
pairwise_paired_t.values[np.tril_indices_from(pairwise_paired_t, 0)] = np.nan

#export to csv files in the comparative folder
pairwise_welch.to_csv("comparative/pairwise welch.csv")
pairwise_paired_t.to_csv("comparative/pairwise paired-t.csv")


In [16]:
pairwise_paired_t

Unnamed: 0,"(3,2)Comparative new","(3,2)Comparative upgrade","(3,3)Comparative new","(3,3)Comparative upgrade","(4,2)Comparative new","(4,2)Comparative upgrade","(5,2)Comparative new","(5,2)Comparative upgrade"
"(3,2)Comparative new",,"[0.16158211140485165, 0.17329938145501791]","[-0.3592934143539656, -0.3536576553064064]","[-0.21329185281173949, -0.20479491328770102]","[-0.19842209921355425, -0.19285372983486526]","[-0.03143028516582468, -0.027014166290275983]","[-0.3252906260733869, -0.31650077087851647]","[-0.16174000013483436, -0.15414252163304276]"
"(3,2)Comparative upgrade",,,"[-0.5287270509968893, -0.5191055115233523]","[-0.38162910171834385, -0.3713391572409662]","[-0.3672973867307617, -0.3588599351775274]","[-0.20187965531628127, -0.19144628899968896]","[-0.492825192302104, -0.48384769750966894]","[-0.33184951823056663, -0.31891449639718006]"
"(3,3)Comparative new",,,,"[0.14444099934994542, 0.1504233042109861]","[0.15787936148674556, 0.16379587912520696]","[0.32439730498705693, 0.33010931321721443]","[0.03316276280274428, 0.0379969099057244]","[0.1950741754071337, 0.2019943724853612]"
"(3,3)Comparative upgrade",,,,,"[0.01111677282061361, 0.01569416423040739]","[0.17570611265871214, 0.1839362019846277]","[-0.1157363981185165, -0.10796823273394633]","[0.04566013239781361, 0.05654411193374978]"
"(4,2)Comparative new",,,,,,"[0.16233161997005546, 0.17049975762226338]","[-0.12969023817477807, -0.12082532972870576]","[0.033288298440449865, 0.04210500884009252]"
"(4,2)Comparative upgrade",,,,,,,"[-0.29485951724753734, -0.28848742824826534]","[-0.13274921713389504, -0.12468885317788142]"
"(5,2)Comparative new",,,,,,,,"[0.15811444534395336, 0.16779442984007287]"
"(5,2)Comparative upgrade",,,,,,,,


In [None]:
#overall conclusion pairwise_paired_t

#nothing?