In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [2]:
params_df = pd.read_excel("C:\\Users\\Nefeli\\Desktop\\opti2_stat_scripts\\ga_results\\ga_full.xlsx")

In [3]:
minimized_df = params_df.copy(deep=True)
to_drop = ['expID_list', 'costVal_list', 'lastHit_list','feasibility_list',
          'costVal_mean', 'lastHit_mean','feasibility_mean','feasibility_median',
          'costVal_std', 'lastHit_std','feasibility_std',
          'costVal_min', 'lastHit_min','feasibility_min',
          'costVal_max', 'lastHit_max','feasibility_max']
minimized_df.drop(to_drop, axis=1, inplace=True)
minimized_df

Unnamed: 0,select_op,N,costVal_median,lastHit_median
0,2,50,449.651135,6368
1,2,100,447.033719,16591
2,2,200,441.783015,24785
3,25,50,394.480284,158950
4,25,100,390.132807,173084
5,25,200,379.444482,158801


Helper Functions

In [4]:
#isolates data that corresponds to a file 
#and gets the median result for the column defined as input, for each parametrization
def getMediansOf(minimized_df,columnName):
    outputList=[]
    for i in range(1):
        subDf = minimized_df
        resPerFile=[]
        for j in range(6):
            resPerFile.append(subDf.iat[j, subDf.columns.get_loc(columnName)])
        outputList.append(resPerFile)
    return outputList

### Wilcoxon on Cost Value Medians 

In [5]:
costValList = getMediansOf(minimized_df,'costVal_median')

outdfC = pd.DataFrame(costValList,columns=['(s=2,50)','(s=2,100)','(s=2,200)','(N_tour=25,50)','(N_tour=25,100)','(N_tour=25,200)'],index=['0'])
outdfC

Unnamed: 0,"(s=2,50)","(s=2,100)","(s=2,200)","(N_tour=25,50)","(N_tour=25,100)","(N_tour=25,200)"
0,449.651135,447.033719,441.783015,394.480284,390.132807,379.444482


In [6]:
latex_table = outdfC.to_latex(
    index=False,        # Exclude row indices
    column_format='|c|c|c|c|c|c|c|',  # Specify column formatting
    escape=False        # Disable LaTeX escaping of special characters
)

# Print the LaTeX table or save it to a .tex file
print(latex_table)

\begin{tabular}{|c|c|c|c|c|c|c|}
\toprule
(s=2,50) & (s=2,100) & (s=2,200) & (N_tour=25,50) & (N_tour=25,100) & (N_tour=25,200) \\
\midrule
449.651135 & 447.033719 & 441.783015 & 394.480284 & 390.132807 & 379.444482 \\
\bottomrule
\end{tabular}



In [7]:
pVal_matrix_costVals = np.zeros((6,6))
H0_rej_matrix_costVals = np.zeros((6,6))
p = ['(s=2,50)','(s=2,100)','(s=2,200)','(N_tour=25,50)','(N_tour=25,100)','(N_tour=25,200)']
for i in range(6):
    current = outdfC[p[i]].values
    for j in range(6):
        if i==j:
            #necessary step because 
            pVal_matrix_costVals[i][j]= 10 # a pvalue cannot take a value of 10 
        else:
            statistic,pvalue = stats.wilcoxon(current,outdfC[p[j]].values, alternative='two-sided')
            pVal_matrix_costVals[i][j]=pvalue

print(p)
for i in range(6):
    print(str(p[i])+str(pVal_matrix_costVals[i][:]))
    
for i in range(6):
    for j in range(6):
        if pVal_matrix_costVals[i][j]<0.05:
            H0_rej_matrix_costVals[i][j]= 1
print(H0_rej_matrix_costVals)  

['(s=2,50)', '(s=2,100)', '(s=2,200)', '(N_tour=25,50)', '(N_tour=25,100)', '(N_tour=25,200)']
(s=2,50)[10.  1.  1.  1.  1.  1.]
(s=2,100)[ 1. 10.  1.  1.  1.  1.]
(s=2,200)[ 1.  1. 10.  1.  1.  1.]
(N_tour=25,50)[ 1.  1.  1. 10.  1.  1.]
(N_tour=25,100)[ 1.  1.  1.  1. 10.  1.]
(N_tour=25,200)[ 1.  1.  1.  1.  1. 10.]
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


### Wilcoxon on Last Hit

In [8]:
lastHitList = getMediansOf(minimized_df,'lastHit_median')

outdfL = pd.DataFrame(lastHitList,columns=['(s=2,50)','(s=2,100)','(s=2,200)','(N_tour=25,50)','(N_tour=25,100)','(N_tour=25,200)'],index=['0'])
outdfL

Unnamed: 0,"(s=2,50)","(s=2,100)","(s=2,200)","(N_tour=25,50)","(N_tour=25,100)","(N_tour=25,200)"
0,6368,16591,24785,158950,173084,158801


In [9]:
latex_table = outdfL.to_latex(
    index=False,        # Exclude row indices
    column_format='|c|c|c|c|c|c|c|',  # Specify column formatting
    escape=False        # Disable LaTeX escaping of special characters
)

# Print the LaTeX table or save it to a .tex file
print(latex_table)

\begin{tabular}{|c|c|c|c|c|c|c|}
\toprule
(s=2,50) & (s=2,100) & (s=2,200) & (N_tour=25,50) & (N_tour=25,100) & (N_tour=25,200) \\
\midrule
6368 & 16591 & 24785 & 158950 & 173084 & 158801 \\
\bottomrule
\end{tabular}



In [10]:
pVal_matrix_lastHit = np.zeros((6,6))
H0_rej_matrix_lastHit = np.zeros((6,6))
p = ['(s=2,50)','(s=2,100)','(s=2,200)','(N_tour=25,50)','(N_tour=25,100)','(N_tour=25,200)']
for i in range(6):
    current = outdfL[p[i]].values
    for j in range(6):
        if i==j:
            #necessary step because 
            pVal_matrix_lastHit[i][j]= 10 # a pvalue cannot take a value of 10 
        else:
            statistic,pvalue = stats.wilcoxon(current,outdfL[p[j]].values, alternative='two-sided')
            pVal_matrix_lastHit[i][j]=pvalue

print(p)
for i in range(6):
    print(str(p[i])+str(pVal_matrix_lastHit[i][:]))
    
for i in range(6):
    for j in range(6):
        if pVal_matrix_lastHit[i][j]<0.05:
            H0_rej_matrix_lastHit[i][j]= 1
print(H0_rej_matrix_lastHit)

['(s=2,50)', '(s=2,100)', '(s=2,200)', '(N_tour=25,50)', '(N_tour=25,100)', '(N_tour=25,200)']
(s=2,50)[10.  1.  1.  1.  1.  1.]
(s=2,100)[ 1. 10.  1.  1.  1.  1.]
(s=2,200)[ 1.  1. 10.  1.  1.  1.]
(N_tour=25,50)[ 1.  1.  1. 10.  1.  1.]
(N_tour=25,100)[ 1.  1.  1.  1. 10.  1.]
(N_tour=25,200)[ 1.  1.  1.  1.  1. 10.]
[[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]
