In [20]:
import pandas as pd
from scipy import stats

In [9]:
data_folder = '../Data/Produced/'


def _read_results(filename):
    """Load one pickled results table stored under ``data_folder``."""
    # NOTE: unpickling can execute arbitrary code; only read files produced by this project.
    return pd.read_pickle(data_folder + filename)


# Results obtained with the Verma and Lewis penalty coefficients
verma_greedy = _read_results('verma_greedy_broken_constraints.pkl')
verma_sa = _read_results('verma_sa_broken_constraints.pkl')
verma_tabu = _read_results('verma_tabu_broken_constraints.pkl')
# Results obtained with the monotone penalty coefficients (always =0)
monotone_greedy = _read_results('monotone_greedy_broken_constraints.pkl')
monotone_sa = _read_results('monotone_sa_broken_constraints.pkl')
monotone_tabu = _read_results('monotone_tabu_broken_constraints.pkl')

## Comparing Verma and Lewis to Monotone

### Look at overall significance

In [117]:
# Flatten the dataframes
flat = lambda df: df.to_numpy().flatten()
a1, a2, a3 = flat(verma_greedy), flat(verma_sa), flat(verma_tabu)
b1, b2, b3 = flat(monotone_greedy), flat(monotone_sa), flat(monotone_tabu)
# Calculate statistics
significance = pd.DataFrame(index=['t-statistic','p-value'])
significance['Greedy Algorithm'] = stats.ttest_ind(a1, b1)
significance['Simulated Annealing'] = stats.ttest_ind(a2, b2)
significance['Tabu Search'] = stats.ttest_ind(a3, b3)

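A negative t-statistic means that the mean of the Verma and Lewis results is smaller than the mean of the Monotone results. The p-values (shown below) are far below 0.05, so this difference is statistically significant: the number of broken constraints in the first is significantly smaller than in the second.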

In [40]:
# Show the overall t-test table: rows are 't-statistic' and 'p-value', one column per algorithm.
significance

Unnamed: 0,Greedy Algorithm,Simulated Annealing,Tabu Search
t-statistic,-17.89983,-19.04377,-18.62161
p-value,4.94995e-60,2.233729e-66,5.102605e-64


### Look at significant difference across different problems

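To get a better idea of where the difference comes from, we repeat the same t-test separately for each problem (one row of the result tables per problem).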

In [114]:
# Per-problem comparison: for every row (problem) of the result tables, run an
# independent two-sample t-test between the Verma and Lewis results and the
# Monotone results, separately for each algorithm.
column_names = ['Greedy Algorithm t-statistic', 'Greedy Algorithm p-value',
                'Simulated Annealing t-statistic', 'Simulated Annealing p-value',
                'Tabu Search t-statistic', 'Tabu Search p-value']

# (Verma and Lewis frame, Monotone frame) per algorithm, in the same order as
# the column pairs in `column_names`.
# NOTE(review): rows are matched by position, so this assumes all six frames
# list the problems in the same order -- confirm against the code that
# produced the pickles.
frame_pairs = [(verma_greedy, monotone_greedy),
               (verma_sa, monotone_sa),
               (verma_tabu, monotone_tabu)]

# Collect all rows first and build the DataFrame once: this avoids growing the
# frame row by row with .loc and yields numeric (float64) columns.
rows = []
for i in range(len(verma_greedy)):
    row = []
    for verma, monotone in frame_pairs:
        # Verma and Lewis sample first, so a negative t-statistic means its
        # mean is smaller than the Monotone mean for this problem.
        result = stats.ttest_ind(verma.iloc[i].to_numpy(), monotone.iloc[i].to_numpy())
        row.extend([result.statistic, result.pvalue])
    rows.append(row)

# Index labels stay the string form of the row position ('0', '1', ...),
# exactly as before, so existing label-based lookups keep working.
detailed_significance = pd.DataFrame(
    rows,
    columns=column_names,
    index=pd.Index([f'{i}' for i in range(len(rows))], name='Problem'),
)

In [115]:
# Show the per-problem t-test table: one row per problem, a t-statistic and a p-value column per algorithm.
detailed_significance

Unnamed: 0_level_0,Greedy Algorithm t-statistic,Greedy Algorithm p-value,Simulated Annealing t-statistic,Simulated Annealing p-value,Tabu Search t-statistic,Tabu Search p-value
Problem,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,-5.662429,2.266318e-05,-7.306421,8.696864e-07,-8.28936,1.472107e-07
1,-5.752806,1.877086e-05,-6.926833,1.78773e-06,-7.270663,9.299948e-07
2,-5.804057,1.687609e-05,-5.53047,2.98941e-05,-9.945917,9.707649e-09
3,-5.425439,3.732083e-05,-7.973593,2.568796e-07,-9.117955,3.626589e-08
4,-6.217113,7.246788e-06,-5.900312,1.383144e-05,-9.876442,1.081043e-08
5,-9.450195,2.11687e-08,-6.037527,1.04372e-05,-6.158793,8.154486e-06
6,-8.617814,8.362194e-08,-5.854645,1.519838e-05,-6.449166,4.551581e-06
7,-8.76927,6.472039e-08,-6.041676,1.034912e-05,-6.387608,5.145637e-06
8,-8.312193,1.414725e-07,-5.559962,2.809495e-05,-6.653488,3.040279e-06
9,-4.060914,0.0007333107,-7.100688,1.282092e-06,-4.214761,0.0005208584
