# Statistics for the experiments

## Experiment I - Repair function vs no repair

In [2]:
import statistics
from scipy.stats import shapiro
from scipy.stats import mannwhitneyu, ttest_ind

In [3]:
# Check data normality
def check_normality(data):
    result = shapiro(data)
    
    if result.pvalue > 0.05:
        return True
    else:
        return False
    
# Answers the question - Is the data significantly different? In other words, should I reject the H0?
def ttest(d1, d2):
    t_stat, p_value = ttest_ind(d1, d2)
    print(f"p-value: {p_value}")

    if p_value >= 0.05:
        print("Not significantly different")
    else:
        print("Significantly different")
    
def mann_whitney(d1, d2):
    stat, p_value = mannwhitneyu(d1, d2, alternative='two-sided')
    print(f"p-value: {p_value}")
    
    if p_value >= 0.05:
        print("Not significantly different")
    else:
        print("Significantly different")


In [4]:
def stats(datasets):
    for i, data in enumerate(datasets):
        # Calculate average (mean)
        mean = statistics.mean(data)

        # Calculate median
        median = statistics.median(data)

        # Calculate standard deviation
        stdev = statistics.stdev(data)

        # Print results
        print("Data " + str(i))
        print(f"Mean: {mean}, Median: {median}, StDev: {stdev}")


def check_all_normal(datasets):
    all_normal = True
    for i, data in enumerate(datasets):
        data_normal = check_normality(data)
        if data_normal == False:
            all_normal = False

    if all_normal == True:
        print("All normal")
    else:
        print("Not all normal")

## Data

In [5]:
no_repair = [71, 65, 104, 5000, 5000]
repair = [48, 46, 69, 55, 55]
full = [68, 48, 49, 49, 58, 52, 66, 57, 55, 39]
compressed = [58, 48, 37, 48, 46, 49, 48, 38, 67, 40]
no_elitism = [58, 48, 37, 48, 46, 49, 48, 38, 67, 40]
full_elitism = [37, 47, 53, 45, 38, 45, 52, 51, 40, 48]
selection_elitism = [50, 46, 47, 53, 55, 47, 44, 65, 45, 49]
cross_2pt = [58, 48, 37, 48, 46, 49, 48, 38, 67, 40]
cross_uniform = [62, 74, 47, 48, 60, 49, 39, 47, 52, 36]
flip_bit = [58, 48, 37, 48, 46, 49, 48, 38, 67, 40]
custom70 = [33, 35, 39, 45, 42, 40, 36, 34, 43, 46]
custom40 = [32, 48, 49, 38, 35, 41, 38, 42, 45, 43]
custom10 = [37, 48, 34, 41, 50, 33, 51, 40, 48, 45]
static = [58, 48, 37, 48, 46, 49, 48, 38, 67, 40]
weighted = [50, 39, 43, 44, 47, 39, 40, 38, 39, 42]
dynamic = [35, 48, 38, 40, 43, 50, 33, 43, 44, 46]
combi = [52, 41, 47, 58, 36, 44, 42, 46, 46, 63]
dynamic_05 = [48, 47, 37, 42, 34, 44, 37, 38, 39, 44]
noHC = [48, 43, 48, 58, 57, 47, 41, 56, 49, 45]
HC = [30, 27, 26, 30, 24, 26, 22, 25, 29, 30]
SAHC = [25, 26, 24, 28, 22, 23, 22, 22, 27, 26]


# times
time_full = [
    652.8551862, 598.2001185, 563.6797307, 557.1440213, 659.1640007,
    611.8931332, 794.1486332, 647.1460776, 710.0754879, 547.9254956
]

time_compressed = [
    475.3052461, 463.502598, 511.1327515, 445.0050519, 451.7827125,
    392.9554772, 413.9644699, 415.3617647, 430.7680318, 423.0939028
]

time_no_hc = [
    667.870585, 554.8225119, 559.524121, 536.6998496, 534.7185338,
    601.7310145, 613.7756863, 690.618535, 517.061594, 532.5931456
]

time_hc = [
    710.3579452, 587.6239021, 601.9113791, 584.0862556, 573.8662109,
    639.7933168, 650.5472088, 733.2280288, 559.8270011, 573.7364788
]

time_sahc = [
    985.5258517, 808.832582, 931.2391014, 1119.190049, 1027.911329,
    943.9406071, 851.7622023, 1279.955733, 912.0652082, 818.9687214
]

## Repair function

In [6]:
datasets = []
datasets.append(no_repair)
datasets.append(repair)

stats(datasets)
check_all_normal(datasets)

# Not all normal so Mann-Whitney test
mann_whitney(no_repair, repair)


Data 0
Mean: 2048, Median: 104, StDev: 2694.835894818087
Data 1
Mean: 54.6, Median: 55, StDev: 9.016651263079881
Not all normal
p-value: 0.020784343168786933
Significantly different


## Representation

In [7]:
datasets = []
datasets.append(full)
datasets.append(compressed)

stats(datasets)
check_all_normal(datasets)

ttest(full, compressed)

times = []

times.append(time_full)
times.append(time_compressed)

stats(times)
check_all_normal(times)

ttest(time_full, time_compressed)

Data 0
Mean: 54.1, Median: 53.5, StDev: 8.698020208964541
Data 1
Mean: 47.9, Median: 48.0, StDev: 9.13418487514531
All normal
p-value: 0.1374876293933838
Not significantly different
Data 0
Mean: 634.22318849, Median: 629.5196054, StDev: 76.46864128324758
Data 1
Mean: 442.28720064, Median: 437.88654185, StDev: 34.70413807017296
All normal
p-value: 1.0080715610068273e-06
Significantly different


## Elitism

In [8]:
datasets = []
datasets.append(no_elitism)
datasets.append(full_elitism)
datasets.append(selection_elitism)

stats(datasets)
check_all_normal(datasets)

# Not all normal so Mann-Whitney test
mann_whitney(no_elitism, full_elitism)
mann_whitney(no_elitism, selection_elitism)

Data 0
Mean: 47.9, Median: 48.0, StDev: 9.13418487514531
Data 1
Mean: 45.6, Median: 46.0, StDev: 5.738757124441959
Data 2
Mean: 50.1, Median: 48.0, StDev: 6.279596590015424
Not all normal
p-value: 0.6759715637102298
Not significantly different
p-value: 0.5697331202732221
Not significantly different


## Crossover

In [9]:
datasets = []
datasets.append(cross_2pt)
datasets.append(cross_uniform)

stats(datasets)
check_all_normal(datasets)

ttest(cross_2pt, cross_uniform)

Data 0
Mean: 47.9, Median: 48.0, StDev: 9.13418487514531
Data 1
Mean: 51.4, Median: 48.5, StDev: 11.276327219248099
All normal
p-value: 0.45552553236418136
Not significantly different


## Mutation

In [10]:
datasets = []
datasets.append(flip_bit)
datasets.append(custom70)
datasets.append(custom40)
datasets.append(custom10)

stats(datasets)
check_all_normal(datasets)

ttest(flip_bit, custom70)
ttest(flip_bit, custom40)
ttest(flip_bit, custom10)
ttest(custom70, custom10)
ttest(custom70, custom40)


Data 0
Mean: 47.9, Median: 48.0, StDev: 9.13418487514531
Data 1
Mean: 39.3, Median: 39.5, StDev: 4.667856991049414
Data 2
Mean: 41.1, Median: 41.5, StDev: 5.466056876558986
Data 3
Mean: 42.7, Median: 43.0, StDev: 6.634087059355727
All normal
p-value: 0.01624715676935497
Significantly different
p-value: 0.05851138366248477
Not significantly different
p-value: 0.16244765879691508
Not significantly different
p-value: 0.20159743951385595
Not significantly different
p-value: 0.43873428147686944
Not significantly different


## Penalty function

In [11]:
datasets = []
datasets.append(static)
datasets.append(weighted)
datasets.append(dynamic)
datasets.append(combi)
datasets.append(dynamic_05)

stats(datasets)
check_all_normal(datasets)

ttest(static, weighted)
ttest(static, dynamic)
ttest(static, combi)
ttest(static, dynamic_05)

Data 0
Mean: 47.9, Median: 48.0, StDev: 9.13418487514531
Data 1
Mean: 42.1, Median: 41.0, StDev: 3.956710193526379
Data 2
Mean: 42, Median: 43.0, StDev: 5.497474167490214
Data 3
Mean: 47.5, Median: 46.0, StDev: 8.11377429642539
Data 4
Mean: 41, Median: 40.5, StDev: 4.69041575982343
All normal
p-value: 0.081931550245561
Not significantly different
p-value: 0.09713236605924601
Not significantly different
p-value: 0.9186848164942641
Not significantly different
p-value: 0.0476905308978795
Significantly different


## Hill climbers

In [12]:
datasets = []
datasets.append(noHC)
datasets.append(HC)
datasets.append(SAHC)

stats(datasets)
check_all_normal(datasets)

ttest(noHC, HC)
ttest(noHC, SAHC)
ttest(HC, SAHC)

time = []
time.append(time_no_hc)
time.append(time_hc)
time.append(time_sahc)

stats(times)
check_all_normal(times)

ttest(time_no_hc, time_hc)
ttest(time_no_hc, time_sahc)
ttest(time_hc, time_sahc)

Data 0
Mean: 49.2, Median: 48.0, StDev: 5.921711464320654
Data 1
Mean: 26.9, Median: 26.5, StDev: 2.8067379246694513
Data 2
Mean: 24.5, Median: 24.5, StDev: 2.223610677354389
All normal
p-value: 2.8557708722655913e-09
Significantly different
p-value: 3.179531498378863e-10
Significantly different
p-value: 0.048211536186093376
Significantly different
Data 0
Mean: 634.22318849, Median: 629.5196054, StDev: 76.46864128324758
Data 1
Mean: 442.28720064, Median: 437.88654185, StDev: 34.70413807017296
All normal
p-value: 0.1503611351923546
Not significantly different
p-value: 3.6654916181543056e-07
Significantly different
p-value: 1.6793087632596114e-06
Significantly different
