In [53]:
import json
import time
import random
import datetime
import copy
import pandas as pd
import numpy as np
import scipy.stats as stats
import glob
import os
import re

import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
from matplotlib.patches import FancyArrowPatch

from optimization_c_r_rc_100_100_dataset.aco import ACO
from optimization_c_r_rc_100_100_dataset.acs import ACS
from optimization_c_r_rc_100_100_dataset.bso import BSO
from optimization_c_r_rc_100_100_dataset.dka import DKA
from optimization_c_r_rc_100_100_dataset.ga import GA
from optimization_c_r_rc_100_100_dataset.pso import PSO
from optimization_c_r_rc_100_100_dataset.sa import SA
from optimization_c_r_rc_100_100_dataset.tabu_search import TS
from optimization_c_r_rc_100_100_dataset.woa import WOA
from optimization_c_r_rc_100_100_dataset.ils import ILS
from optimization_c_r_rc_100_100_dataset.ils_sa import ILS_SA
from optimization_c_r_rc_100_100_dataset.ils_ts import ILS_TS
from optimization_c_r_rc_100_100_dataset.ils_sa_ts import ILS_SA_TS
from optimization_c_r_rc_100_100_dataset.mga import MGA

import warnings
warnings.filterwarnings('ignore')

In [3]:
def get_analysis(model,depot,nodes,num_vehicle):
    start = time.time()
    model.set_model(nodes = nodes,depot = depot, num_vehicle = num_vehicle)
    solution,fitness = model.construct_solution()[-2:]
    end = time.time()
    run_time = end-start
    results = {"solution":[[i["id"] for i in sol] for sol in solution],
              "fitness":fitness,
              "run_time":run_time}
    
    return results

In [4]:
dir_path = "raw_data/c_r_rc_100_100"
files = glob.glob(os.path.join(dir_path,"*.txt"))

In [5]:
def get_data(filename):
    print(filename)
    with open(filename, 'r') as f:
        lines = f.readlines()
    
    #find the start of the line
    start = len([line for line in lines if len(line.strip().split())<9])
    
    parts = list(map(float,lines[start].strip().split()))
    depot = {
        "id": parts[0], #id
        "x": parts[1], #x coordinate
        "y": parts[2], #y coordinate
        "d": parts[3], #service duration or visiting time
        "S": parts[4], #profit of the location
        "f": parts[5], #not relevant for TOPTW
        "a": parts[6], #not relevant for TOPTW
        "list": parts[7], #not relevant for TOPTW
        "O": 0, #opening of time window (earliest time for start of service) for depot its a starting point
        "C": parts[8], #closing of time window (latest time for start of service) for depot its the maximum duration (Tmax)
    }

    nodes = []
    
    for line in lines[start+1:]:
        parts = list(map(float, line.strip().split()))
        if len(parts) >= 9:
            node = {
                "id": parts[0], #id
                "x": parts[1], #x coordinate
                "y": parts[2], #y coordinate
                "d": parts[3], #service duration or visiting time
                "S": parts[4], #profit of the location
                "f": parts[5], #not relevant for TOPTW
                "a": parts[6], #not relevant for TOPTW
                "list": parts[7], #not relevant for TOPTW
                "O": parts[8], #opening of time window (earliest time for start of service)
                "C": parts[9], #closing of time window (latest time for start of service)
            }
            nodes.append(node)
        else:
            continue
    return depot,nodes

In [5]:
df_results = pd.DataFrame()

#scenario
num_vehicle = 3
random_states = [10,20,30,40,50,60,70,80,90,100]

for file in files:
    depot,nodes = get_data(file)
    for random_state in random_states:

        results = {
            "method" : [],
            "fitness" : [],
            "run_time" : []
        }

#         aco = ACO(random_state = random_state)
#         results_aco = get_analysis(aco,depot,nodes,num_vehicle)
#         results["method"].append("aco")

#         acs = ACS(random_state = random_state)
#         results_acs = get_analysis(acs,depot,nodes,num_vehicle)
#         results["method"].append("acs")

#         bso = BSO(random_state = random_state)
#         results_bso = get_analysis(bso,depot,nodes,num_vehicle)
#         results["method"].append("bso")

#         dka = DKA(random_state = random_state)
#         results_dka = get_analysis(dka,depot,nodes,num_vehicle)
#         results["method"].append("dka")

#         ga = GA(random_state = random_state)
#         results_ga = get_analysis(ga,depot,nodes,num_vehicle)
#         results["method"].append("ga")

#         pso = PSO(random_state = random_state)
#         results_pso = get_analysis(pso,depot,nodes,num_vehicle)
#         results["method"].append("pso")

#         sa = SA(random_state = random_state)
#         results_sa = get_analysis(sa,depot,nodes,num_vehicle)
#         results["method"].append("sa")

#         ts = TS(random_state = random_state)
#         results_ts = get_analysis(ts,depot,nodes,num_vehicle)
#         results["method"].append("ts")

#         woa = WOA(random_state = random_state)
#         results_woa = get_analysis(woa,depot,nodes,num_vehicle)
#         results["method"].append("woa")
        
        ils = ILS(random_state = random_state)
        results_ils = get_analysis(ils,depot,nodes,num_vehicle)
        results["method"].append("ils")
        
        ils_sa = ILS_SA(random_state = random_state)
        results_ils_sa = get_analysis(ils_sa,depot,nodes,num_vehicle)
        results["method"].append("ils_sa")
        
        ils_ts = ILS_TS(random_state = random_state)
        results_ils_ts = get_analysis(ils_ts,depot,nodes,num_vehicle)
        results["method"].append("ils_ts")
        
        ils_sa_ts = ILS_SA_TS(random_state = random_state)
        results_ils_sa_ts = get_analysis(ils_sa_ts,depot,nodes,num_vehicle)
        results["method"].append("ils_sa_ts")
        
        mga = MGA(random_state = random_state)
        results_mga = get_analysis(mga,depot,nodes,num_vehicle)
        results["method"].append("mga")

        for i in [results_aco,results_acs,results_bso,results_dka,results_ga,
                  results_pso,results_sa,results_ts,results_woa,results_ils,
                  results_ils_sa,results_ils_ts,results_ils_sa_ts,results_mga]:
            results["fitness"].append(i["fitness"])
            results["run_time"].append(i["run_time"])

        results = pd.DataFrame(results)
        results["dataset"] = file.split("\\")[-1]
        results["random_state"] = random_state

        df_results = pd.concat([df_results,results])

        print("========================================")

raw_data/c_r_rc_100_100\c101.txt
raw_data/c_r_rc_100_100\c102.txt
raw_data/c_r_rc_100_100\c103.txt
raw_data/c_r_rc_100_100\c104.txt
raw_data/c_r_rc_100_100\c105.txt
raw_data/c_r_rc_100_100\c106.txt
raw_data/c_r_rc_100_100\c107.txt
raw_data/c_r_rc_100_100\c108.txt
raw_data/c_r_rc_100_100\c109.txt
raw_data/c_r_rc_100_100\r101.txt
raw_data/c_r_rc_100_100\r102.txt
raw_data/c_r_rc_100_100\r103.txt
raw_data/c_r_rc_100_100\r104.txt
raw_data/c_r_rc_100_100\r105.txt
raw_data/c_r_rc_100_100\r106.txt
raw_data/c_r_rc_100_100\r107.txt
raw_data/c_r_rc_100_100\r108.txt
raw_data/c_r_rc_100_100\r109.txt
raw_data/c_r_rc_100_100\r110.txt
raw_data/c_r_rc_100_100\r111.txt
raw_data/c_r_rc_100_100\r112.txt
raw_data/c_r_rc_100_100\rc101.txt
raw_data/c_r_rc_100_100\rc102.txt
raw_data/c_r_rc_100_100\rc103.txt
raw_data/c_r_rc_100_100\rc104.txt
raw_data/c_r_rc_100_100\rc105.txt
raw_data/c_r_rc_100_100\rc106.txt
raw_data/c_r_rc_100_100\rc107.txt
raw_data/c_r_rc_100_100\rc108.txt


In [6]:
df_results

Unnamed: 0,method,fitness,run_time,dataset
0,aco,570.0,56.822612,c101.txt
1,acs,320.0,23.056549,c101.txt
2,bso,260.0,6.900478,c101.txt
3,dka,340.0,3.057640,c101.txt
4,ga,360.0,9.767205,c101.txt
...,...,...,...,...
4,ga,374.0,35.867653,rc108.txt
5,pso,365.0,2.382843,rc108.txt
6,sa,485.0,9.011644,rc108.txt
7,ts,418.0,25.180608,rc108.txt


In [8]:
df_results.to_excel("dka_public_dataset.xlsx",index=False)

run sampe sini dulu aja gapapa

# Uji Statistik

In [9]:
def uji_statistik(first_data,second_data,alternative="two-sided",first_data_name="first",second_data_name="second"):
    # first_data is the target data, e.g. the results of Hybrid ACS-BSO 
    # second_data is a list of results from other method
    # the test is using Wilcoxon Rank-Sum Test
    p_values = []
    second_data_name = ["second" for i in range(len(second_data))] if second_data_name == "second" else second_data_name
    for i in range(len(second_data)):
        statistic, p_value = stats.ranksums(first_data, second_data[i],alternative=alternative)
        p_values.append(p_value)
        print(f"Wilcoxon Rank-Sum Test ({first_data_name} vs. {second_data_name[i]}):")
        print("Test Statistic:", statistic)
        print("p-value:", p_value)
        print("")
    return p_values

def save_to_excel_sheet(df,filename,sheet_name):
    try:
        with pd.ExcelWriter(filename, mode="a", engine="openpyxl", if_sheet_exists='replace') as writer:

            # use to_excel function and specify the sheet_name and index to 
            # store the dataframe in specified sheet
            df.to_excel(writer, sheet_name=sheet_name, index=False)
    except:
        with pd.ExcelWriter(filename, mode="w", engine="openpyxl") as writer:

            # use to_excel function and specify the sheet_name and index to 
            # store the dataframe in specified sheet
            df.to_excel(writer, sheet_name=sheet_name, index=False)

In [22]:
results_df = pd.read_excel("dka_public_dataset.xlsx")
results_df["group"] = results_df["dataset"].str.extract(r"^(rc|c|r)")
results_df.head()

Unnamed: 0,method,fitness,run_time,dataset,group
0,aco,570,56.822612,c101.txt,c
1,acs,320,23.056549,c101.txt,c
2,bso,260,6.900478,c101.txt,c
3,dka,340,3.05764,c101.txt,c
4,ga,360,9.767205,c101.txt,c


In [23]:
grouped = results_df.groupby(['group','dataset','method']).agg(fitness = ('fitness','mean'),
                                             std_fitness = ('fitness','std'),
                                             run_time = ('run_time','mean'),
                                             std_run_time = ('run_time','std'))

In [24]:
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,fitness,std_fitness,run_time,std_run_time
group,method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
c,aco,695.555556,86.618576,70.873467,20.514642
c,acs,497.777778,109.861023,39.027307,16.420574
c,bso,315.555556,93.956255,6.48201,1.641909
c,dka,453.333333,91.515026,4.557625,1.656582
c,ga,518.888889,92.930679,18.741117,6.519607
c,pso,547.777778,77.26218,4.342781,1.414405
c,sa,708.888889,96.234667,8.928674,0.238226
c,ts,651.111111,117.414271,24.82697,0.569366
c,woa,505.555556,91.530201,8.605259,1.017248
r,aco,446.416667,76.793061,49.075898,10.972078


In [39]:
# group_dataset = ["c","r","rc"]
group_dataset = results_df['dataset'].unique()

df_stat = pd.DataFrame()
for group in group_dataset:
    print("group: ",group)
    metrics = "fitness"
    alternative = "two-sided"
    first_data = results_df[(results_df['method']=="dka")&
                            (results_df['group']==group)][metrics].values
    first_data_name = "dka"

    second_data = []
    second_data_name = []
    for method in results_df[~results_df['method'].isin(["dka"])]['method'].unique():
        second_data.append(results_df[(results_df['method']==method)&
                                      (results_df['group']==group)][metrics].values)
        second_data_name.append(method)

    p_values = uji_statistik(first_data,second_data,alternative=alternative,first_data_name=first_data_name,second_data_name=second_data_name)
    df_stat = pd.concat([df_stat,pd.DataFrame({'group':group,
                                               'metrics':metrics,
                                               'method':second_data_name,
                                               'p_value':p_values})])

group:  c
Wilcoxon Rank-Sum Test (dka vs. aco):
Test Statistic: -3.5320862855067836
p-value: 0.00041229480206169127

Wilcoxon Rank-Sum Test (dka vs. acs):
Test Statistic: -0.7505683356701914
p-value: 0.45291248342491686

Wilcoxon Rank-Sum Test (dka vs. bso):
Test Statistic: 2.6490647141300876
p-value: 0.008071487425268009

Wilcoxon Rank-Sum Test (dka vs. ga):
Test Statistic: -1.5011366713403829
p-value: 0.1333202155896274

Wilcoxon Rank-Sum Test (dka vs. pso):
Test Statistic: -1.8101942213222264
p-value: 0.07026567343786638

Wilcoxon Rank-Sum Test (dka vs. sa):
Test Statistic: -3.3554819712314443
p-value: 0.0007922682038502908

Wilcoxon Rank-Sum Test (dka vs. ts):
Test Statistic: -2.8698201069742617
p-value: 0.004107053847638632

Wilcoxon Rank-Sum Test (dka vs. woa):
Test Statistic: -1.059625885652035
p-value: 0.28931483238198774

group:  r
Wilcoxon Rank-Sum Test (dka vs. aco):
Test Statistic: -3.4641016151377544
p-value: 0.0005320055051392492

Wilcoxon Rank-Sum Test (dka vs. acs):
Tes

In [40]:
# group_dataset = ["c","r","rc"]
group_dataset = results_df['dataset'].unique()

df_stat2 = pd.DataFrame()
for group in group_dataset:
    print("group: ",group)
    metrics = "run_time"
    alternative = "two-sided"
    first_data = results_df[(results_df['method']=="dka")&
                            (results_df['group']==group)][metrics].values
    first_data_name = "dka"

    second_data = []
    second_data_name = []
    for method in results_df[~results_df['method'].isin(["dka"])]['method'].unique():
        second_data.append(results_df[(results_df['method']==method)&
                                      (results_df['group']==group)][metrics].values)
        second_data_name.append(method)

    p_values = uji_statistik(first_data,second_data,alternative=alternative,first_data_name=first_data_name,second_data_name=second_data_name)
    df_stat2 = pd.concat([df_stat2,pd.DataFrame({'group':group,
                                               'metrics':metrics,
                                               'method':second_data_name,
                                               'p_value':p_values})])

group:  c
Wilcoxon Rank-Sum Test (dka vs. aco):
Test Statistic: -3.5762373640756184
p-value: 0.000348575174213053

Wilcoxon Rank-Sum Test (dka vs. acs):
Test Statistic: -3.5762373640756184
p-value: 0.000348575174213053

Wilcoxon Rank-Sum Test (dka vs. bso):
Test Statistic: -2.0751006927352353
p-value: 0.03797720355552732

Wilcoxon Rank-Sum Test (dka vs. ga):
Test Statistic: -3.5762373640756184
p-value: 0.000348575174213053

Wilcoxon Rank-Sum Test (dka vs. pso):
Test Statistic: 0.4856618642571827
p-value: 0.6272069263720121

Wilcoxon Rank-Sum Test (dka vs. sa):
Test Statistic: -3.5762373640756184
p-value: 0.000348575174213053

Wilcoxon Rank-Sum Test (dka vs. ts):
Test Statistic: -3.5762373640756184
p-value: 0.000348575174213053

Wilcoxon Rank-Sum Test (dka vs. woa):
Test Statistic: -3.487935206937949
p-value: 0.0004867660221606486

group:  r
Wilcoxon Rank-Sum Test (dka vs. aco):
Test Statistic: -4.156921938165305
p-value: 3.225641456243767e-05

Wilcoxon Rank-Sum Test (dka vs. acs):
Test

In [41]:
df_stat

Unnamed: 0,group,metrics,method,p_value
0,c,fitness,aco,0.000412
1,c,fitness,acs,0.452912
2,c,fitness,bso,0.008071
3,c,fitness,ga,0.13332
4,c,fitness,pso,0.070266
5,c,fitness,sa,0.000792
6,c,fitness,ts,0.004107
7,c,fitness,woa,0.289315
0,r,fitness,aco,0.000532
1,r,fitness,acs,0.54437


In [42]:
df_stat2

Unnamed: 0,group,metrics,method,p_value
0,c,run_time,aco,0.000349
1,c,run_time,acs,0.000349
2,c,run_time,bso,0.037977
3,c,run_time,ga,0.000349
4,c,run_time,pso,0.627207
5,c,run_time,sa,0.000349
6,c,run_time,ts,0.000349
7,c,run_time,woa,0.000487
0,r,run_time,aco,3.2e-05
1,r,run_time,acs,3.2e-05


In [29]:
fitness_pivot = results_df.pivot(index=["dataset"],columns="method",values="fitness").reset_index().copy()
run_time_pivot = results_df.pivot(index=["dataset"],columns="method",values="run_time").reset_index().copy()

In [32]:
grouped1 = grouped.reset_index().copy()
grouped1['metrics'] = 'fitness'
grouped1 = grouped1.pivot(index=['metrics','group'],columns='method',values='fitness').reset_index()

grouped2 = grouped.reset_index().copy()
grouped2['metrics'] = 'run_time'
grouped2 = grouped2.pivot(index=['metrics','group'],columns='method',values='run_time').reset_index()

grouped3 = grouped.reset_index().copy()
grouped3['metrics'] = 'std_fitness'
grouped3 = grouped3.pivot(index=['metrics','group'],columns='method',values='std_fitness').reset_index()

grouped4 = grouped.reset_index().copy()
grouped4['metrics'] = 'std_run_time'
grouped4 = grouped4.pivot(index=['metrics','group'],columns='method',values='std_run_time').reset_index()


grouped = pd.concat([grouped1,grouped2,grouped3,grouped4])

In [33]:
grouped

method,metrics,group,aco,acs,bso,dka,ga,pso,sa,ts,woa
0,fitness,c,695.555556,497.777778,315.555556,453.333333,518.888889,547.777778,708.888889,651.111111,505.555556
1,fitness,r,446.416667,328.75,134.083333,311.666667,350.833333,361.5,448.0,377.666667,348.25
2,fitness,rc,450.75,281.5,158.125,347.5,370.625,377.375,464.5,361.25,373.625
0,run_time,c,70.873467,39.027307,6.48201,4.557625,18.741117,4.342781,8.928674,24.82697,8.605259
1,run_time,r,49.075898,44.179068,4.446357,4.301391,17.780878,3.905528,8.921507,25.082508,9.651576
2,run_time,rc,45.118035,24.355633,3.858394,3.640862,19.864744,4.496715,9.037262,25.365198,12.080064
0,std_fitness,c,86.618576,109.861023,93.956255,91.515026,92.930679,77.26218,96.234667,117.414271,91.530201
1,std_fitness,r,76.793061,84.197954,43.426968,46.964848,43.454958,45.723477,60.588028,64.210355,45.851787
2,std_fitness,rc,70.730373,51.205468,39.624803,21.287152,38.990612,36.229181,28.122182,40.276721,48.600228
0,std_run_time,c,20.514642,16.420574,1.641909,1.656582,6.519607,1.414405,0.238226,0.569366,1.017248


In [43]:
df_stat = df_stat.pivot(index=['metrics','group'],columns='method',values='p_value').reset_index()
df_stat2 = df_stat2.pivot(index=['metrics','group'],columns='method',values='p_value').reset_index()
df_stat = pd.concat([df_stat,df_stat2])

In [44]:
df_stat

method,metrics,group,aco,acs,bso,ga,pso,sa,ts,woa
0,fitness,c,0.000412,0.452912,0.008071,0.13332,0.070266,0.000792,0.004107,0.289315
1,fitness,r,0.000532,0.54437,4.1e-05,0.017926,0.024343,0.000344,0.007912,0.032663
2,fitness,rc,0.002762,0.011719,0.000778,0.207578,0.092892,0.000778,0.494837,0.115184
0,run_time,c,0.000349,0.000349,0.037977,0.000349,0.627207,0.000349,0.000349,0.000487
1,run_time,r,3.2e-05,3.2e-05,0.95396,3.2e-05,0.355611,3.2e-05,3.2e-05,3.2e-05
2,run_time,rc,0.000778,0.000778,0.400814,0.000778,0.293622,0.000778,0.000778,0.000778


In [45]:
save_to_excel_sheet(fitness_pivot,filename="dka_experiment_results_public_dataset.xlsx",sheet_name="fitness values all")
save_to_excel_sheet(run_time_pivot,filename="dka_experiment_results_public_dataset.xlsx",sheet_name="run time all")
save_to_excel_sheet(grouped,filename="dka_experiment_results_public_dataset.xlsx",sheet_name="results aggregated")
save_to_excel_sheet(df_stat,filename="dka_experiment_results_public_dataset.xlsx",sheet_name="results p_value")