In [11]:
import os

import duckdb
import numpy as np
import pandas as pd
from deltalake import DeltaTable

In [12]:
# Root results directory: the soa_processed.csv file and the experiment
# Delta Table folders are both resolved relative to this path.
DELTA_PATH = "../results/"

In [13]:
# Baseline results table used for comparison. The path is built with
# os.path.join so a trailing slash on DELTA_PATH does not yield a doubled
# separator ("../results//soa_processed.csv").
soa = pd.read_csv(os.path.join(DELTA_PATH, "soa_processed.csv"))

In [16]:
def list_delta_table_paths(base_path):
    """
    Collect every directory under base_path that looks like a Delta Table.

    A directory is treated as a Delta Table when it has a ``_delta_log``
    sub-directory. Once such a directory is found the walk does not descend
    into it.
    """
    found = []
    for current_dir, subdirs, _filenames in os.walk(base_path):
        if '_delta_log' not in subdirs:
            continue
        found.append(current_dir)
        # Emptying the list in place tells os.walk to skip this subtree.
        del subdirs[:]
    return found

def read_all_delta_tables(base_path):
    """
    Open every Delta Table located under base_path.

    Returns a dict mapping each table's directory path to its DeltaTable
    object. A table that fails to open is reported on stdout and left out
    of the result, so one broken table does not abort the whole load.
    """
    loaded = {}
    for path in list_delta_table_paths(base_path):
        try:
            loaded[path] = DeltaTable(path)
        except Exception as e:
            print(f"Failed to read Delta Table at {path}: {e}")
    return loaded

def get_experiment(base_path):
    """
    Load every Delta Table under base_path into one pandas DataFrame.

    Parameters
    ----------
    base_path : str or path-like
        Directory passed to read_all_delta_tables to locate the tables.

    Returns
    -------
    pd.DataFrame
        Rows of all loaded tables stacked with a fresh 0..n-1 index, or an
        empty DataFrame when no table was loaded.
    """
    delta_tables = read_all_delta_tables(base_path)
    frames = [table.to_pandas() for table in delta_tables.values()]
    if not frames:
        # pd.concat raises ValueError on an empty list; keep returning an
        # empty frame in that case.
        return pd.DataFrame()
    # Concatenate once over the whole list: growing a frame table by table
    # recopies all previously accumulated rows on every iteration.
    return pd.concat(frames, ignore_index=True)

def process_experiments(
    df: pd.DataFrame,
    experiment_name: str,
    model_name: str = "GeneticAlgorithmNearestNeighborsEnsemble",
) -> pd.DataFrame:
    """
    Summarise the path lengths of one model per experiment configuration.

    Parameters
    ----------
    df : pd.DataFrame
        Raw run results. Must contain the columns ``experiment_id``,
        ``experiment_name``, ``instance_name``, ``k_factor``,
        ``repetitions``, ``model_name``, ``has_closed_cycle``, ``k_size``
        and ``path_length``. ``instance_name`` is split on "." into a name
        part and a type part (e.g. "a280.tsp" -> "a280" / "TSP").
    experiment_name : str
        Suffix used in the output statistic columns
        (``min_our_<experiment_name>`` etc.).
    model_name : str, optional
        Value of ``model_name`` whose rows are summarised. Defaults to
        "GeneticAlgorithmNearestNeighborsEnsemble".

    Returns
    -------
    pd.DataFrame
        One row per (experiment_name, instance_name, instance_symmetry,
        k_factor, repetitions, has_closed_cycle, k_size) with the columns
        ``min_our_<experiment_name>``, ``avg_our_<experiment_name>`` and
        ``std_our_<experiment_name>`` computed over ``path_length``.

    Notes
    -----
    ``experiment_id`` is a grouping key but not part of the pivot index, so
    two experiment ids that share one configuration produce duplicate pivot
    entries (pandas raises ValueError in that case).
    """
    # Output prefix -> pandas aggregation applied to "path_length".
    # Only these statistics are reported; add an entry here (e.g.
    # "max": "max") if another one is needed in the output.
    stats = {"min": "min", "avg": "mean", "std": "std"}
    config_keys = [
        "experiment_name", "instance_name", "instance_symmetry",
        "k_factor", "repetitions", "has_closed_cycle", "k_size",
    ]

    summary = (
        df
        # Filter first: model_name is a grouping key, so this yields the
        # same groups without aggregating models that are discarded anyway.
        .loc[lambda x: x["model_name"] == model_name]
        .assign(
            # Order matters: the symmetry is read from the original
            # "<name>.<type>" value before instance_name is overwritten.
            instance_symmetry=lambda x: x["instance_name"].str.split(".").str[1].str.upper(),
            instance_name=lambda x: x["instance_name"].str.split(".").str[0],
        )
        .groupby([
            "experiment_id", "experiment_name", "instance_name", "instance_symmetry",
            "k_factor", "repetitions", "model_name", "has_closed_cycle", "k_size"
        ])
        .agg(**{
            f"{prefix}_path_length": ("path_length", func)
            for prefix, func in stats.items()
        })
        .reset_index()
        .pivot(
            index=config_keys,
            columns="model_name",
            values=[f"{prefix}_path_length" for prefix in stats],
        )
        .reset_index()
    )

    # The pivot leaves two-level column labels such as
    # ("min_path_length", <model>) and ("instance_name", ""); join the
    # non-empty parts into flat names.
    summary.columns = [
        '_'.join([str(part) for part in col if part]) if isinstance(col, tuple) else col
        for col in summary.columns.values
    ]
    return summary.rename(columns={
        f"{prefix}_path_length_{model_name}": f"{prefix}_our_{experiment_name}"
        for prefix in stats
    })

In [17]:
# Load the raw runs of the final experiment and reduce them to one summary
# row per configuration.
final = (
    get_experiment(os.path.join(DELTA_PATH, "final_experiment_with_bayesian_optimization"))
    .pipe(process_experiments, "final")
)

In [18]:
# Display the per-configuration summary produced by process_experiments.
final

Unnamed: 0,experiment_name,instance_name,instance_symmetry,k_factor,repetitions,has_closed_cycle,k_size,min_our_final,avg_our_final,std_our_final
0,final_experiment_with_bayesian_optimization,a280,TSP,0.25,30,False,70,589.0,590.933333,0.365148
1,final_experiment_with_bayesian_optimization,a280,TSP,0.25,30,True,70,630.0,635.266667,1.552158
2,final_experiment_with_bayesian_optimization,a280,TSP,0.50,30,False,140,1265.0,1269.200000,1.689726
3,final_experiment_with_bayesian_optimization,a280,TSP,0.50,30,True,140,1356.0,1363.333333,2.758477
4,final_experiment_with_bayesian_optimization,a280,TSP,0.75,30,False,210,1942.0,1962.566667,7.486463
...,...,...,...,...,...,...,...,...,...,...
565,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.25,30,True,5,637.0,637.000000,0.000000
566,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.50,30,False,11,1325.0,1421.033333,37.553165
567,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.50,30,True,11,1903.0,1922.066667,3.885724
568,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.75,30,False,16,2139.0,2206.766667,75.631449


In [19]:
# Display the baseline table loaded from soa_processed.csv.
soa

Unnamed: 0,instance_name,instance_vertices,instance_edges,instance_symmetrical_type,has_closed_cycle,k_factor,k_size,gvns,hh_rand,hh_greedy,ga_w_nn
0,a280,280,39340,TSP,False,0.25,70,,,,606.0
1,a280,280,39340,TSP,True,0.25,70,687.0,670.0,683.0,686.0
2,a280,280,39340,TSP,True,0.50,140,1376.0,1314.0,1362.0,1358.0
3,a280,280,39340,TSP,True,0.75,210,2149.0,2066.0,2043.0,
4,bayg29,29,435,TSP,False,0.25,7,,,,246.0
...,...,...,...,...,...,...,...,...,...,...,...
364,ulysses22,22,253,TSP,True,0.25,5,747.0,747.0,747.0,747.0
365,ulysses16,16,136,TSP,False,0.75,12,,,,2704.0
366,ulysses22,22,253,TSP,True,0.50,11,1902.0,1902.0,1902.0,1902.0
367,ulysses22,22,253,TSP,False,0.75,16,,,,2618.0


In [23]:
# Attach the baseline columns to our results and measure the gap between our
# best path length and the best baseline value for the same configuration.
analysis = (
    final
    .merge(
        soa.loc[
            :,
            ["instance_name", "k_factor", "has_closed_cycle", "gvns", "hh_rand", "hh_greedy", "ga_w_nn"],
        ],
        how="left",
        on=["instance_name", "k_factor", "has_closed_cycle"],
    )
    .assign(
        # Smallest value among the baseline columns (missing values are skipped).
        best_soa=lambda d: d[["ga_w_nn", "gvns", "hh_rand", "hh_greedy"]].min(axis=1),
        # Negative means our minimum is shorter than the best baseline.
        abs_gap=lambda d: d["min_our_final"].sub(d["best_soa"]),
        # Gap as a fraction of the best baseline (not multiplied by 100);
        # left as NaN where the baseline is exactly zero.
        percentage_gap=lambda d: d["abs_gap"].div(d["best_soa"]).where(d["best_soa"].ne(0)),
    )
)

In [24]:
# Display the merged comparison table with the computed gap columns.
analysis

Unnamed: 0,experiment_name,instance_name,instance_symmetry,k_factor,repetitions,has_closed_cycle,k_size,min_our_final,avg_our_final,std_our_final,gvns,hh_rand,hh_greedy,ga_w_nn,best_soa,abs_gap,percentage_gap
0,final_experiment_with_bayesian_optimization,a280,TSP,0.25,30,False,70,589.0,590.933333,0.365148,,,,606.0,606.0,-17.0,-0.028053
1,final_experiment_with_bayesian_optimization,a280,TSP,0.25,30,True,70,630.0,635.266667,1.552158,687.0,670.0,683.0,686.0,670.0,-40.0,-0.059701
2,final_experiment_with_bayesian_optimization,a280,TSP,0.50,30,False,140,1265.0,1269.200000,1.689726,,,,1234.0,1234.0,31.0,0.025122
3,final_experiment_with_bayesian_optimization,a280,TSP,0.50,30,True,140,1356.0,1363.333333,2.758477,1376.0,1314.0,1362.0,1358.0,1314.0,42.0,0.031963
4,final_experiment_with_bayesian_optimization,a280,TSP,0.75,30,False,210,1942.0,1962.566667,7.486463,,,,1894.0,1894.0,48.0,0.025343
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.25,30,True,5,637.0,637.000000,0.000000,747.0,747.0,747.0,747.0,747.0,-110.0,-0.147256
566,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.50,30,False,11,1325.0,1421.033333,37.553165,,,,1473.0,1473.0,-148.0,-0.100475
567,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.50,30,True,11,1903.0,1922.066667,3.885724,1902.0,1902.0,1902.0,1902.0,1902.0,1.0,0.000526
568,final_experiment_with_bayesian_optimization,ulysses22,TSP,0.75,30,False,16,2139.0,2206.766667,75.631449,,,,2618.0,2618.0,-479.0,-0.182964


In [None]:
# Average gaps per symmetry type, cycle setting and k_factor.
# Optional restriction (currently disabled): keep only TSP rows that have a
# ga_w_nn baseline value before grouping.
analysis.groupby(
    by=["instance_symmetry", "has_closed_cycle", "k_factor"]
).agg(
    **{
        "number_of_instances": ("instance_name", "nunique"),
        "avg_abs_gap": ("abs_gap", "mean"),
        "avg_percentage_gap": ("percentage_gap", "mean"),
    }
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,number_of_instances,comparative_instances,avg_abs_gap,avg_percentage_gap
instance_symmetry,has_closed_cycle,k_factor,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ATSP,False,0.25,19,19,,
ATSP,False,0.5,19,19,,
ATSP,False,0.75,19,19,,
ATSP,True,0.25,19,19,,
ATSP,True,0.5,19,19,,
ATSP,True,0.75,19,19,,
TSP,False,0.25,76,76,-686.78,-0.111097
TSP,False,0.5,76,76,-525.8,-0.013816
TSP,False,0.75,76,76,76.86,0.017632
TSP,True,0.25,76,76,-700.068493,-0.083324
