In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Accumulator for the loaded result tables, one DataFrame per result file.
all_data = []

# Result files, one per optimizer. `file_paths` and `optimizers` are parallel
# lists: entry i of one belongs to entry i of the other.
file_paths = [
    'data/results_ortools.txt',
    'data/results_cplex_cp.txt',
    'data/results_cplex_lp.txt',
    'data/results_gurobi.txt',
    'data/results_hexaly.txt',
    'data/results_ga_best.txt',
    'data/results_ga_median.txt',
]

# Display labels used in plot legends, in the same order as `file_paths`.
optimizers = [
    'ORTOOLS',
    'CPLEX-CP',
    'CPLEX-LP',
    'GUROBI',
    'HEXALY',
    'GA-BEST',
    'GA-MEDIAN',
]

In [None]:
def read_file(file : str) -> pd.DataFrame:
    """Load one semicolon-separated result file and drop its error rows.

    The file is read without a header row; its fields are assigned, in order,
    to the columns ``name``, ``optimization_status``, ``fitness_value``,
    ``runtime``, ``resource_history`` and ``best_result_history``.

    Args:
        file: Path (or any object accepted by ``pd.read_csv``) of the result file.

    Returns:
        A DataFrame containing every row whose ``name`` does not start with
        ``'Error'``. Rows with a missing name are kept.
    """
    # NOTE: fill optimization status for GA versions with 0
    # An earlier, disabled version of this reader used a wider 12-column layout
    # (lower_bound, result vectors, peak CPU/RAM); that layout is not parsed here.
    columns = ['name', 'optimization_status', 'fitness_value', 'runtime', 'resource_history', 'best_result_history']
    # Force `name` to text: purely numeric instance names would otherwise be
    # parsed as integers and break the `.str` accessor below.
    results = pd.read_csv(file, names=columns, sep=';', dtype={'name': str})
    # na=False makes rows with a missing name count as "not an error" instead of
    # producing NaN in the mask, which `~` cannot invert.
    is_error = results['name'].str.startswith('Error', na=False)
    return results[~is_error]

In [None]:
# Rebuild the list from scratch instead of appending to it, so re-running this
# cell cannot duplicate entries and break the index alignment between
# `all_data` and `optimizers`.
all_data = [read_file(path) for path in file_paths]

In [None]:
def get_comparison(data, instance):
    """Return the smallest ``fitness_value`` any optimizer reports for ``instance``.

    Args:
        data: Mapping of optimizer key -> result DataFrame with at least the
            columns ``name`` and ``fitness_value``.
        instance: Instance name to look up in each DataFrame's ``name`` column.

    Returns:
        The minimum fitness found across all optimizers, or ``float('inf')``
        when no optimizer has a row for ``instance``.
    """
    best = float('inf')
    for optimizer in data.values():
        matches = optimizer.loc[optimizer['name'] == instance]
        candidates = matches['fitness_value'].tolist()
        if not candidates:
            # No result for this instance: report it and move on. Previously the
            # empty Series fell through to the comparison below and raised
            # "truth value of a Series is ambiguous".
            print(instance)
            print(matches)
            continue
        # When an instance appears more than once, the second row is used.
        # NOTE(review): presumably the later row supersedes the first - confirm.
        fitness = candidates[1] if len(candidates) > 1 else candidates[0]
        if fitness < best:
            best = fitness
    return best

def calculate_value(fitness, best):
    """Return the gap between ``fitness`` and ``best`` relative to ``best``.

    The result is a fraction (0.2 means 20 % worse than ``best``), not a
    percentage. Works element-wise when ``fitness`` is a pandas Series.
    """
    gap = fitness - best
    return gap / best

def skip(data, instance):
    """Tell whether ``instance`` must be left out of the comparison.

    Returns True as soon as one optimizer's DataFrame has no row whose ``name``
    equals ``instance``; returns False when every optimizer has it.
    """
    return any(
        instance not in data[optimizer].name.values
        for optimizer in data.keys()
    )

def get_cdf_comparison_vector(data : dict, column):
    """Build, per optimizer, the sorted list of relative gaps to the best result.

    Instances are taken from the first DataFrame in ``data``. An instance is
    used only when every optimizer has a row for it (see ``skip``). For each
    such instance the best value comes from ``get_comparison`` (which always
    reads ``fitness_value``), and each optimizer's ``column`` value is turned
    into a relative gap with ``calculate_value``.

    Args:
        data: Mapping of optimizer key -> result DataFrame.
        column: Name of the column whose values are compared against the best.

    Returns:
        A list with one ascending-sorted list of gaps per optimizer, in the
        iteration order of ``data``.

    Raises:
        ValueError: from ``.item()`` when an optimizer has more than one row
            for the same instance.
    """
    # TODO: handle missing results
    buckets = []
    for optimizer_name in data:
        print(optimizer_name)
        buckets.append([])

    reference = data[list(data.keys())[0]]
    for _, record in reference.iterrows():
        instance = record['name']
        if skip(data, instance):
            continue
        best = get_comparison(data, instance)
        # `buckets` was filled in the iteration order of `data`, so zipping
        # with the keys pairs every bucket with its own optimizer.
        for bucket, key in zip(buckets, data.keys()):
            frame = data[key]
            selected = frame.loc[frame['name'] == instance][column]
            bucket.append(calculate_value(selected, best).item())

    for bucket in buckets:
        bucket.sort()
    return buckets

In [None]:
# The comparison helpers expect a mapping (they call `.keys()` and index by key),
# while `all_data` is a plain list. Pair each frame with its optimizer label;
# dict order is preserved, so result_vectors[i] still belongs to optimizers[i].
result_vectors = get_cdf_comparison_vector(dict(zip(optimizers, all_data)), 'fitness_value')

In [None]:
# One empirical CDF curve per optimizer; result_vectors[k] is labelled with optimizers[k].
for position, vector in enumerate(result_vectors):
    plt.ecdf(vector, label=optimizers[position])
plt.legend()
plt.show()

In [None]:
def get_history(df, instance, time_limit=1200):
    """Extract the best-result history of ``instance`` as two parallel lists.

    The ``best_result_history`` cell(s) of the rows named ``instance`` are
    evaluated with ``pd.eval``; each resulting item is read as a pair whose
    element 0 is a timestamp and element 1 a fitness value.
    NOTE(review): presumably the cell holds a textual list of
    ``[timestamp, fitness]`` pairs - confirm against the result files.

    Parsing is best-effort: if the history is missing or malformed, whatever
    was parsed up to that point (possibly nothing) is returned.

    Args:
        df: Result DataFrame with the columns ``name`` and ``best_result_history``.
        instance: Instance name to look up.
        time_limit: Timestamp up to which the last fitness value is extended
            (default 1200, the value previously hard-coded here).

    Returns:
        ``(fitness_history, timestamp_history)``. When at least one point was
        parsed, a final point repeating the last fitness is appended at
        ``max(last timestamp, time_limit)``.
    """
    fitness_history = []
    timestamp_history = []
    try:
        # NOTE(review): pd.eval evaluates text read from the result file; only
        # use this on files you produced yourself.
        values = pd.eval(df.loc[df['name'] == instance]['best_result_history'])
        for entry in values:
            # Each evaluated item is itself a list of (timestamp, fitness) pairs.
            for nested_entry in entry:
                # Read both fields before appending, so a malformed pair cannot
                # leave the two lists with different lengths.
                timestamp, fitness = nested_entry[0], nested_entry[1]
                timestamp_history.append(timestamp)
                fitness_history.append(fitness)
    except Exception:
        # Deliberate best-effort: optimizers without a usable history simply
        # yield no curve. Unlike a bare `except`, this no longer swallows
        # KeyboardInterrupt / SystemExit.
        pass
    if len(fitness_history) > 0:
        # Extend the curve horizontally to the time limit.
        fitness_history.append(fitness_history[-1])
        timestamp_history.append(max(timestamp_history[-1], time_limit))
    return fitness_history, timestamp_history

In [None]:
# Grid of fitness-over-time plots: one subplot per instance, one line per optimizer.
cols = 5

# Instance names come from the second-to-last result set in `all_data`.
instances = all_data[-2]['name'].tolist()
# Size the grid from the data (ceiling division) instead of a fixed 81 rows,
# so more instances cannot index past the last axes row.
n_rows = max(1, -(-len(instances) // cols))

# squeeze=False keeps `axs` two-dimensional even for a single row.
fig, axs = plt.subplots(n_rows, cols, squeeze=False)
fig.set_figheight(2.5 * n_rows)
fig.set_figwidth(20)

fig.suptitle('Fitness')

# First plotted line of each optimizer, used as its legend entry.
legend_handles = {}

for index, instance in enumerate(instances):
    ax = axs[index // cols, index % cols]
    upper = None
    for i, data in enumerate(all_data):
        fitness, timestamps = get_history(data, instance)
        if len(fitness) > 1:
            # Fixed colour per optimizer, so the shared legend stays valid even
            # when an optimizer has no history for some instance.
            (line,) = ax.plot(timestamps, fitness, linestyle='solid', color=f'C{i}', label=optimizers[i])
            legend_handles.setdefault(optimizers[i], line)
            # The y-range is capped by the second history value; take the largest
            # across optimizers so the last one plotted does not clip the others.
            upper = fitness[1] if upper is None else max(upper, fitness[1])
            ax.set_title(f'{instance}')
    if upper is not None:
        ax.set_ylim(0, upper)

# Hide grid cells that have no instance assigned.
for unused_ax in axs.flat[len(instances):]:
    unused_ax.set_axis_off()

# Legend handles must be artists (the plotted lines), not the label strings.
fig.legend(handles=list(legend_handles.values()), loc='upper right')
fig.tight_layout()