In [33]:
import itertools
import json
import math
import os
import statistics

import pandas as pd

from benchmarkrewriter.benchmark_parser import BenchmarkParser, WorkerBenchmarkParser, WorkerEncoding

In [34]:
# Machine-specific input locations; adjust these before running elsewhere.
# Directory containing the solver result files (note the trailing path separator).
results_path = r'C:\Users\huda\Downloads\results\\'
# JSON file with the GA results (read by read_ga_results).
ga_path = r'C:\Users\huda\Downloads\ga_worker_results\results.json'
# Directory whose entries are parsed as benchmark instances (read by read_benchmarks_workers).
benchmark_path = r'C:\Users\huda\Documents\GitHub\scheduling_model_jrc\code\upgrades\benchmarks_with_workers'

In [35]:
def worker_flexibility(benchmark: "WorkerEncoding"):
    """Return the normalised flexibility of a benchmark with workers.

    The value is the average number of positive duration entries per
    operation, divided by the number of distinct (machine index, worker
    index) positions that occur in the duration structure.

    Args:
        benchmark: object exposing ``n_operations()`` and ``durations()``;
            ``durations()`` must be indexable as
            ``durations[operation][machine][worker]``.

    Returns:
        The flexibility as a float, or ``0.0`` when the benchmark has no
        operations or no (machine, worker) positions at all.
    """
    n_operations = benchmark.n_operations()
    durations = benchmark.durations()

    n_assignments = 0
    combinations = set()
    for operation in durations:
        for machine_index, machine in enumerate(operation):
            for worker_index, duration in enumerate(machine):
                if duration > 0:
                    n_assignments += 1
                # Every position counts towards the denominator, whether or
                # not it is usable for this particular operation.
                combinations.add((machine_index, worker_index))

    # An empty benchmark would otherwise raise ZeroDivisionError.
    if n_operations == 0 or not combinations:
        return 0.0

    average_assignments = n_assignments / n_operations
    return average_assignments / len(combinations)

In [36]:
def get_flexibility_and_dv_worker(benchmark):
    """Compute flexibility and duration variety for a worker benchmark.

    Duration variety is the number of distinct positive duration values
    divided by the total number of positive duration entries.

    Args:
        benchmark: object exposing ``durations()`` (indexable as
            ``durations[operation][machine][worker]``) and whatever
            ``worker_flexibility`` requires.

    Returns:
        A ``(flexibility, duration_variety)`` tuple. ``duration_variety`` is
        ``0.0`` when there is no positive duration entry at all.
    """
    durations = benchmark.durations()
    n_positive = 0
    # A set gives O(1) membership instead of scanning a list for every entry.
    distinct_durations = set()
    for operation in durations:
        for machine in operation:
            for duration in machine:
                if duration > 0:
                    distinct_durations.add(duration)
                    n_positive += 1

    flexibility = worker_flexibility(benchmark)
    # Guard against benchmarks without any usable assignment.
    duration_variety = len(distinct_durations) / n_positive if n_positive else 0.0
    return flexibility, duration_variety

In [37]:
def get_max(durations):
    """Return the largest duration value, never less than 0.

    Args:
        durations: three-level nested iterable
            (operation -> machine -> worker duration).

    Returns:
        The maximum entry, or ``0`` when the structure is empty or contains
        no positive value.
    """
    values = (
        value
        for operation in durations
        for machine in operation
        for value in machine
    )
    # Seeding with 0 keeps the original floor and makes empty input safe,
    # without shadowing the built-in ``max`` with a local variable.
    return max(itertools.chain((0,), values))

def read_benchmarks_workers(path):
    """Parse every entry of a benchmark directory and collect instance metrics.

    Args:
        path: directory whose entries are all parsed with
            ``WorkerBenchmarkParser``.

    Returns:
        A dict keyed by the remapped instance name. Each value holds
        ``n_operations``, ``flexibility``, ``duration_variety``,
        ``n_machines`` and ``additional_metrics`` (``d_distinct``,
        ``d_unique``, ``d_shared``, ``d_average``).
    """
    result = dict()
    for benchmark in os.listdir(path):
        parser = WorkerBenchmarkParser()
        # os.path.join instead of hand-written '/' concatenation.
        data = parser.parse_benchmark(os.path.join(path, benchmark))
        f, dv = get_flexibility_and_dv_worker(data)
        # Drops the first 2 and last 12 characters of the file name;
        # presumably a fixed prefix/suffix of the benchmark files - TODO confirm.
        instance_name = remap(benchmark[2:-12])

        # Query the benchmark once instead of on every use.
        durations = data.durations()
        n_operations = data.n_operations()

        # counts[d] = how many (operation, machine, worker) entries have duration d.
        # Durations are used as list indices, so they must be integers.
        counts = [0] * (get_max(durations) + 1)
        for operation in durations:
            for machine in operation:
                for worker in machine:
                    if worker > 0:
                        counts[worker] += 1

        metrics = {
            'd_distinct': [d for d, count in enumerate(counts) if count > 0],
            'd_unique': [d for d, count in enumerate(counts) if count == 1],
            'd_shared': [d for d, count in enumerate(counts) if count > 1],
            # NOTE(review): sum(counts) is the number of positive duration
            # entries, so this is the average number of options per
            # operation, not an average duration - confirm this is intended.
            'd_average': sum(counts) / n_operations,
        }
        result[instance_name] = {
            'n_operations': n_operations,
            'flexibility': f,
            'duration_variety': dv,
            'n_machines': data.n_machines(),
            'additional_metrics': metrics,
        }

    return result

def remap(name):
    """Normalise an instance name to the short form used as dictionary key.

    A single leading underscore is removed. If the text before the first
    remaining underscore starts with one of the known family prefixes, the
    result is the family label followed by the second underscore-separated
    field; otherwise the (underscore-stripped) name is returned unchanged.
    """
    # (prefix to look for, label to emit), checked in this order.
    family_labels = (
        ('Behnke', 'Behnke'),
        ('Brandimarte', 'BrandimarteMk'),
        ('Chambers', 'ChambersBarnes'),
        ('HurinkS', 'HurinkSdata'),
        ('HurinkE', 'HurinkEdata'),
        ('HurinkR', 'HurinkRdata'),
        ('HurinkV', 'HurinkVdata'),
        ('DP', 'DPpaulli'),
        ('Kacem', 'Kacem'),
        ('Fattahi', 'Fattahi'),
    )
    stripped = name[1:] if name.startswith('_') else name
    parts = stripped.split('_')
    family = parts[0]
    for prefix, label in family_labels:
        if family.startswith(prefix):
            return label + parts[1]
    return stripped

def read_results(path):
    """Read all solver result files in a directory.

    Each file is a ';'-separated table without header. The solver name is
    taken from the file name (first 8 and last 4 characters removed), with
    the ``_rewritten`` variants of hexaly and cplex_lp mapped to the plain
    solver name. A one-line summary per solver is printed.

    Args:
        path: directory containing only result files.

    Returns:
        A ``(data_as_dict, known_optima)`` tuple:
        ``data_as_dict[solver][instance]`` is the reported fitness value and
        ``known_optima[instance]`` is the fitness of the first result that
        was reported with optimization status ``1.0`` for that instance.
    """
    column_names = [
        'name', 'optimization_status', 'fitness_value', 'lower_bound',
        'runtime', 'result_vector1', 'result_vector2', 'result_vector3',
        'peak_cpu', 'peak_ram', 'resource_history', 'best_result_history',
    ]
    data_as_dict = dict()
    known_optima = dict()
    for file in os.listdir(path):
        statuses = []
        optimal = 0
        feasible = 0
        infeasible = 0
        # read_csv already returns a DataFrame; no extra wrapping needed.
        df = pd.read_csv(os.path.join(path, file), names=column_names, sep=';')
        name = file.split('\\')[-1][8:-4]
        if name == 'hexaly_rewritten':
            name = 'hexaly'
        elif name == 'cplex_lp_rewritten':
            name = 'cplex_lp'
        for _, row in df.iterrows():
            # Rows whose name starts with 'Error' carry no usable result.
            if row['name'].startswith('Error'):
                infeasible += 1
                continue
            solver_results = data_as_dict.setdefault(name, dict())
            instance_name = remap(row['name'][2:-12])
            # hexaly rows are accepted regardless of their status value;
            # for every other solver a negative status means "no solution".
            if name == 'hexaly' or not row['optimization_status'] < 0:
                solver_results[instance_name] = row['fitness_value']
                if row['optimization_status'] == 1.0:
                    # Keep the first optimum reported for an instance. The
                    # previous lookup compared the first character of each
                    # key with the instance name, so it never matched and
                    # later solvers silently overwrote earlier optima.
                    if instance_name not in known_optima:
                        known_optima[instance_name] = row['fitness_value']
                    optimal += 1
                else:
                    feasible += 1
            else:
                infeasible += 1
            if row['optimization_status'] not in statuses:
                statuses.append(row['optimization_status'])
        print(f'{name}: {statuses} - optimal: {optimal}, feasible: {feasible}, infeasible: {infeasible}')
    return data_as_dict, known_optima

def read_ga_results(path):
    """Load the GA results JSON and split it into best and average series.

    Returns a dict with the keys ``'ga_best'`` and ``'ga_average'``; each
    maps the remapped instance name (JSON key without its last 8
    characters) to the corresponding ``'best'`` / ``'average'`` entry.
    """
    with open(path, 'r') as handle:
        raw = json.load(handle)

    best_by_instance = dict()
    average_by_instance = dict()
    for key, entry in raw.items():
        instance = remap(key[:-8])
        best_by_instance[instance] = entry['best']
        average_by_instance[instance] = entry['average']
    return {'ga_best': best_by_instance, 'ga_average': average_by_instance}

In [38]:
# Per-instance structural metrics (size, flexibility, duration variety, ...) of every benchmark.
benchmark_data = read_benchmarks_workers(benchmark_path)

In [39]:
results_data, known_optima = read_results(results_path)
ga_data = read_ga_results(ga_path)
# Add the GA series next to the solver results and print the same kind of
# summary line: a GA value counts as optimal when it equals a known optimum.
for key, ga_values in ga_data.items():
    results_data[key] = ga_values
    optimal = 0
    for instance, fitness in ga_values.items():
        if instance in known_optima and fitness == known_optima[instance]:
            optimal += 1
    feasible = len(ga_values) - optimal
    infeasible = 0  # never incremented for the GA series
    print(f'{key}:[0.0, 1.0] - optimal: {optimal}, feasible: {feasible}, infeasible: {infeasible}')
print(len(known_optima))
print(known_optima)

cplex_cp: [0.0, 1.0] - optimal: 55, feasible: 347, infeasible: 2
cplex_lp: [1.0, 0.0, -1.0] - optimal: 12, feasible: 309, infeasible: 81
gurobi: [0.0, -1.0, 1.0] - optimal: 15, feasible: 305, infeasible: 70
hexaly: [-1, 1] - optimal: 29, feasible: 373, infeasible: 0
ortools: [0.0, 1.0] - optimal: 39, feasible: 358, infeasible: 5
ga_best:[0.0, 1.0] - optimal: 28, feasible: 374, infeasible: 0
ga_average:[0.0, 1.0] - optimal: 20, feasible: 382, infeasible: 0
69
{'BrandimarteMk1': 38.0, 'Hurink_sdata_1': 52.0, 'Hurink_sdata_4': 622.0, 'Hurink_sdata_8': 565.0, 'Hurink_edata_1': 51.0, 'Hurink_edata_53': 6832.0, 'Hurink_edata_5': 620.0, 'Hurink_edata_7': 538.0, 'Hurink_edata_8': 481.0, 'Hurink_rdata_19': 656.0, 'Hurink_rdata_1': 44.0, 'Hurink_rdata_45': 748, 'Hurink_rdata_55': 4170.0, 'Hurink_vdata_19': 651.0, 'Hurink_vdata_1': 44.0, 'Hurink_vdata_20': 585.0, 'Hurink_vdata_21': 602.0, 'Hurink_vdata_22': 562.0, 'Hurink_vdata_23': 686.0, 'Hurink_vdata_2': 597.0, 'Hurink_vdata_39': 854.0, 'Hurin

In [40]:
# Fitness per instance for the two approaches compared below.
cp_data = results_data['cplex_cp']
# results_data['ga_best'] is the very dict that was copied in from the GA
# results, so reading it from there yields the same object while keeping
# this cell re-runnable (indexing ga_data with 'ga_best' a second time
# would raise KeyError once ga_data has been rebound).
ga_data = results_data['ga_best']


In [41]:
# Instance names for which a cplex_cp fitness value was recorded.
cp_data.keys()

dict_keys(['Behnke10', 'Behnke11', 'Behnke12', 'Behnke13', 'Behnke14', 'Behnke15', 'Behnke16', 'Behnke17', 'Behnke18', 'Behnke19', 'Behnke1', 'Behnke20', 'Behnke21', 'Behnke22', 'Behnke23', 'Behnke24', 'Behnke25', 'Behnke26', 'Behnke27', 'Behnke28', 'Behnke29', 'Behnke2', 'Behnke30', 'Behnke31', 'Behnke32', 'Behnke33', 'Behnke34', 'Behnke35', 'Behnke36', 'Behnke37', 'Behnke38', 'Behnke39', 'Behnke3', 'Behnke40', 'Behnke41', 'Behnke42', 'Behnke43', 'Behnke44', 'Behnke45', 'Behnke46', 'Behnke47', 'Behnke48', 'Behnke49', 'Behnke4', 'Behnke50', 'Behnke51', 'Behnke52', 'Behnke53', 'Behnke54', 'Behnke55', 'Behnke56', 'Behnke57', 'Behnke58', 'Behnke59', 'Behnke5', 'Behnke60', 'Behnke6', 'Behnke7', 'Behnke8', 'Behnke9', 'BrandimarteMk11', 'BrandimarteMk12', 'BrandimarteMk13', 'BrandimarteMk14', 'BrandimarteMk15', 'BrandimarteMk1', 'BrandimarteMk2', 'BrandimarteMk3', 'BrandimarteMk4', 'BrandimarteMk5', 'BrandimarteMk6', 'BrandimarteMk7', 'BrandimarteMk9', 'Hurink_sdata_10', 'Hurink_sdata_11', '

In [42]:
# Instance names present in the GA 'ga_best' results.
ga_data.keys()

dict_keys(['Behnke10', 'Behnke11', 'Behnke12', 'Behnke13', 'Behnke14', 'Behnke15', 'Behnke16', 'Behnke17', 'Behnke18', 'Behnke19', 'Behnke1', 'Behnke20', 'Behnke21', 'Behnke22', 'Behnke23', 'Behnke24', 'Behnke25', 'Behnke26', 'Behnke27', 'Behnke28', 'Behnke29', 'Behnke2', 'Behnke30', 'Behnke31', 'Behnke32', 'Behnke33', 'Behnke34', 'Behnke35', 'Behnke36', 'Behnke37', 'Behnke38', 'Behnke39', 'Behnke3', 'Behnke40', 'Behnke41', 'Behnke42', 'Behnke43', 'Behnke44', 'Behnke45', 'Behnke46', 'Behnke47', 'Behnke48', 'Behnke49', 'Behnke4', 'Behnke50', 'Behnke51', 'Behnke52', 'Behnke53', 'Behnke54', 'Behnke55', 'Behnke56', 'Behnke57', 'Behnke58', 'Behnke59', 'Behnke5', 'Behnke60', 'Behnke6', 'Behnke7', 'Behnke8', 'Behnke9', 'BrandimarteMk10', 'BrandimarteMk11', 'BrandimarteMk12', 'BrandimarteMk13', 'BrandimarteMk14', 'BrandimarteMk15', 'BrandimarteMk1', 'BrandimarteMk2', 'BrandimarteMk3', 'BrandimarteMk4', 'BrandimarteMk5', 'BrandimarteMk6', 'BrandimarteMk7', 'BrandimarteMk8', 'BrandimarteMk9', 'H

In [43]:
# Head-to-head comparison of GA and CP fitness per instance (lower wins).
# b / t / w count GA wins, ties and CP wins; an instance without a CP
# result is counted as a GA win.
b = 0
w = 0
t = 0
ga_instances = []
cp_instances = []
tied_instances = []
best = dict()
for instance, ga_value in ga_data.items():
    if instance not in cp_data:
        b += 1
        best[instance] = ga_value
        ga_instances.append(instance)
        continue

    cp_value = cp_data[instance]
    if ga_value == cp_value:
        t += 1
        best[instance] = ga_value
        tied_instances.append(instance)
    elif ga_value < cp_value:
        b += 1
        best[instance] = ga_value
        ga_instances.append(instance)
    else:
        w += 1
        best[instance] = cp_value
        cp_instances.append(instance)

In [44]:
# Printed in this order: b = instances where the GA is strictly better (or
# no CP result exists), t = ties, w = instances where CP is strictly better.
print(b)
print(t)
print(w)

11
35
356


In [45]:
def calculate_value(fitness, best):
    """Return the gap between `fitness` and `best`, relative to `best`."""
    gap = fitness - best
    return gap / best

In [46]:
# Relative gap of each approach to the better of the two results per instance.
ga_delta = {
    instance: calculate_value(ga_value, best[instance])
    for instance, ga_value in ga_data.items()
}
# Only instances that actually have a CP result get a CP gap.
cp_delta = {
    instance: calculate_value(cp_data[instance], best[instance])
    for instance in ga_data
    if instance in cp_data
}


In [47]:
# Flat lists of the gaps, in dictionary order, for the summary statistics.
all_ga_delta = list(ga_delta.values())
all_cp_delta = list(cp_delta.values())


In [48]:
# Summary statistics of the relative gaps for GA and CP.
# `statistics` is imported in the notebook's import cell rather than here.
# Note: statistics.stdev needs at least two values per list.
ga_max = max(all_ga_delta)
cp_max = max(all_cp_delta)
ga_mean = statistics.mean(all_ga_delta)
cp_mean = statistics.mean(all_cp_delta)
ga_stdev = statistics.stdev(all_ga_delta)
cp_stdev = statistics.stdev(all_cp_delta)
ga_median = statistics.median(all_ga_delta)
cp_median = statistics.median(all_cp_delta)

In [49]:
# Split the benchmark metrics by the outcome of the GA vs CP comparison.
ga_stats = dict()
cp_stats = dict()
tied_stats = dict()
# Benchmarks that took no part in the comparison. They used to fall into the
# CP group through a catch-all `else`, which would skew the CP statistics.
unclassified_instances = []

# Sets give O(1) membership tests instead of scanning the lists each time.
ga_winners = set(ga_instances)
tied = set(tied_instances)
cp_winners = set(cp_instances)

for instance, instance_metrics in benchmark_data.items():
    if instance in ga_winners:
        ga_stats[instance] = instance_metrics
    elif instance in tied:
        tied_stats[instance] = instance_metrics
    elif instance in cp_winners:
        cp_stats[instance] = instance_metrics
    else:
        unclassified_instances.append(instance)


In [50]:
def get_data(data):
    """Turn a dict of per-instance dicts into one list of values per key.

    For every key that appears in any inner dict, the result holds the
    values of that key in the iteration order of `data`.
    """
    collected = dict()
    for instance_values in data.values():
        for key, value in instance_values.items():
            collected.setdefault(key, []).append(value)
    return collected

In [51]:
# Descriptive statistics of every benchmark metric, per comparison outcome.
metrics = {
    'ga': get_data(ga_stats),
    'tie': get_data(tied_stats),
    'cp': get_data(cp_stats)
}
for key, group in metrics.items():
    print(key)
    for metric, collected in group.items():
        if metric == 'additional_metrics':
            # Of the nested metrics only 'd_average' is summarised.
            label = 'd_average'
            values = [entry['d_average'] for entry in collected]
        else:
            label = metric
            values = collected
        print(label)
        mean = statistics.mean(values)
        median = statistics.median(values)
        stdev = statistics.stdev(values)
        print(f'Max: {max(values)}, Min: {min(values)}, Mean: {mean}, Median: {median}, Stdev: {stdev}')

ga
n_operations
Max: 500, Min: 50, Mean: 166.9090909090909, Median: 100, Stdev: 174.02152426953083
flexibility
Max: 0.18830370370370372, Min: 0.04925925925925926, Mean: 0.123121047844902, Median: 0.14896, Stdev: 0.05115019308455597
duration_variety
Max: 0.7315175097276264, Min: 5.873853130176333e-05, Mean: 0.14705631955672713, Median: 0.0005595595147499888, Stdev: 0.27445313794124404
n_machines
Max: 60, Min: 8, Mean: 41.72727272727273, Median: 60, Stdev: 25.369631093458608
d_average
Max: 1016.84, Min: 7.138888888888889, Mean: 546.9953762626262, Median: 804.384, Stdev: 432.2565083980827
tie
n_operations
Max: 277, Min: 4, Mean: 50.22857142857143, Median: 36, Stdev: 60.79864826007319
flexibility
Max: 0.75, Min: 0.05245596761842249, Mean: 0.27549333712805807, Median: 0.19944444444444445, Stdev: 0.1974570005832944
duration_variety
Max: 0.95, Min: 0.000541829215431296, Mean: 0.36478148308803887, Median: 0.3161290322580645, Stdev: 0.32373069164936513
n_machines
Max: 60, Min: 2, Mean: 9.742857

In [52]:
# Instances on which the GA result was strictly better than CP (or CP had no result).
ga_instances

['Behnke41',
 'Behnke42',
 'Behnke44',
 'Behnke47',
 'Behnke54',
 'Behnke56',
 'Behnke57',
 'Hurink_sdata_2',
 'Hurink_sdata_54',
 'Hurink_edata_56',
 'ChambersBarnes7']