# Analyse the GA results

In [42]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

%matplotlib widget

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [43]:
import os
import numpy as np
import pandas as pd
import logging
import pickle
import jupyter_black
from tqdm import tqdm

from src.data_connectors import write_solution_files
from src.data_connectors import read_input_files

jupyter_black.load()

logging.basicConfig()
logging.getLogger().setLevel(logging.INFO)

## Read the results

In [44]:
with open("../data/solutions/all_ga.pkl", "rb") as f:
    loaded_results = pickle.load(f)

In [45]:
len(loaded_results)

4

In [55]:
loaded_results[0][-1]

217

## Process results

list[all_surviving_chromosomes, all_surviving_makespans, best_makespan, duration_until_best_makespan]

In [46]:
instances_path = "../data/input/HRTInstances/"
instances_list = [x for x in range(217, 221)]

times_of_best_results = []

for best, instance_number in tqdm(zip(loaded_results, instances_list)):
    ins_x = ins_x = read_input_files.read_file(
        os.path.join(instances_path, f"Instance_{instance_number}.txt")
    )
    times_of_best_results.append(write_solution_files.find_times_of_best_solution(ins_x, best))

4it [00:00, 12.81it/s]


In [47]:
times_of_best_results[0]

{'1': {1: (0, 1), 8: (38, 48), 11: (49, 54), 16: (419, 424), 19: (425, 519)},
 '2': {21: (0, 1),
  28: (751, 761),
  31: (762, 767),
  36: (1132, 1137),
  39: (1138, 1232)},
 '0': {2: (2, 37),
  3: (38, 43),
  4: (44, 74),
  5: (75, 174),
  6: (175, 211),
  7: (212, 253),
  9: (254, 274),
  10: (275, 373),
  12: (374, 418),
  13: (419, 466),
  14: (467, 504),
  15: (505, 566),
  17: (567, 597),
  18: (598, 625),
  20: (626, 714),
  22: (715, 750),
  23: (751, 756),
  24: (757, 787),
  25: (788, 887),
  26: (888, 924),
  27: (925, 966),
  29: (967, 987),
  30: (988, 1086),
  32: (1087, 1131),
  33: (1132, 1179),
  34: (1180, 1217),
  35: (1218, 1279),
  37: (1280, 1310),
  38: (1311, 1338),
  40: (1339, 1427)}}

## Optimal Solutions

In [48]:
df_optimal = pd.read_csv("../data/solutions/optimal/OptimalSolutions.csv", sep=";", header=1)

In [49]:
df_optimal.head()

Unnamed: 0,Instance,Humans,Robots,Robot Eligibility,Optimal solution,Best known solution,Lower bound
0,217,1,2,25,1397.0,1397,1397
1,218,1,2,5,865.0,865,865
2,219,1,2,1,529.0,529,529
3,220,1,2,25,3316.0,3316,3316
4,221,1,2,5,2476.0,2476,2476


In [50]:
def get_gap(df_optimal: pd.DataFrame, instance_number: int, best: tuple) -> float:
    lower_bound = df_optimal[df_optimal.Instance == instance_number]["Lower bound"].values[0]
    ins_result = best[2]
    gap = round((ins_result - lower_bound) / lower_bound * 100, 2)
    logging.info(f"Lower Bound: {lower_bound}, Result {ins_result}")
    logging.info(f"Instance {instance_number} has Gap % = {gap} %")
    return gap

In [51]:
instances_list = [x for x in range(217, 221)]

pct_deviation_from_best = []

for best, instance_number in tqdm(zip(loaded_results, instances_list)):
    optimal = df_optimal[df_optimal.Instance == instance_number]["Optimal solution"].values[0]
    best_know = df_optimal[df_optimal.Instance == instance_number]["Best known solution"].values[0]
    logging.info(f"Optimal: {optimal}, Best Known: {best_know}")
    get_gap(df_optimal, instance_number, best)

0it [00:00, ?it/s]INFO:root:Optimal: 1397.0, Best Known: 1397
INFO:root:Lower Bound: 1397, Result 1427
INFO:root:Instance 217 has Gap % = 2.15 %
INFO:root:Optimal: 865.0, Best Known: 865
INFO:root:Lower Bound: 865, Result 1159
INFO:root:Instance 218 has Gap % = 33.99 %
INFO:root:Optimal: 529.0, Best Known: 529
INFO:root:Lower Bound: 529, Result 741
INFO:root:Instance 219 has Gap % = 40.08 %
INFO:root:Optimal: 3316.0, Best Known: 3316
INFO:root:Lower Bound: 3316, Result 3599
INFO:root:Instance 220 has Gap % = 8.53 %
4it [00:00, 724.78it/s]


## Group by type

In [52]:
instance_groups = (
    df_optimal.groupby(["Humans", "Robots", "Robot Eligibility"]).Instance.apply(list).reset_index()
)
instance_groups

Unnamed: 0,Humans,Robots,Robot Eligibility,Instance
0,1,2,25,"[217, 220, 229, 232, 241, 244, 253, 256, 265, ..."
1,1,2,5,"[218, 221, 230, 233, 242, 245, 254, 257, 266, ..."
2,1,2,1,"[219, 222, 231, 234, 243, 246, 255, 258, 267, ..."
3,1,3,25,"[223, 226, 235, 238, 247, 250, 259, 262, 271, ..."
4,1,3,5,"[224, 227, 236, 239, 248, 251, 260, 263, 272, ..."
5,1,3,1,"[225, 228, 237, 240, 249, 252, 261, 264, 273, ..."
6,2,2,25,"[277, 280, 289, 292, 301, 304, 313, 316, 325, ..."
7,2,2,5,"[278, 281, 290, 293, 302, 305, 314, 317, 326, ..."
8,2,2,1,"[279, 282, 291, 294, 303, 306, 315, 318, 327, ..."
9,2,3,25,"[283, 286, 295, 298, 307, 310, 319, 322, 331, ..."


In [53]:
for idx, row_group in tqdm(instance_groups.iterrows()):
    gap_group = []
    for ins_x in row_group.Instance:
        gap_x = get_gap(
            df_optimal,
            ins_x, 
        )

0it [00:00, ?it/s]


TypeError: get_gap() missing 1 required positional argument: 'best'