## RPFS Problem GRASP x C&CG (Cmax objective) - Tables and Graphs

Before running this notebook, please run notebooks 0.1 and 0.2.

In [None]:
import pandas as pd
import numpy as np
import os, fnmatch
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import glob
import seaborn as sns
import gzip
import matplotlib.style as style
from matplotlib.path import Path
from matplotlib.patches import BoxStyle

%matplotlib inline

In [None]:
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

In [None]:
# Named dash patterns as (name, (offset, dash_sequence)) pairs.
# NOTE(review): presumably consumed as matplotlib `linestyle` values -- confirm at the call sites.
linestyle_tuple = [
    (style_name, (0, dash_sequence))
    for style_name, dash_sequence in (
        ('dotted',                (1, 1)),
        ('dashed',                (5, 5)),
        ('densely dashed',        (5, 1)),
        ('dashdotdotted',         (3, 5, 1, 5, 1, 5)),
        ('densely dashdotdotted', (3, 1, 1, 1, 1, 1)),
        ('dashdotted',            (3, 5, 1, 5)),
        ('densely dashdotted',    (3, 1, 1, 1)),
        ('loosely dashed',        (5, 10)),
        ('loosely dashdotted',    (3, 10, 1, 10)),
        ('loosely dashdotdotted', (3, 10, 1, 10, 1, 10)),
        ('densely dotted',        (1, 1)),
        ('loosely dotted',        (1, 10)),
    )
]

In [None]:
# https://stackoverflow.com/questions/51483901/is-there-a-way-to-extend-the-solid-color-background-to-the-full-width-of-the-pag
class ExtendedTextBox(BoxStyle._Base):
    """
    Box style that draws a rectangle of fixed width behind a text.

    The rectangle is centred horizontally on the text's own box and is
    `self.width` wide, whatever the width of the text; only its height
    follows the text (plus vertical padding).

    NOTE(review): this relies on the private `BoxStyle._Base` class and on
    its `transmute` hook -- confirm both exist in the installed matplotlib.
    """

    def __init__(self, pad=0.3, width=500.):
        """
        pad:
            vertical padding, expressed as a multiple of `mutation_size`
            (no horizontal padding is applied)
        width:
            total width of the drawn box; use
            `ax.get_window_extent().width` to span the axes
        """
        self.pad = pad
        self.width = width
        super(ExtendedTextBox, self).__init__()

    def transmute(self, x0, y0, width, height, mutation_size):
        """
        Build the closed rectangular path of the box.

        x0 and y0 are the lower left corner of the original text box
        (they are supplied by matplotlib), width and height its size.
        """
        padding = mutation_size * self.pad

        # vertical extent: the text box grown by the padding on both sides
        bottom = y0 - padding
        top = bottom + (height + 2. * padding)

        # horizontal extent: self.width, centred on the middle of the text box
        left = x0 + width / 2. - self.width / 2.
        right = x0 + width / 2. + self.width / 2.

        corners = [(left, bottom),
                   (right, bottom), (right, top), (left, top),
                   (left, bottom)]
        codes = [Path.MOVETO] + [Path.LINETO] * 3 + [Path.CLOSEPOLY]

        return Path(corners, codes)

### List files in the result folder 

In [None]:
# Consolidated result files, one per solution method, under <cwd>/results/consolidated.
resultfolder = os.path.join(os.getcwd(), 'results', 'consolidated')
rpfs_ccg_file, rpfs_grasp_file = [
    os.path.join(resultfolder, result_file_name)
    for result_file_name in ('RPFS_Cmax_CCG_all_results.pkl.gz',
                             'RPFS_Cmax_GRASP_all_results.pkl.gz')
]

### Create the output folder 

In [None]:
# Output locations: tables and graphs each get a sub-folder of the consolidated results folder.
outputfolder = os.path.join(os.getcwd(), 'results', 'consolidated')
outputfolder_graph = os.path.join(outputfolder, 'graphs')
outputfolder_table = os.path.join(outputfolder, 'tables')
for _needed_folder in (outputfolder_graph, outputfolder_table):
    # create the folder (and missing parents) only when it is not there yet
    if not os.path.exists(_needed_folder):
        os.makedirs(_needed_folder)

### Process consolidated result files

In [None]:
# Robust PFSP Budget solutions only (C&CG): load, discard the execution id, flatten the index.
df_rpfs_ccg = (
    pd.read_pickle(rpfs_ccg_file)
    .drop(columns=['executionId'])
    .reset_index()
)

In [None]:
# Robust PFSP Budget solutions only (GRASP): load, discard the execution id, flatten the index.
df_rpfs_grasp = (
    pd.read_pickle(rpfs_grasp_file)
    .drop(columns=['execution_id'])
    .reset_index()
)

**Robust dataframe: calculating new fields.**

In [None]:
# A run counts as optimal only if the solver flagged it, it was validated and its gap is
# (numerically) zero. This must be evaluated before 'gap' is rescaled to a percentage below.
df_rpfs_ccg['optimal'] = (
    df_rpfs_ccg['is_optimal']
    & df_rpfs_ccg['validated']
    & (df_rpfs_ccg['gap'] <= 1e-5)
)
df_rpfs_ccg['time_limit'] = 7200.0
df_rpfs_ccg['time_limit_2'] = 7200.0 * 2

# Cap the master-problem time: rows with n < 15 at 'time_limit', rows with n >= 15 at 'time_limit_2'.
_n_below_15 = (df_rpfs_ccg['n'] < 15).astype(int)
_n_from_15 = (df_rpfs_ccg['n'] >= 15).astype(int)
df_rpfs_ccg['mp_total_time'] = (
    _n_below_15 * np.minimum(df_rpfs_ccg['mp_total_time'], df_rpfs_ccg['time_limit'])
    + _n_from_15 * np.minimum(df_rpfs_ccg['mp_total_time'], df_rpfs_ccg['time_limit_2'])
)

# Total time = (capped) master-problem time + subproblem time.
df_rpfs_ccg['time'] = df_rpfs_ccg['mp_total_time'] + df_rpfs_ccg['sp_total_time']
# Gap rescaled by 100 (the tables report it as a percentage).
df_rpfs_ccg['gap'] = df_rpfs_ccg['gap'] * 100.0
df_rpfs_ccg['RobCost_worstcase'] = df_rpfs_ccg['cmax_validation']
df_rpfs_ccg = df_rpfs_ccg.rename(columns={"budget_Gamma": "RobCost_Gamma"})


In [None]:
# Copy the GRASP columns under the column names used for the C&CG results.
for _common_name, _grasp_name in (('time', 'time_spent'),
                                  ('RobCost_worstcase', 'solution_value'),
                                  ('RobCost_Gamma', 'Gamma')):
    df_rpfs_grasp[_common_name] = df_rpfs_grasp[_grasp_name]

In [None]:
df_rpfs_ccg.tail(4)

In [None]:
df_rpfs_ccg.info()

### Checking the Robust PFSP Budget C&CG solutions dataframe

In [None]:
df_rpfs_ccg.head(2)

### Checking the Robust PFSP Budget GRASP solutions dataframe

In [None]:
df_rpfs_grasp.head(4)

# 1. Pre-process C&CG results dataframe

Replace model names with the name used in table presentation:

In [None]:
# Assign the renamed column back instead of calling `replace(..., inplace=True)` on the
# selected column: the in-place call on `df[col]` is chained assignment, which does not
# update the DataFrame under pandas Copy-on-Write (and the warning about it is silenced
# by the `warnings.filterwarnings('ignore')` call at the top of this notebook).
df_rpfs_ccg['model'] = df_rpfs_ccg['model'].replace({'wagner': 'Wagner', 'wilson': 'Wilson'})

Obtain list of C&CG models, instance types

In [None]:
# Distinct models and instance types, in order of first appearance.
model_list = df_rpfs_ccg['model'].drop_duplicates().tolist()
instance_type_list = df_rpfs_ccg['instance_type'].drop_duplicates().tolist()
print(model_list, instance_type_list, sep='\n')

Add a new column containing the instance size as string

In [None]:
df_temp = df_rpfs_ccg
(df_temp['n'].astype(str) + 'x' + df_temp['m'].astype(str)).unique()

In [None]:
df_rpfs_ccg.columns

In [None]:
# Label every row with its size "<n>x<m>", then index the C&CG results by run identity.
_ccg_index_columns = ['model', 'n', 'm', 'alpha_str', 'seq', 'RobCost_Gamma', 'instance_type']
df_temp = df_rpfs_ccg
_n_label, _m_label = df_temp['n'].astype(str), df_temp['m'].astype(str)
df_temp['instance_size'] = _n_label + 'x' + _m_label
df_rpfs_ccg = df_temp.set_index(_ccg_index_columns)
df_rpfs_ccg

Treating errors in the `gap` column

In [None]:
df_rpfs_ccg['gap'].describe()

In [None]:
# Export the runs whose gap is negative beyond tolerance, for manual inspection.
_check_columns = ['model', 'n', 'm', 'alpha_str', 'seq', 'RobCost_Gamma',
                  'instance_name', 'gap', 'cmax', 'RobCost_worstcase', 'lb']
df_check = df_rpfs_ccg.reset_index()[_check_columns]
_negative_gap_file = os.path.join(os.getcwd(), 'results', 'negative_gap_list.csv')
df_check.loc[df_check['gap'] < -1e-5].to_csv(_negative_gap_file)


In [None]:
# Negative gaps are treated as zero.
df_rpfs_ccg['gap'] = df_rpfs_ccg['gap'].clip(lower=0.0)

In [None]:
df_rpfs_ccg['gap'].describe()

### Remove alpha = R400 instances

In [None]:
# Back to a flat frame; show how many rows belong to alpha = R400.
df_rpfs_ccg = df_rpfs_ccg.reset_index()
_is_r400 = df_rpfs_ccg['alpha_str'] == 'R400'
len(df_rpfs_ccg.loc[_is_r400].index)

In [None]:
# Keep everything except the alpha = R400 rows; show the number of remaining rows.
_not_r400 = df_rpfs_ccg['alpha_str'] != 'R400'
df_rpfs_ccg = df_rpfs_ccg[_not_r400]
len(df_rpfs_ccg.index)

### Remove case study (non-ying) instances 

In [None]:
df_rpfs_ccg.reset_index()['seq'].unique()

## Table 1. C&CG Performance given all instances 

Model-wise Robust PFSP C&CG performance comparison, given all instances.

* % Best Performance is the percentage of instances solved to optimality where the model achieved shorter execution time, when compared to the other models; 

* % Solved contains the percentage of instances solved within the time limit; 

* % Solved < n x m > represents the percentage of solved instances of size n x m; 

* Avg. % Gap is the average percentage gap of solutions from instances not solved to optimality; 

* Median time is the median execution time, in seconds; 

* Median iterations is the median of the number of iterations performed.

In [None]:
def calculate_perc_best_performance(df, model):
    """Percentage of instances on which `model` is strictly faster than every other model.

    Only rows with `optimal == True` are considered. An instance is identified by
    (n, m, alpha, seq, RobCost_Gamma, instance_type); the comparison covers the
    instances solved to optimality both by `model` and by at least one other model
    (inner join), and `model` wins when its `time` is strictly below the smallest
    `time` among the other models.

    Returns the percentage rounded to 2 decimals, or NaN when `model` has no optimal
    row or no other model has one.
    """
    group_columns = ['n', 'm', 'alpha', 'seq', 'RobCost_Gamma', 'instance_type']
    solved = df.reset_index()
    solved = solved[solved['optimal'] == True]

    # runs of the model under evaluation, keyed by instance
    own_runs = solved[solved['model'] == model].set_index(group_columns)
    if len(own_runs.index) == 0:
        return np.nan

    # fastest run among all the other models, per instance
    rival_runs = solved[solved['model'] != model]
    fastest_rival = rival_runs.groupby(by=group_columns)[['time']].min()
    if len(fastest_rival.index) == 0:
        return np.nan

    # the overlapping 'time' column of the rivals is renamed 'time_best' by the join
    compared = fastest_rival.join(own_runs, how='inner', on=group_columns, lsuffix='_best')
    wins = (compared['time'] < compared['time_best']).astype(int)
    return np.round(100.0 * wins.sum() / len(compared.index), 2)

In [None]:
def calculate_perc_solved(df, model, instance_type = None, instance_size = None, alpha = None):
    """Percentage of rows flagged `optimal` among the rows matching the given filters.

    Every filter left as None is skipped; the others are equality tests on the columns
    `model`, `instance_type`, `instance_size` and `alpha`. Returns the percentage
    rounded to 2 decimals, or NaN when no row matches.
    """
    selected = df.reset_index()
    requested = (('model', model),
                 ('instance_type', instance_type),
                 ('instance_size', instance_size),
                 ('alpha', alpha))
    for column, wanted in requested:
        if wanted is not None:
            selected = selected[selected[column] == wanted]

    total = len(selected.index)
    if total == 0:
        return np.nan
    solved = len(selected[(selected['optimal'] == True)].index)
    return np.round(100.0 * solved / total, 2)

In [None]:
# Avg. % Gap is the average percentage gap of solutions from instances not solved to optimality
def calculate_avg_perc_gap(df, model):
    """Mean of `gap` over the rows with `optimal == False` (optionally for one model).

    The mean is rounded to 2 decimals when it is at least 0.01; smaller means (and
    NaN, when there is no such row) are returned unrounded.
    """
    rows = df.reset_index()
    if model is not None:
        rows = rows[rows['model'] == model]
    avg_gap = rows.loc[rows['optimal'] == False, 'gap'].mean()
    return np.round(avg_gap, 2) if avg_gap >= 1e-2 else avg_gap

In [None]:
def calculate_median_time(df, model, time_col_name):
    """Median of column `time_col_name`, rounded to 2 decimals.

    When `model` is not None only the rows of that model are used.
    """
    rows = df.reset_index()
    selected = rows if model is None else rows[rows['model'] == model]
    return np.round(selected[time_col_name].median(), 2)

In [None]:
def calculate_avg_time(df, model, time_col_name):
    """Mean of column `time_col_name`, rounded to 2 decimals.

    When `model` is not None only the rows of that model are used.
    """
    rows = df.reset_index()
    selected = rows if model is None else rows[rows['model'] == model]
    return np.round(selected[time_col_name].mean(), 2)

In [None]:
def calculate_std(df, model, col_name):
    """Standard deviation (pandas `Series.std`) of column `col_name`.

    When `model` is not None only the rows of that model are used; for the 'gap'
    column only rows with `optimal == False` are used. The value is rounded to
    2 decimals when it is at least 0.01, otherwise (including NaN) it is returned as is.
    """
    rows = df.reset_index()
    if model is not None:
        rows = rows[rows['model'] == model]
    if col_name == 'gap':
        rows = rows[rows['optimal'] == False]
    spread = rows[col_name].std()
    return np.round(spread, 2) if spread >= 1e-2 else spread

In [None]:
import numpy as np
import scipy.stats

def mean_confidence_interval(df, model, col_name, confidence=0.95):
    """Confidence interval of the mean of `col_name`, formatted as '[lower, upper]'.

    Rows are restricted to `model` when it is not None and, for the 'gap' column, to
    rows with `optimal == False`. The half-width is the standard error of the mean
    times the Student-t quantile at (1 + confidence) / 2 with n - 1 degrees of freedom.
    Both bounds are rounded to 2 decimals; '-' is returned when either bound is NaN.
    """
    rows = df.reset_index()
    if model is not None:
        rows = rows[rows['model'] == model]
    if col_name == 'gap':
        rows = rows[rows['optimal'] == False]

    values = 1.0 * np.array(rows[col_name])
    sample_size = len(values)
    center = np.mean(values)
    std_error = scipy.stats.sem(values)
    half_width = std_error * scipy.stats.t.ppf((1 + confidence) / 2., sample_size - 1)

    lb, ub = np.round(center - half_width, 2), np.round(center + half_width, 2)
    if np.isnan(lb) or np.isnan(ub):
        return '-'
    return '[{}, {}]'.format(lb, ub)

In [None]:
def calculate_median_iterations(df, model):
    """Median of the `iterations` column, rounded to 2 decimals.

    When `model` is not None only the rows of that model are used.
    """
    rows = df.reset_index()
    selected = rows if model is None else rows[rows['model'] == model]
    return np.round(selected['iterations'].median(), 2)

In [None]:
def calculate_avg_iterations(df, model):
    """Mean of the `iterations` column, rounded to 2 decimals.

    When `model` is not None only the rows of that model are used.
    """
    rows = df.reset_index()
    selected = rows if model is None else rows[rows['model'] == model]
    return np.round(selected['iterations'].mean(), 2)

In [None]:
def calculate_num_instances(df, model):
    """Number of rows in `df`, restricted to `model` when it is not None."""
    rows = df.reset_index()
    selected = rows if model is None else rows[rows['model'] == model]
    return len(selected.index)

## 1.1. All instances together, performance summary

In [None]:
# Table 1: one column of summary statistics per C&CG model, over all instances.
model_stats = dict()
exclude_model_list = []
df_base = df_rpfs_ccg
df_base = df_base[~(df_base['model'].isin(exclude_model_list))]
model_list_reduced = [name for name in model_list if name not in exclude_model_list]
for model in model_list_reduced:
    stats = model_stats[model] = dict()
    stats['% Best Performance'] = calculate_perc_best_performance(df_base, model)
    stats['% Solved'] = calculate_perc_solved(df_base, model)  # given all instances
    # one '% Solved <size>' entry per instance size found in each instance type
    for instance_type in instance_type_list:
        df_itype = df_base
        df_itype = df_itype[(df_itype['instance_type'] == instance_type)]
        instance_size_list = df_itype['instance_size'].unique().tolist()
        for instance_size in instance_size_list:
            stats['% Solved '+ instance_size] = calculate_perc_solved(
                df_base, model, instance_type, instance_size)
    stats['Avg. % gap'] = calculate_avg_perc_gap(df_base, model)
    for label, time_column in (('Median time', 'time'),
                               ('Median time MP', 'mp_total_time'),
                               ('Median time SP', 'sp_total_time')):
        stats[label] = calculate_median_time(df_base, model, time_column)
    stats['Median iterations'] = calculate_median_iterations(df_base, model)

In [None]:
# Statistics as rows, models as columns.
model_stats_df = pd.DataFrame(model_stats)
model_stats_df

### Export to Tableau, after melt

In [None]:
model_stats_df.transpose().reset_index()

In [None]:
# Long format (one row per model / statistic pair) written to an Excel sheet.
_stats_by_model = model_stats_df.transpose()
value_vars = _stats_by_model.columns
df_melt_model_stats_df = pd.melt(
    _stats_by_model.reset_index(),
    id_vars=['index'],
    value_vars=value_vars,
)
df_melt_model_stats_df['Model'] = df_melt_model_stats_df['index']
_summary_file = os.path.join(outputfolder_table, '1_ccg_cmax_model_stats_summary.xlsx')
df_melt_model_stats_df.to_excel(_summary_file)

## Table 2. Performance per instance group and model

Model-wise Robust PFSP C&CG performance comparison, per instance group.

* % Best Performance is the percentage of instances solved to optimality where the model achieved shorter execution time, when compared to the other models; 

* % Solved contains the percentage of instances solved within the time limit; 

* Avg. % Gap is the average percentage gap of solutions from instances not solved to optimality; 

* Avg. time and Std. dev. of time are the mean and standard deviation in solution time (s), respectively;

* Avg. iterations and Std. dev. of iterations are the mean and standard deviation of the number of iterations performed.

In [None]:
# Table 2: statistics per (instance type, instance size, model).
per_instance_stats = dict()
for instance_type in instance_type_list:  # group by instance type and size
    exclude_model_list = []
    df_base = df_rpfs_ccg.reset_index()
    df_base = df_base[~(df_base['model'].isin(exclude_model_list))]
    model_list_reduced = [name for name in model_list if name not in exclude_model_list]
    df_itype = df_base
    df_itype = df_itype[(df_itype['instance_type'] == instance_type)]
    instance_size_list = df_itype['instance_size'].unique().tolist()
    for instance_size in instance_size_list:
        df_instance = df_itype[df_itype['instance_size'] == instance_size]
        for model in model_list_reduced:
            key = (instance_type,instance_size,model)
            per_instance_stats[key] = {
                '% Best Performance': calculate_perc_best_performance(df_instance, model),
                '% Solved': calculate_perc_solved(df_base, model, instance_type, instance_size),
                'Avg. % gap': calculate_avg_perc_gap(df_instance, model),
                'Std. dev. of % gap': calculate_std(df_instance, model, 'gap'),
                '95% CI of % gap': mean_confidence_interval(df_instance, model, 'gap'),
                'Avg. time': calculate_avg_time(df_instance, model, 'time'),
                'Std. dev. of time': calculate_std(df_instance, model, 'time'),
                'Avg. MP time': calculate_avg_time(df_instance, model, 'mp_total_time'),
                'Avg. SP time': calculate_avg_time(df_instance, model, 'sp_total_time'),
                'Avg. iterations': calculate_avg_iterations(df_instance, model),
                'Std. dev. of iterations': calculate_std(df_instance, model, 'iterations'),
                # NOTE(review): this counts every row of the group, not only the
                # optimal ones -- confirm that the label matches the intent.
                '# instances solved': calculate_num_instances(df_instance, model),
            }

In [None]:
# https://stackoverflow.com/questions/57606801/pandas-style-options-to-latex

In [None]:
# Show every column of the per-instance table (all keys are kept).
pd.set_option('display.max_columns', None)
allowed_keys = list(per_instance_stats.keys())
per_instance_stats1 = {}
for your_key in allowed_keys:
    per_instance_stats1[your_key] = per_instance_stats[your_key]
df_table3a = pd.DataFrame.from_dict(per_instance_stats1)
#df_table3a.columns = df_table3a.columns.droplevel()
df_table3a

In [None]:
# Same table, restricted to a fixed selection of instance sizes.
pd.set_option('display.max_columns', None)
_shown_sizes = ['200x2', '10x4', '10x5', '15x5', '20x5']
allowed_keys = [stats_key for stats_key in per_instance_stats.keys()
                if stats_key[1] in _shown_sizes]
per_instance_stats1 = {}
for your_key in allowed_keys:
    per_instance_stats1[your_key] = per_instance_stats[your_key]
df_table3a = pd.DataFrame.from_dict(per_instance_stats1)
#df_table3a.columns = df_table3a.columns.droplevel()
df_table3a

#### Export table to Tableau, after melt

In [None]:
# Long format of the per-instance table, with readable copies of the key levels.
df_table3 = pd.DataFrame.from_dict(per_instance_stats)
#df_table3.columns = df_table3.columns.droplevel()
_table3_rows = df_table3.transpose()
value_vars = _table3_rows.columns
df_melt_table3 = pd.melt(
    _table3_rows.reset_index(),
    id_vars=['level_0', 'level_1', 'level_2'],
    value_vars=value_vars,
)
for _label, _level in (('Instance type', 'level_0'),
                       ('Instance size', 'level_1'),
                       ('Model', 'level_2')):
    df_melt_table3[_label] = df_melt_table3[_level]
df_melt_table3.to_excel(os.path.join(outputfolder_table, '2_ccg_cmax_model_stats_per_instance.xlsx'))


### Table 3. Performance per Instance group, alpha and model

In [None]:
# Table 3: statistics per (instance type, instance size, alpha, model).
per_instance_stats = dict()
alpha_list = df_rpfs_ccg.reset_index()['alpha_str'].unique().tolist()
for instance_type in instance_type_list:  # group by instance type and size
    df_base = df_rpfs_ccg.reset_index()
    exclude_model_list = []
    df_base = df_base[~(df_base['model'].isin(exclude_model_list))]
    model_list_reduced = [_ for _ in model_list if _ not in exclude_model_list]
    df_itype = df_base
    df_itype = df_itype[(df_itype['instance_type'] == instance_type)]
    instance_size_list = df_itype['instance_size'].unique().tolist()
    for instance_size in instance_size_list:
        df_instance = df_itype[df_itype['instance_size'] == instance_size]
        for alpha in alpha_list:
            # `alpha` is a label taken from the 'alpha_str' column
            df_alpha = df_instance[df_instance['alpha_str'] == alpha]
            for model in model_list_reduced:
                key = (instance_type,instance_size,alpha,model)
                per_instance_stats[key] = dict()
                per_instance_stats[key]['% Best Performance'] = calculate_perc_best_performance(df_alpha, model)
                # Computed on df_alpha like every other statistic of this row.
                # Passing the 'alpha_str' label as the `alpha` filter of
                # calculate_perc_solved would compare it with the 'alpha' column,
                # which is a different column from the one used to build df_alpha.
                per_instance_stats[key]['% Solved'] = calculate_perc_solved(df_alpha, model)
                per_instance_stats[key]['Avg. % gap'] = calculate_avg_perc_gap(df_alpha, model)
                per_instance_stats[key]['Std. dev. of % gap'] = calculate_std(df_alpha, model, 'gap')
                per_instance_stats[key]['95% CI of % gap'] = mean_confidence_interval(df_alpha, model, 'gap')
                per_instance_stats[key]['Avg. time'] = calculate_avg_time(df_alpha, model, 'time')
                per_instance_stats[key]['Std. dev. of time'] = calculate_std(df_alpha, model, 'time')
                per_instance_stats[key]['Avg. MP time'] = calculate_avg_time(df_alpha, model, 'mp_total_time')
                per_instance_stats[key]['Avg. SP time'] = calculate_avg_time(df_alpha, model, 'sp_total_time')
                per_instance_stats[key]['Avg. iterations'] = calculate_avg_iterations(df_alpha, model)
                per_instance_stats[key]['Std. dev. of iterations'] = calculate_std(df_alpha, model, 'iterations')
                # NOTE(review): counts every row of the group, not only the optimal
                # ones -- confirm that the label matches the intent.
                per_instance_stats[key]['# instances solved'] = calculate_num_instances(df_alpha, model)

#### Export table to Tableau, after melt

In [None]:
# Long format of the per-alpha table, with readable copies of the key levels.
df_table_alpha = pd.DataFrame.from_dict(per_instance_stats)
#df_table_alpha.columns = df_table_alpha.columns.droplevel()
_table_alpha_rows = df_table_alpha.transpose()
value_vars = _table_alpha_rows.columns
df_melt_table3 = pd.melt(
    _table_alpha_rows.reset_index(),
    id_vars=['level_0', 'level_1', 'level_2', 'level_3'],
    value_vars=value_vars,
)
for _label, _level in (('Instance type', 'level_0'),
                       ('Instance size', 'level_1'),
                       ('Alpha', 'level_2'),
                       ('Model', 'level_3')):
    df_melt_table3[_label] = df_melt_table3[_level]
df_melt_table3.to_excel(os.path.join(outputfolder_table, '3_ccg_cmax_model_stats_per_instance_and_alpha.xlsx'))


# 2. Pre-process GRASP results dataframe

Add a new column containing the instance size as string

In [None]:
df_temp = df_rpfs_grasp
(df_temp['n'].astype(str) + 'x' + df_temp['m'].astype(str)).unique()

In [None]:
df_rpfs_grasp.columns

In [None]:
# Label every row with its size "<n>x<m>", then index the GRASP results by run identity.
_grasp_index_columns = ['n', 'm', 'alpha_str', 'seq', 'RobCost_Gamma', 'instance_type']
df_temp = df_rpfs_grasp
_n_label, _m_label = df_temp['n'].astype(str), df_temp['m'].astype(str)
df_temp['instance_size'] = _n_label + 'x' + _m_label
df_rpfs_grasp = df_temp.set_index(_grasp_index_columns)
df_rpfs_grasp

### Remove alpha = R400 instances

In [None]:
# Back to a flat frame; show how many rows belong to alpha = R400.
df_rpfs_grasp = df_rpfs_grasp.reset_index()
_is_r400 = df_rpfs_grasp['alpha_str'] == 'R400'
len(df_rpfs_grasp.loc[_is_r400].index)

In [None]:
# Keep everything except the alpha = R400 rows; show the number of remaining rows.
_not_r400 = df_rpfs_grasp['alpha_str'] != 'R400'
df_rpfs_grasp = df_rpfs_grasp[_not_r400]
len(df_rpfs_grasp.index)

### Table 4. GRASP performance per instance group and model

Model-wise Robust PFSP GRASP performance comparison, per instance group.

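* % Best Performance is the percentage of instances solved to optimality where the model achieved shorter execution time, when compared to the other models (defined as for the C&CG tables; it is currently not computed for GRASP, since the corresponding line is commented out in the cell below); 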

* Avg. time and Std. dev. of time are the mean and standard deviation in solution time (s), respectively;

* Avg. iterations and Std. dev. of iterations are the mean and standard deviation of the number of iterations performed.

In [None]:
# Table 4: GRASP statistics per (instance type, instance size).
# Only time, iteration and count statistics are computed here; the best-performance,
# solved, gap and MP/SP-time statistics of the C&CG tables are not.
grasp_per_instance_stats = dict()
for instance_type in instance_type_list:  # group by instance type and size
    df_base = df_rpfs_grasp.reset_index()
    df_itype = df_base
    df_itype = df_itype[(df_itype['instance_type'] == instance_type)]
    instance_size_list = df_itype['instance_size'].unique().tolist()
    for instance_size in instance_size_list:
        df_instance = df_itype[df_itype['instance_size'] == instance_size]
        model = None  # no per-model filter is applied to the GRASP rows
        grasp_per_instance_stats[(instance_type,instance_size)] = {
            'Avg. time': calculate_avg_time(df_instance, model, 'time'),
            'Std. dev. of time': calculate_std(df_instance, model, 'time'),
            'Avg. iterations': calculate_avg_iterations(df_instance, model),
            'Std. dev. of iterations': calculate_std(df_instance, model, 'iterations'),
            '# solutions obtained': calculate_num_instances(df_instance, model),
        }
            

In [None]:
# https://stackoverflow.com/questions/57606801/pandas-style-options-to-latex

In [None]:
pd.set_option('display.max_columns', None)
allowed_keys = [(x, y) for (x, y) in grasp_per_instance_stats.keys()]
per_instance_stats1 = { your_key: grasp_per_instance_stats[your_key] for your_key in allowed_keys }
df_table4 = pd.DataFrame.from_dict(per_instance_stats1)
#df_table4.columns = df_table3a.columns.droplevel()
df_table4

### Table 5. C&CG vs. GRASP performance per instance group and model

In [None]:
# Table 5 (work in progress): for each instance type / size / C&CG model, pair every
# C&CG run with the GRASP runs sharing the join columns and print the difference
# between the GRASP and the C&CG worst-case cost.
vs_per_instance_stats = dict()
join_columns = ['instance_type', 'n', 'm', 'alpha_str', 'Gamma%']  # instance_name
for instance_type in instance_type_list:  # group by instance type and size
    print('Instance type = ' + instance_type)
    df_grasp = df_rpfs_grasp.reset_index()
    df_grasp = df_grasp[(df_grasp['instance_type'] == instance_type)]

    df_ccg = df_rpfs_ccg.reset_index()
    df_ccg = df_ccg[(df_ccg['instance_type'] == instance_type)]
    # The sizes come from the C&CG results (the GRASP-derived list that used to be
    # computed first was immediately overwritten, so it has been dropped).
    instance_size_list = df_ccg['instance_size'].unique().tolist()

    for instance_size in instance_size_list:
        print('Instance size = ' + instance_size)
        df_grasp_instance = df_grasp[df_grasp['instance_size'] == instance_size]
        for model in ['Wilson', 'Wagner']:
            print('Model = ' + model)
            # Create the entry here, once `model` is bound by this loop; creating it
            # before the loop keyed it on whatever value `model` had in an earlier cell.
            vs_per_instance_stats[(instance_type,instance_size,model)] = dict()
            df_ccg_instance = df_ccg[(df_ccg['instance_size'] == instance_size) & (df_ccg['model'] == model)]
            df_instance = df_ccg_instance.merge(df_grasp_instance, on=join_columns, suffixes=('_ccg', '_grasp'))
            # positive gap: the GRASP worst-case cost is above the C&CG one
            df_instance['gap'] = df_instance['RobCost_worstcase_grasp'] - df_instance['RobCost_worstcase_ccg']
            df_instance['gap%'] = df_instance['gap'] / df_instance['RobCost_worstcase_ccg']
            print(df_instance[join_columns + ['gap', 'gap%']])

            # Preview only: stop after the first model of the first size of each type.
            # TODO: fill vs_per_instance_stats before removing the two `break`s. After
            # the merge, columns present in both frames (e.g. 'time') carry the
            # '_ccg' / '_grasp' suffixes, so the helper calls that used to follow this
            # `break` (unreachable, and reading the unsuffixed 'time') were removed.
            break
        break
            