## 2RPFS Problem (Cmax objective) - Tables and Graphs

Before running this notebook, please run notebook 0.1.

In [None]:
import pandas as pd
import numpy as np
import os, fnmatch
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import glob
import seaborn as sns
import gzip
import matplotlib.style as style
from matplotlib.path import Path
from matplotlib.patches import BoxStyle

%matplotlib inline

In [None]:
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

In [None]:
# Named dash patterns as (name, (offset, on/off sequence)) pairs.
# Note that 'dotted' and 'densely dotted' carry the same pattern here.
linestyle_tuple = [
    # dotted / dashed basics
    ('dotted', (0, (1, 1))),
    ('dashed', (0, (5, 5))),
    ('densely dashed', (0, (5, 1))),
    ('dashdotdotted', (0, (3, 5, 1, 5, 1, 5))),
    ('densely dashdotdotted', (0, (3, 1, 1, 1, 1, 1))),
    # dash-dot variants
    ('dashdotted', (0, (3, 5, 1, 5))),
    ('densely dashdotted', (0, (3, 1, 1, 1))),
    # loosely spaced variants
    ('loosely dashed', (0, (5, 10))),
    ('loosely dashdotted', (0, (3, 10, 1, 10))),
    ('loosely dashdotdotted', (0, (3, 10, 1, 10, 1, 10))),
    # remaining dotted variants
    ('densely dotted', (0, (1, 1))),
    ('loosely dotted', (0, (1, 10))),
]

In [None]:
# https://stackoverflow.com/questions/51483901/is-there-a-way-to-extend-the-solid-color-background-to-the-full-width-of-the-pag
class ExtendedTextBox(BoxStyle._Base):
    """
    Box style that draws a rectangle of a fixed horizontal extent,
    centred horizontally on the text's own bounding box.

    With `width` set to the width of the axes and the text placed in the
    middle of the axes, the box spans the axes from side to side.

    NOTE(review): this relies on the private `BoxStyle._Base` class and its
    `transmute` hook; confirm both exist in the installed Matplotlib version.
    """

    def __init__(self, pad=0.3, width=500.):
        """
        pad:
            vertical padding factor; it is multiplied by the mutation size
            in `transmute` (no horizontal padding is applied).
        width:
            total horizontal extent of the box.
            `ax.get_window_extent().width` gives the width of the axes.
        """
        self.width = width
        self.pad = pad
        super(ExtendedTextBox, self).__init__()

    def transmute(self, x0, y0, width, height, mutation_size):
        """
        Build the closed rectangular outline of the box.

        x0, y0 are the lower-left corner of the original text box and
        width, height its size; Matplotlib supplies them automatically.
        Returns a `Path` describing the rectangle.
        """
        vertical_pad = mutation_size * self.pad

        # Only the height is padded: `vertical_pad` is added above and below.
        padded_height = height + 2.*vertical_pad
        bottom = y0 - vertical_pad
        top = bottom + padded_height

        # Horizontally the box ignores the text width except for centring.
        centre = x0 + width / 2.
        half_extent = self.width / 2.
        left = centre - half_extent
        right = centre + half_extent

        corners = [(left, bottom),
                   (right, bottom), (right, top), (left, top),
                   (left, bottom)]
        codes = [Path.MOVETO] + [Path.LINETO] * 3 + [Path.CLOSEPOLY]

        return Path(corners, codes)

### Locate the consolidated result file 

In [None]:
# The consolidated results are looked up relative to the current working directory.
resultfolder = os.path.join(os.getcwd(), os.path.join('results', 'consolidated'))
# Gzip-compressed pickle with the results of all RPFS runs.
rpfs_file = os.path.join(resultfolder, 'RPFS_TWCT_all_results.pkl.gz')

### Create the output folder 

In [None]:
# Graphs and tables are written into sub-folders of the consolidated results folder.
outputfolder = os.path.join(os.getcwd(), 'results', 'consolidated')
outputfolder_graph = os.path.join(outputfolder, 'graphs')
outputfolder_table = os.path.join(outputfolder, 'tables')
# Create whichever output folder is still missing.
for _folder in (outputfolder_graph, outputfolder_table):
    if not os.path.exists(_folder):
        os.makedirs(_folder)

### Process the consolidated result file (gzip-compressed pickle)

In [None]:
# Robust PFSP Budget solutions only; the execution id is not used below.
# NOTE(review): unpickling executes arbitrary code - only load files you produced yourself.
df_rpfs = pd.read_pickle(rpfs_file).drop(columns=['executionId'])

**Robust dataframe: calculating new fields.**

In [None]:
# `optimal` has to be derived first: the 1e-8 tolerance applies to the gap
# as stored in the file, i.e. before it is rescaled by 100 below.
df_rpfs['optimal'] = (
    df_rpfs['is_optimal']
    & df_rpfs['validated']
    & df_rpfs['gap'].le(1e-8)
)
# Reported times are capped at the time limit.
df_rpfs['time_limit'] = 7200.0
df_rpfs['time'] = np.minimum(df_rpfs['time_spent'], df_rpfs['time_limit'])
# From here on `gap` holds the original value multiplied by 100.
# Re-running this cell would scale it again.
df_rpfs['gap'] = df_rpfs['gap'].mul(100.0)
df_rpfs['RobCost_worstcase'] = df_rpfs['wct_validation']
# NOTE(review): this only renames a *column* called budget_Gamma; later cells
# still use 'budget_Gamma' as a key, so presumably it is an index level here
# and this rename has no effect - confirm.
df_rpfs = df_rpfs.rename(columns={"budget_Gamma": "RobCost_Gamma"})
df_rpfs.head(2)

In [None]:
# Print a summary of the dataframe's columns after the derived fields were added.
df_rpfs.info()

### Checking the Robust PFSP Budget solutions dataframe

In [None]:
# Show the first two rows as a quick visual check.
df_rpfs.head(2)

# Tables

Obtain list of C&CG models, instance types

In [None]:
# Distinct models and instance types, in order of first appearance.
model_list, instance_type_list = (
    df_rpfs.reset_index()[column].unique().tolist()
    for column in ('model', 'instance_type')
)
print(model_list)
print(instance_type_list)

Add a new column containing the instance size as string

In [None]:
# Preview the distinct "<n>x<m>" size labels before storing them as a column.
df_temp = df_rpfs.reset_index()
df_temp['n'].astype(str).str.cat(df_temp['m'].astype(str), sep='x').unique()

In [None]:
# Flatten the index, append the "<n>x<m>" label as `instance_size`,
# then index the frame by the columns that identify a single run.
df_temp = df_rpfs.reset_index().assign(
    instance_size=lambda frame: frame['n'].astype(str) + 'x' + frame['m'].astype(str)
)
df_rpfs = df_temp.set_index(
    ['model', 'n', 'm', 'alpha', 'seq', 'budget_Gamma', 'instance_type']
)
df_rpfs

Treating errors in the `gap` column

In [None]:
# Summary statistics of `gap` before negative values are clamped.
df_rpfs['gap'].describe()

In [None]:
# Negative gaps are treated as errors and clamped to zero. The ufunc is
# applied to the whole column at once instead of one Python call per row.
df_rpfs['gap'] = np.maximum(df_rpfs['gap'], 0.0)

In [None]:
# Summary statistics of `gap` after clamping; the minimum should no longer be negative.
df_rpfs['gap'].describe()

## Table 2. Performance given all instances 

Model-wise Robust PFSP C&CG performance comparison, given all instances.

* % Best Performance is the percentage of instances solved to optimality where the model achieved shorter execution time, when compared to the other models; 

* % Solved contains the percentage of instances solved within the time limit; 

* % Solved < n > represents the percentage of solved instances of size n; 

* Avg. % Gap is the average percentage gap of solutions from instances not solved to optimality; 

* Median time is the median execution time, in seconds; 

* Median iterations is the median of the number of iterations performed.

In [None]:
def calculate_perc_best_performance(df, model):
    """
    Percentage of instances on which `model` was strictly faster than every
    other model, counting optimally solved runs only.

    An instance is identified by (n, m, alpha, seq, budget_Gamma,
    instance_type). Only instances solved to optimality by `model` AND by at
    least one other model take part in the comparison; ties do not count as
    wins.

    df:
        results frame holding (as columns or index levels) the instance key
        fields plus 'model', 'optimal' and 'time'.
    model:
        name of the model being evaluated.

    Returns the percentage rounded to 2 decimals, or NaN when there is no
    instance to compare on.
    """
    instance_keys = ['n', 'm', 'alpha', 'seq', 'budget_Gamma', 'instance_type']

    df_solved = df.reset_index()
    df_solved = df_solved[df_solved['optimal'] == True]
    is_model = df_solved['model'] == model

    # Time of the evaluated model on each instance it solved.
    df_model = df_solved[is_model].set_index(instance_keys)[['time']]

    # Fastest time among all *other* models per instance. Selecting 'time'
    # before aggregating avoids taking the minimum of unrelated columns.
    best_other_time = (df_solved[~is_model]
                       .groupby(by=instance_keys)['time']
                       .min()
                       .rename('time_best'))

    if df_model.empty or best_other_time.empty:
        return np.nan

    # Keep only the instances solved by both sides.
    df_compare = df_model.join(best_other_time, how='inner')
    if len(df_compare.index) == 0:
        return np.nan

    time_wins = df_compare['time'] < df_compare['time_best']
    return np.round(100.0 * time_wins.sum() / len(df_compare.index), 2)

In [None]:
def calculate_perc_solved(df, model, instance_type = None, instance_size = None):
    """
    Percentage of the runs of `model` that are flagged as optimal.

    The runs can optionally be narrowed down to one `instance_type` and/or
    one `instance_size`. Returns the percentage rounded to 2 decimals, or
    NaN when no run matches the selection.
    """
    rows = df.reset_index()
    selected = rows['model'] == model
    if instance_type is not None:
        selected = selected & (rows['instance_type'] == instance_type)
    if instance_size is not None:
        selected = selected & (rows['instance_size'] == instance_size)

    n_runs = int(selected.sum())
    if n_runs == 0:
        return np.nan

    n_solved = int((rows.loc[selected, 'optimal'] == True).sum())
    return np.round(100.0 * n_solved / n_runs, 2)

In [None]:
# Avg. % Gap is the average percentage gap of solutions from instances not solved to optimality
def calculate_avg_perc_gap(df, model):
    """
    Mean of the `gap` column over the runs of `model` that are NOT flagged
    as optimal, rounded to 2 decimals (NaN when there is no such run).
    """
    rows = df.reset_index()
    not_solved = (rows['model'] == model) & (rows['optimal'] == False)
    return np.round(rows.loc[not_solved, 'gap'].mean(), 2)

In [None]:
def calculate_median_time(df, model):
    """
    Median of the `time` column over all runs of `model`, rounded to
    2 decimals (NaN when the model has no runs).
    """
    rows = df.reset_index()
    model_times = rows.loc[rows['model'] == model, 'time']
    return np.round(model_times.median(), 2)

In [None]:
def calculate_median_iterations(df, model):
    """
    Median of the `iterations` column over all runs of `model`, rounded to
    2 decimals (NaN when the model has no runs).
    """
    rows = df.reset_index()
    model_iterations = rows.loc[rows['model'] == model, 'iterations']
    return np.round(model_iterations.median(), 2)

In [None]:
# The instance sizes available for each instance type do not depend on the
# model, so they are collected once instead of once per model.
df_flat = df_rpfs.reset_index()
sizes_by_type = []
for instance_type in instance_type_list:
    df_itype = df_flat[(df_flat['instance_type'] == instance_type)]
    instance_size_list = df_itype['instance_size'].unique().tolist()
    sizes_by_type.append((instance_type, instance_size_list))

# One dict of statistics per model; keys are inserted in the order the
# columns are meant to be read.
model_stats = dict()
for model in model_list:
    stats = model_stats[model] = dict()
    stats['%Best Performance'] = calculate_perc_best_performance(df_rpfs, model)
    stats['%Solved'] = calculate_perc_solved(df_rpfs, model)  # given all instances
    for instance_type, instance_size_list in sizes_by_type:  # group by instance type and size
        for instance_size in instance_size_list:
            stats['%Solved '+instance_type+' '+ instance_size] = calculate_perc_solved(df_rpfs, model, instance_type, instance_size)
    stats['Avg % gap'] = calculate_avg_perc_gap(df_rpfs, model)
    stats['Median time'] = calculate_median_time(df_rpfs, model)
    stats['Median iterations'] = calculate_median_iterations(df_rpfs, model)

In [None]:
# Display the nested {model: {statistic: value}} dictionary built above.
model_stats

In [None]:
# Pivot of time / gap / iterations / win counts per (n, model_x).
# NOTE(review): `df_rob_self` is not defined in any cell visible above this
# one, so on a fresh kernel this cell would raise NameError - confirm where
# it is supposed to come from, or whether this cell is a leftover.
# NOTE(review): with `columns=` and no `index=`, the value names may end up
# on the row axis of `table`; in that case `table['x_wins_y_time']` below is
# not a valid column lookup - verify.
# NOTE(review): 1250 is presumably the number of comparisons per group;
# confirm and name the constant.
table = pd.pivot_table(df_rob_self, columns=['n', 'model_x'], values=['time_x', 'gap_x', 'iterations_x', 'x_wins_y_time'],
                       aggfunc={'time_x' : ['mean', 'std'], 'gap_x' : 'mean', 'iterations_x' : ['mean', 'std'],
                               'x_wins_y_time' : ['sum']})  # , margins=True, fill_value=0)
table['perc_x_wins_y_time'] = table['x_wins_y_time'] * 100 / 1250
# Lift the row/column display limits so the whole table is rendered.
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    display(table)