## 2RPFS Problem (Cmax objective) - Tables and Graphs

Before running this notebook, please run notebooks 0.1 and 0.2 (in this order).

In [None]:
import pandas as pd
import numpy as np
import os, fnmatch
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import glob
import seaborn as sns

%matplotlib inline

In [None]:
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

### List files in the result folder 

In [None]:
# Consolidated CSV inputs (presumably produced by notebooks 0.1 and 0.2 -- TODO confirm).
# All three files live in the same folder, so each path is derived from it.
resultfolder = os.path.join(os.getcwd(), 'results', 'consolidated')
rpfs_file, det_file, stoc_file = (
    os.path.join(resultfolder, csv_name)
    for csv_name in ('2RPFS_Cmax_all_results.csv',
                     'PFSP_Cmax_deterministic_all_results.csv',
                     'simgrasp_cmax_ying_stochgrasp_results.csv')
)

### Create the output folder 

In [None]:
# Figures are written under <cwd>/results/consolidated; create the folder on first use.
outputfolder = os.path.join(os.getcwd(), 'results', 'consolidated')
folder_is_missing = not os.path.exists(outputfolder)
if folder_is_missing:
    os.makedirs(outputfolder)

### Process consolidated CSV result files

In [None]:
# Robust and deterministic results are ';'-separated; their 'executionId' column is dropped on load.
df_rpfs = pd.read_csv(rpfs_file, delimiter=';').drop(columns=['executionId'])
df_dpfs = pd.read_csv(det_file, delimiter=';').drop(columns=['executionId'])
# The SimGRASP results file is ','-separated and is kept as read.
df_stoc = pd.read_csv(stoc_file, delimiter=',')

Robust dataframe: calculating new fields

In [None]:
# 'optimal' must be computed first: its 1e-5 threshold applies to 'gap' BEFORE the x100 rescale below.
df_rpfs['optimal'] = df_rpfs['is_optimal'] & df_rpfs['validated'] & (df_rpfs['gap'] <= 1e-5)
# Same limit for every row (presumably seconds -- TODO confirm); used to cap the reported time.
df_rpfs['time_limit'] = 7200.0
# Element-wise minimum: 'time' never exceeds 'time_limit'.
df_rpfs['time'] = np.minimum(df_rpfs['time_spent'], df_rpfs['time_limit'])
# NOTE(review): 'gap' is rescaled in place, so re-running this cell without reloading df_rpfs
# multiplies it by 100 again and changes what the 'optimal' threshold above compares against.
df_rpfs['gap'] = df_rpfs['gap'] * 100
# Expose 'cmax_dp' under the 'worstcase_cost' name selected by plot_worstcase_comparison.
df_rpfs['worstcase_cost'] = df_rpfs['cmax_dp']

In [None]:
# One frame per value of the 'model' column.
is_wagner = df_rpfs['model'] == 'Wagner'
is_wilson = df_rpfs['model'] == 'Wilson'
df_rpfs_wagner = df_rpfs[is_wagner]
df_rpfs_wilson = df_rpfs[is_wilson]

In [None]:
# Side-by-side view of the capped 'time' and the raw 'time_spent' columns.
df_rpfs[['time', 'time_spent']]

In [None]:
# Column names, dtypes and non-null counts of the robust results frame.
df_rpfs.info()

### Analyzing the stochastic solutions dataframe (SimGRASP) 

In [None]:
# Column names, dtypes and non-null counts of the SimGRASP results frame.
df_stoc.info()

In [None]:
def split_budget_gamma_column(df):
    """Add numeric ``Gamma1`` and ``Gamma2`` columns parsed from ``budget_Gamma``.

    Each ``budget_Gamma`` string is split once, at its first space: the left
    part becomes ``Gamma1`` and the right part ``Gamma2``. Parts that cannot
    be parsed as numbers become NaN.

    The frame is modified in place and also returned.
    """
    parts = df["budget_Gamma"].str.split(" ", n=1, expand=True)
    df["Gamma1"] = pd.to_numeric(parts[0], errors='coerce')
    df["Gamma2"] = pd.to_numeric(parts[1], errors='coerce')
    return df

In [None]:
# Expose the instance identifier under the 'instance_name' column used by the join keys.
df_stoc['instance_name'] = df_stoc['rob_pfsp_instance']

# Names of the wide-format budget columns: "<Gamma1> <Gamma2>", with Gamma1 varying slowest.
budget_levels = [20, 40, 60, 80, 100]
budget_list = ['{} {}'.format(g1, g2) for g1 in budget_levels for g2 in budget_levels]

# Wide -> long: one row per original row and budget column, holding its worst-case cost.
ssgrasp_id_vars = ['n', 'm', 'alpha', 'instance_name', 'stochsol_exp_cost', 'stochsol_time']
df_ssgrasp = split_budget_gamma_column(
    df_stoc.melt(id_vars=ssgrasp_id_vars, value_vars=budget_list,
                 var_name='budget_Gamma', value_name='worstcase_cost')
)
df_ssgrasp

Notice we have 25 executions for each instance file (and respective alpha parameter). For result comparison, we will need one worstcase cost per instance and budget_Gamma. For now, we will group by instance file in order to obtain the smallest worstcase cost found after 25 SimGRASP executions:

In [None]:
ssgrasp_columns = ['n', 'm', 'alpha', 'instance_name', 'budget_Gamma', 'Gamma1', 'Gamma2']

# Per-group extreme, broadcast back to row level so each row can be compared with it.
worstcase_by_group = df_ssgrasp.groupby(ssgrasp_columns)['worstcase_cost']
attains_group_min = df_ssgrasp['worstcase_cost'] == worstcase_by_group.transform('min')
attains_group_max = df_ssgrasp['worstcase_cost'] == worstcase_by_group.transform('max')

# Several rows may tie for the extreme; keep a single row per group key.
df_ssgrasp_min_worstcost = (df_ssgrasp[attains_group_min]
                            .sort_values(ssgrasp_columns)
                            .drop_duplicates(ssgrasp_columns))
df_ssgrasp_max_worstcost = (df_ssgrasp[attains_group_max]
                            .sort_values(ssgrasp_columns)
                            .drop_duplicates(ssgrasp_columns))

display(df_ssgrasp_min_worstcost.tail(4))
display(df_ssgrasp_max_worstcost.tail(4))

In [None]:
# Spot check: the per-budget minimum rows of a single instance.
df_ssgrasp_min_worstcost[(df_ssgrasp_min_worstcost['instance_name'] == 'RB0101001.txt')]

### Robust dataframe self-join 

Now, let's join the `rpfs` dataframe with itself (inner join). This will be useful to compare the Wilson and Wagner models.

In [None]:
join_columns = ['n', 'm', 'alpha', 'instance_name', 'Gamma1', 'Gamma2']
# Pair every row with each row sharing the same key, then keep only the pairs
# whose two sides come from different models ('_x' = left side, '_y' = right side).
df_rob_self = (df_rpfs
               .merge(df_rpfs, how='inner', on=join_columns)
               .loc[lambda pairs: pairs['model_x'] != pairs['model_y']])

In [None]:
# Display the self-joined frame (pandas truncates the rendering of large frames).
df_rob_self

# Tables

## Table 1. Average worst-case Cmax Wagner 

In [None]:
# Mean 'cmax_dp' per budget pair (rows) and per (alpha, n) (columns); empty cells shown as 0.
table = df_rpfs_wagner.pivot_table(values='cmax_dp',
                                   index=['Gamma1', 'Gamma2'],
                                   columns=['alpha', 'n'],
                                   aggfunc='mean',
                                   fill_value=0)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(table)

## Table 2. Average run time Wagner robust model

In [None]:
# Mean capped 'time' per budget pair (rows) and per (alpha, n) (columns), rounded to 2 decimals.
table = df_rpfs_wagner.pivot_table(values='time',
                                   index=['Gamma1', 'Gamma2'],
                                   columns=['alpha', 'n'],
                                   aggfunc='mean',
                                   fill_value=0).round(2)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(table)

## Table 3. Performance all instances 

In [None]:
# 1 when the left-side ('_x') model was strictly faster than the right-side ('_y') one, else 0.
df_rob_self['x_wins_y_time'] = df_rob_self['time_x'].lt(df_rob_self['time_y']).astype(int)

In [None]:
# Per (n, model_x): mean/std of time and iterations, mean gap, and the number of time "wins".
table = pd.pivot_table(df_rob_self, columns=['n', 'model_x'], values=['time_x', 'gap_x', 'iterations_x', 'x_wins_y_time'],
                       aggfunc={'time_x' : ['mean', 'std'], 'gap_x' : 'mean', 'iterations_x' : ['mean', 'std'],
                               'x_wins_y_time' : ['sum']})  # , margins=True, fill_value=0)
# NOTE(review): 1250 is a hard-coded denominator, presumably the number of compared runs per
# (n, model) group -- confirm, or derive it from the data.
# NOTE(review): no `index=` is passed above, so the aggregated values may end up on the row axis;
# in that case table['x_wins_y_time'] would not select them. Verify on a fresh kernel.
table['perc_x_wins_y_time'] = table['x_wins_y_time'] * 100 / 1250
with pd.option_context('display.max_rows', None, 'display.max_columns', None): 
    display(table)

In [None]:
# Number of non-null 'cmax_dp' values per (alpha, n, m, budget_Gamma) group.
df_grouped = (df_rpfs_wagner
              .groupby(['alpha', 'n', 'm', 'budget_Gamma'])
              .agg({'cmax_dp' : ['count']})
              .reset_index())
# Flatten the two-level column labels into single space-joined strings.
flat_names = []
for column_levels in df_grouped.columns:
    flat_names.append(' '.join(map(str, column_levels)))
df_grouped.columns = flat_names
df_grouped

In [None]:
# Count of 'cmax_dp' values per (alpha, n) (rows) and budget pair (columns); empty cells shown as 0.
table = df_rpfs_wagner.pivot_table(values='cmax_dp',
                                   index=['alpha', 'n'],
                                   columns=['Gamma1', 'Gamma2'],
                                   aggfunc='count',
                                   fill_value=0)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(table)

# Graphs

### Build a dataframe joining the `df_rpfs` and `df_dpfs`

In [None]:
# Columns used as the merge key in both joins below.
join_columns = ['n', 'm', 'alpha', 'instance_name', 'Gamma1', 'Gamma2']
# Deterministic rows split by their 'perc_deviation_p_bar' value (0 and 100).
df_det_d0 = df_dpfs[(df_dpfs['perc_deviation_p_bar'] == 0)]
df_det_d100 = df_dpfs[(df_dpfs['perc_deviation_p_bar'] == 100)]
# join robust and deterministic dfs
df_join_rob_det = pd.merge(df_rpfs, df_det_d0, how='inner', on=join_columns, suffixes=('_rob', '_d0'))
# NOTE(review): pandas only suffixes names that collide in the merge at hand. A column already
# renamed to '<name>_rob' / '<name>_d0' by the first merge no longer collides here, so the matching
# df_det_d100 column would keep its bare '<name>' -- check the resulting column names.
df_join_rob_det = pd.merge(df_join_rob_det, df_det_d100, how='inner', on=join_columns, suffixes=('_d0', '_d100'))

### Build a dataframe concatenating `df_rpfs` and `df_dpfs`

In [None]:
def plot_worstcase_comparison(instance_name, df_dict):
    """Plot the worst-case cost of several methods for one instance and save it as SVG.

    Parameters
    ----------
    instance_name : str
        Value matched against the ``instance_name`` column; also used as the
        base name of the saved ``.svg`` file.
    df_dict : dict
        Maps a method label to a DataFrame that provides the columns
        ``instance_name``, ``Gamma1``, ``Gamma2``, ``budget_Gamma`` and
        ``worstcase_cost``. Neither the mapping nor its frames are modified.

    Side effects
    ------------
    Writes ``<outputfolder>/<instance_name>.svg`` (``outputfolder`` is the
    notebook-level variable) and then displays the figure.
    """
    concat_columns = ['instance_name', 'Gamma1', 'Gamma2', 'budget_Gamma', 'worstcase_cost']
    # .assign() returns a new frame, so the caller's dict and frames stay untouched and no
    # column is written onto a slice of another frame.
    labelled_frames = [df_i[concat_columns].assign(Method=key) for key, df_i in df_dict.items()]
    df = pd.concat(labelled_frames)
    df = df[(df['instance_name'] == instance_name)]

    # One marker / line style per method actually present for this instance.
    n_methods = len(df["Method"].unique())
    marker = ['*', '+', 'o', 'x', '^', '8', 's', 'p', 'D', 'v']  # 'v' (lower case) is the valid triangle marker
    markers = [marker[i] for i in range(n_methods)]
    linestyle = ['--', '-.', ':', 'dashed', 'dashdot', 'dotted', 'solid', '-', ' ', '']
    linestyles = [linestyle[i] for i in range(n_methods)]

    # catplot is figure-level: it creates its own figure, so no plt.figure() call is needed
    # (a preceding plt.figure() only produced an extra empty figure).
    # NOTE(review): `style` is not a documented catplot/pointplot parameter; kept as in the
    # original call -- confirm it is accepted by the installed seaborn version.
    grid = sns.catplot(x="budget_Gamma", y="worstcase_cost",  markers=markers, linestyles=linestyles,
                       hue="Method", kind="point", style="Method",
                       data=df,
                       height=5,  # each facet is 5 inches high
                       aspect=3)  # width = aspect * height, i.e. three times the height
    # Save before showing: after plt.show() the figure may be closed, and a later
    # plt.savefig() would write a new, blank figure.
    grid.savefig(os.path.join(outputfolder, '{}.svg'.format(instance_name)))
    plt.show()

### Worstcase cost : Small Uncertainty Range Instance - Example 

Alpha = 10% and n = 50

TODO: Also compare with RobPFSP - Minimax Regret.

In [None]:
# Instance shown in this plot. Other instances tried here:
#   small uncertainty range: 'RB0501003.txt', 'RB0201009.txt', 'RB1001007.txt', 'RB1501006.txt', 'RB0201007.txt'
#   'RB1502001.txt', 'RB1502006.txt', 'RB1502009.txt', 'RB1502010.txt'
#   large uncertainty range: 'RB0105010.txt'
filename = 'RB1502008.txt'
plot_worstcase_comparison(filename, {
    'Det_d0' : df_det_d0,
    'Det_d100' : df_det_d100,
    'SimGRASP_Min25' : df_ssgrasp_min_worstcost,
    'SimGRASP_Max25' : df_ssgrasp_max_worstcost,
    'Rob' : df_rpfs_wagner,
})

### Worstcase cost : Large Uncertainty Range Instance - Example 

Alpha = 50% and n = 10

In [None]:
# Instance shown in this plot (large uncertainty range). Other candidates noted for this section:
#   'RB0505006.txt', 'RB0505003.txt', 'RB1005003.txt', 'RB1005006.txt', 'RB1005008.txt', 'RB1005009.txt'
#   'RB0205001.txt', 'RB0205003.txt', 'RB0205008.txt', 'RB0205009.txt', 'RB0505010.txt', 'RB0505009.txt'
#   'RB1505008.txt', 'RB1505007.txt', 'RB1505006.txt', 'RB1505004.txt', 'RB1505001.txt'
filename = 'RB0105010.txt'
plot_worstcase_comparison(filename, {
    'Det_d0' : df_det_d0,
    'Det_d100' : df_det_d100,
    'SimGRASP_Min25' : df_ssgrasp_min_worstcost,
    'SimGRASP_Max25' : df_ssgrasp_max_worstcost,
    'Rob' : df_rpfs_wagner,
})

In [None]:
def get_instance_solution_info(df_rpfs, instance_name):
    """Return the solution-status columns of every row whose ``instance_name`` matches.

    The result keeps the original row index and contains, in this order, the
    columns ``Gamma1``, ``Gamma2``, ``is_optimal``, ``validated``, ``gap``,
    ``time`` and ``optimal``.
    """
    status_columns = ['Gamma1', 'Gamma2', 'is_optimal', 'validated', 'gap', 'time', 'optimal']
    matches_instance = df_rpfs['instance_name'] == instance_name
    return df_rpfs.loc[matches_instance, status_columns]

In [None]:
# Spot check: solution status of one instance across all of its rows.
get_instance_solution_info(df_rpfs, 'RB1505002.txt')

## Plotting Monte Carlo Simulation results

In [None]:
# Both simulation result trees sit under ../pfsp_experiments, relative to the working directory.
experiments_root = os.path.join(os.path.abspath('..'), 'pfsp_experiments')
sim_results_folder_det_rob = os.path.join(experiments_root, 'montecarlo_sim_det_and_rpfs',
                                          '2020_06_21-16_46_14-3f338d1e-b400-11ea-1f13-43a2532b2fa8')
sim_results_folder_ssgrasp = os.path.join(experiments_root, 'simgrasp_outputs')
for banner, folder in (('[Det, Rob] Using simulation results folder: ', sim_results_folder_det_rob),
                       ('[SSGRASP] Using simulation results folder: ', sim_results_folder_ssgrasp)):
    print(banner, folder)

In [None]:
def read_simulation_result_csv_to_series(filename):
    """Read a one-column simulation result file and return it as a ``Makespan`` Series.

    The first line of the file is treated as a header and discarded; the
    column is always named ``Makespan``.
    """
    frame = pd.read_csv(filename, index_col=False, header=0, names=['Makespan'])
    return frame['Makespan']

In [None]:
def read_budget_simulation_results_to_series(root_folder, instance_name, alpha, distribution, gamma1, gamma2, num_iter=10000):
    """Load the simulated makespans of the robust solution for one budget pair.

    The file is expected at
    ``<root_folder>/robust_pfsp/<distribution>/alpha<alpha>%/MCS_rob_<gamma1>_<gamma2>_<instance_name>_<alpha>_<distribution>_iter<num_iter>.txt``
    and is returned as a ``Makespan`` Series.
    """
    result_name = 'MCS_rob_{}_{}_{}_{}_{}_iter{}.txt'.format(gamma1, gamma2, instance_name, alpha, distribution, num_iter)
    result_path = os.path.join(root_folder, 'robust_pfsp', distribution, 'alpha{}%'.format(alpha), result_name)
    return read_simulation_result_csv_to_series(result_path)

In [None]:
def read_deterministic_simulation_results_to_series(root_folder, instance_name, alpha, distribution, perc_variation, num_iter=10000):
    """Load the simulated makespans of a deterministic solution.

    The file is expected at
    ``<root_folder>/deterministic_pfsp/<distribution>/alpha<alpha>%/MCS_det<perc_variation>_<instance_name>_<alpha>_<distribution>_iter<num_iter>.txt``
    and is returned as a ``Makespan`` Series.
    """
    result_name = 'MCS_det{}_{}_{}_{}_iter{}.txt'.format(perc_variation, instance_name, alpha, distribution, num_iter)
    result_path = os.path.join(root_folder, 'deterministic_pfsp', distribution, 'alpha{}%'.format(alpha), result_name)
    return read_simulation_result_csv_to_series(result_path)

In [None]:
def read_ssgrasp_outputs_to_series(filepath):
    """Parse the numbers that follow the ``STOCH`` marker of a SimGRASP output file.

    Everything after the first occurrence of ``STOCH`` is taken, newlines are
    removed, and the remaining whitespace-separated tokens are converted to
    floats. Returns them as a Series named ``Makespan``.
    """
    with open(filepath, 'r') as content_file:
        raw_text = content_file.read()
    values_start = raw_text.find('STOCH') + 5  # 5 == len('STOCH'): skip past the marker
    flattened = raw_text[values_start:].replace("\n", "")
    values = list(map(float, flattened.split()))
    return pd.Series(values, name='Makespan')

In [None]:
def read_stochastic_simulation_results_to_series(root_folder, instance_name, alpha, distribution, num_iter=10000):
    """Concatenate the makespans of every SimGRASP output file found for one instance.

    Files are searched in ``root_folder`` with the glob pattern
    ``RB<nnn>50<ii>_<n>_2_t_1.0_<alpha/100>_*_outputsList.txt``, where ``<nnn>``
    and ``<ii>`` are characters 2-4 and 7-8 of ``instance_name``
    (example: ``RB0105001_10_2_t_1.0_0.1_124341_outputsList.txt``).

    Parameters
    ----------
    root_folder : str
        Folder containing the SimGRASP ``*_outputsList.txt`` files.
    instance_name : str
        Instance file name such as ``'RB0105001.txt'``.
    alpha : int
        Uncertainty level in percent; it appears in the file name as ``alpha / 100``.
    distribution, num_iter
        Not used by this reader; accepted so the call looks like the other readers.

    Returns
    -------
    pandas.Series
        All values of the matching files, one file after another in sorted
        file-name order (each file keeps its own 0-based index).

    Raises
    ------
    ValueError
        If no file matches the pattern.
    """
    m = 2  # presumably the machine count of the 2RPFS instances -- TODO confirm
    n = int(instance_name[2:5])
    grasp_instance_name = 'RB{}50{}'.format(instance_name[2:5], instance_name[7:9])
    filename = '{}_{}_{}_t_{:.1f}_{:.1f}_*_outputsList.txt'.format(grasp_instance_name, n, m, 1.0, alpha / 100)
    pattern = os.path.join(root_folder, filename)
    # glob returns matches in arbitrary order; sort so the result is reproducible.
    files = sorted(glob.glob(pattern))
    if not files:
        # Without this check pd.concat([]) fails with an error that does not name the missing files.
        raise ValueError('No SimGRASP output file matches: {}'.format(pattern))
    return pd.concat([read_ssgrasp_outputs_to_series(filepath) for filepath in files])

In [None]:
def plot_violin_compare_distributions(df):
    """Show one violin per ``Method`` with the distribution of its ``Makespan`` values.

    ``df`` must provide the columns ``Makespan`` and ``Method``. The figure is
    displayed only; nothing is written to disk.
    """
    a4_dims = (11.7, 8.27)  # size of an A4 sheet, as used for the figure size
    plt.figure(figsize=a4_dims)
    # NOTE(review): `style` is not a documented violinplot parameter; kept as in the
    # original call -- confirm it is accepted by the installed seaborn version.
    violin_options = dict(y="Makespan", x="Method",
                          style="Method",
                          data=df, palette="Set3",
                          scale="width")
    chart = sns.violinplot(**violin_options)
    # Method labels can be long, so they are drawn vertically.
    tick_label_style = dict(rotation=90, fontweight='light', fontsize='large')
    chart.set_xticklabels(chart.get_xticklabels(), **tick_label_style)
    plt.show()

In [None]:
def plot_simulation_comparison(sim_results_folder_det_rob, sim_results_folder_ssgrasp, instance_name, distribution):
    """Compare, as violin plots, the simulated makespans of all methods for one instance.

    Parameters
    ----------
    sim_results_folder_det_rob : str
        Root folder of the simulation results of the deterministic and robust solutions.
    sim_results_folder_ssgrasp : str
        Folder with the SimGRASP output files.
    instance_name : str
        Instance file name, e.g. ``'RB1501006.txt'``; characters 5-6 are parsed as alpha.
    distribution : str
        Distribution name used in the result paths, e.g. ``'lognormal'``.

    The methods plotted are ``Det_d0``, ``Det_d100``, ``SimGRASP`` and one
    ``Rob_<Gamma1>_<Gamma2>`` per budget pair in {20, 40, 60, 80, 100}^2.
    """
    alpha = int(instance_name[5:7])
    simulated_solutions_dict = dict()
    simulated_solutions_dict['Det_d0'] = read_deterministic_simulation_results_to_series(
        sim_results_folder_det_rob, instance_name, alpha, distribution, 0)
    simulated_solutions_dict['Det_d100'] = read_deterministic_simulation_results_to_series(
        sim_results_folder_det_rob, instance_name, alpha, distribution, 100)
    simulated_solutions_dict['SimGRASP'] = read_stochastic_simulation_results_to_series(
        sim_results_folder_ssgrasp, instance_name, alpha, distribution)
    budget_levels = [20, 40, 60, 80, 100]
    for gamma1 in budget_levels:
        for gamma2 in budget_levels:
            # Use the instance_name argument here: reading the notebook-level `filename`
            # variable only worked when it happened to hold the same value.
            simulated_solutions_dict['Rob_{}_{}'.format(gamma1, gamma2)] = read_budget_simulation_results_to_series(
                sim_results_folder_det_rob, instance_name, alpha, distribution, gamma1, gamma2)
    # Long format: one row per simulated makespan, tagged with its method and distribution.
    df_list = []
    for key, s_i in simulated_solutions_dict.items():
        df_i = s_i.to_frame()
        df_i['Method'] = key
        df_i['Distribution'] = distribution
        df_list.append(df_i)
    df = pd.concat(df_list)
    plot_violin_compare_distributions(df)

### Expected cost : Small Uncertainty Range Instance - Example 

Alpha = 10% and n = 50

TODO: Also compare with RobPFSP - Minimax Regret.

In [None]:
# Instance shown in this plot (small uncertainty range). Other instances tried here:
#   'RB0201009.txt', 'RB0501003.txt', 'RB1001007.txt', 'RB0201007.txt'
#   'RB1502008.txt', 'RB1502001.txt', 'RB1502006.txt', 'RB1502009.txt', 'RB1502010.txt'
filename = 'RB1501006.txt'
# Only the 'lognormal' distribution is plotted ('uniform' was noted as another candidate).
plot_simulation_comparison(sim_results_folder_det_rob, sim_results_folder_ssgrasp,
                           filename, 'lognormal')

### Expected cost : Large Uncertainty Range Instance - Example 

Alpha = 50% and n = 10

In [None]:
# Instance shown in this plot. Other candidates noted for this section:
#   large uncertainty range: 'RB0105010.txt', 'RB0505006.txt', 'RB0505003.txt', 'RB1005003.txt',
#                            'RB1005006.txt', 'RB1005008.txt', 'RB1005009.txt'
#   'RB0205001.txt', 'RB0205003.txt', 'RB0205008.txt', 'RB0205009.txt', 'RB0505010.txt', 'RB0505009.txt'
#   'RB1505008.txt', 'RB1505007.txt', 'RB1505006.txt', 'RB1505004.txt'
filename = 'RB1505001.txt'
# Only the 'lognormal' distribution is plotted ('uniform' was noted as another candidate).
plot_simulation_comparison(sim_results_folder_det_rob, sim_results_folder_ssgrasp,
                           filename, 'lognormal')

### Export the dataset to CSV file 

In [None]:
%%time

print('Saving file on folder: ' + rootfolder)
fname = os.path.join(rootfolder, '2RPFS_Cmax_all_results.csv')
df.to_csv(fname, sep=';')
print('Saved: ' + fname)