# Process CCP simulation results - Japan microgrid

In [None]:
import pandas as pd
import numpy as np
import os, fnmatch
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)
import glob
import seaborn as sns
import gzip
import matplotlib.style as style
from matplotlib.path import Path
from matplotlib.patches import BoxStyle
from sys import platform
import pyarrow as pa
%matplotlib inline

In [None]:
# Import custom python file from github repo: https://changhsinlee.com/colab-import-python/
!pip install requests
import requests
# Save python as file to colab working directory
# If you are using GitHub, make sure you get the "Raw" version of the code
url = 'https://raw.githubusercontent.com/levorato/ccp_rtcs/master/notebooks/rccp_utils.py'
r = requests.get(url)
# make sure your filename is the same as how you want to import 
with open('rccp_utils.py', 'w') as f:
    f.write(r.text)
# now we can import
from rccp_utils import *

## 1. Process result files

### 1.1. Setup project folders

In [None]:
# Mount Google Drive at /content/gdrive/ (Colab only)
from google.colab import drive
drive.mount('/content/gdrive/')

In [None]:
# Root locations: the repository root and the mounted Google Drive
project_folder = '..'
gdrive_folder = '/content/gdrive/MyDrive'

# Instance folders, all located under <project_folder>/instances
instances_folder = os.path.join(project_folder, "instances")
antoine_instances_folder = os.path.join(instances_folder, "utc_skew")
toy_instances_folder = os.path.join(instances_folder, "toy")
japan_instances_folder = os.path.join(instances_folder, "japan_microgrid")

# Experiment outputs (input of this notebook) and processed results, both on Google Drive
output_folder = os.path.join(gdrive_folder, "rccp_experiments")
results_folder = os.path.join(gdrive_folder, "rccp_results")

print(f"*** Project folder is {project_folder}")
print(f"*** Instances folder is {instances_folder}")
print(f"*** Output folder is {output_folder}")

### 1.2. List which experiments to process

In [None]:
# Names of the experiments to process; each name is joined to output_folder to locate its results
experiment_list = ["run_sim_japan_forecast_avg"]

In [None]:
# Resolve every experiment name to its folder inside the output folder
experiment_folder_list = [os.path.join(output_folder, exp) for exp in experiment_list]
# Display the resolved folders
experiment_folder_list

### 1.3. List which CCP models to process

In [None]:
# CCP models whose simulations are processed. The deterministic model is the baseline
# against which the robust (box and budget) models are compared in the tables below.
simulated_model_list = ["deterministic", "robust-box", "robust-budget"]

### 1.4. Select instance_group to process

In [None]:
# Instance groups to process; iterated by the experiment loop further below
instance_group_list = ["japan-10"]

### 1.5. Select RTCS forecast types to process

In [None]:
# NOTE: this initial value is replaced further below by the forecast types found in df['ForecastType']
forecast_type_list = ["average"]  # average-based RTCS forecast

In [None]:
# Single instance group selected for this analysis.
# NOTE(review): the experiment loop further below reuses this name as its loop variable.
instance_group = "japan-10"

### 1.6. Read consolidated result file

In [None]:
# Consolidated result file of the first experiment: <experiment>.<instance_group>.results.pkl.gz
result_filename = "{}.{}.results.pkl.gz".format(experiment_list[0], instance_group)
result_path = os.path.join(experiment_folder_list[0], result_filename)
# NOTE: unpickling can execute arbitrary code; only load result files from a trusted source
df = pd.read_pickle(result_path)

In [None]:
# Preview the first rows of the consolidated results
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the consolidated results
df.info()

### 1.7. Create the output folders for processed results

In [None]:
# Processed results of the first experiment go to <results_folder>/consolidated_results/<experiment>
reportfolder = os.path.join(results_folder, 'consolidated_results', experiment_list[0])
reportfolder_graph = os.path.join(reportfolder, 'graphs')
reportfolder_table = os.path.join(reportfolder, 'tables')
# exist_ok=True makes the creation idempotent and avoids the check-then-create race
os.makedirs(reportfolder_graph, exist_ok=True)
os.makedirs(reportfolder_table, exist_ok=True)
print('Saving files on folder: ' + reportfolder)

### 1.8. Obtain list of Model, Strategy, ModelPolicy, ForecastType

In [None]:
# Distinct values of every categorical column, in order of first appearance in df
unique_values = {
    column: df[column].unique().tolist()
    for column in ('Model', 'Strategy', 'ModelPolicy', 'Reoptimize', 'ForecastType', 'InstanceName')
}
model_list = unique_values['Model']
strategy_list = unique_values['Strategy']
model_policy_list = unique_values['ModelPolicy']
reoptimize_value_list = unique_values['Reoptimize']
forecast_type_list = unique_values['ForecastType']
instances_to_process = unique_values['InstanceName']
# Report what was found, one line per column
for column, values in unique_values.items():
    print(column, values)

### 1.9. Replace the InstanceName column with the season name of each instance

In [None]:
# Keep the raw instance file name before shortening it
df['OriginalInstanceName'] = df['InstanceName']
# Strip the common prefix and the file extension (both matched literally)
short_names = df['InstanceName'].str.replace('instance_deltamin10_', '', regex=False)
df['InstanceName'] = short_names.str.replace('.txt', '', regex=False)
# Gamma percentage is handled as an integer from here on
df['GammaPerc'] = df['GammaPerc'].astype(int)
df.head()

### 1.10. Create 2 new columns: `ModelName` and `RTCS_Policy`

* `ModelName` contains MILP model name including parameters (in the budget case)

* `RTCS_Policy` concatenates the info about policy (conservative, audacious, cheapest), look-ahead (i.e., full_model, ignore_model) and model reoptimization (true, false).

In [None]:
# ModelName: the model name, with '-<GammaPerc>' appended for the budget model
df['ModelName'] = df['Model']
budget_rows = df['Model'] == 'robust-budget'
df.loc[budget_rows, 'ModelName'] = df.loc[budget_rows, 'Model'] + '-' + df.loc[budget_rows, 'GammaPerc'].astype(str)
# RTCS_Policy: strategy + '+LA' when the full model (look-ahead) is used + '+ReOpt' when reoptimizing.
# regex=False makes the literal matching explicit (the default of `regex` differs between pandas versions).
lookahead_suffix = df['ModelPolicy'].str.replace('ignore_model', '', regex=False)\
    .str.replace('full_model', '+LA', regex=False)
reopt_suffix = df['Reoptimize'].astype(str).str.replace('True', '+ReOpt', regex=False)\
    .str.replace('False', '', regex=False)
df['RTCS_Policy'] = df['Strategy'] + lookahead_suffix + reopt_suffix
# Strategy, ModelPolicy, Reoptimize and ForecastType are kept on purpose: later cells still drop 'Reoptimize' by name
df.head()

## Table 0. Number of scenarios per instance

In [None]:
# Count the ScenarioId entries per instance / model / policy / gamma, looking only at rows with t == 1 and d == 1
# (presumably exactly one such row exists per simulated scenario — verify against the simulation output)
scenario_count_keys = ['InstanceName', 'Model', 'ModelName', 'RTCS_Policy', 'Gamma', 'GammaPerc']
first_period_rows = df[(df['t'] == 1) & (df['d'] == 1)]
df_num_scenarios_per_instance = (
    first_period_rows.groupby(by=scenario_count_keys)['ScenarioId']
    .count()
    .reset_index(name='ScenarioCount')
)
df_num_scenarios_per_instance

## Table 1. Simulation performance given all instances 

Model-wise RTCS simulation performance comparison, given all instances.

* Median, Mean, Std. dev and sum of each measure (cost, e_td, gap, time). 

In [None]:
# Measures summarised for every group: (label used in the statistic name, df column)
stat_measures = [('time', 'RealProcTime'), ('cost', 'cost'), ('gap', 'gap'), ('e_td', 'e_td')]

# Maps (instance, model, gamma_perc, policy) -> {statistic name: value rounded to 2 decimals}
per_instance_stats = dict()
instances_to_process = df['InstanceName'].unique().tolist()
for instance_name in instances_to_process:  # group by instance
    rows_of_instance = df[df['InstanceName'] == instance_name]
    for model in rows_of_instance['Model'].unique().tolist():
        rows_of_model = rows_of_instance[rows_of_instance['Model'] == model]
        for gamma_perc in rows_of_model['GammaPerc'].unique().tolist():
            rows_of_gamma = rows_of_model[rows_of_model['GammaPerc'] == gamma_perc]
            for policy in rows_of_gamma['RTCS_Policy'].unique().tolist():
                rows_of_policy = rows_of_gamma[rows_of_gamma['RTCS_Policy'] == policy]
                # TODO: '% Best Performance', '% Solved' and 'Avg. % gap' are not computed yet
                group_stats = dict()
                for label, column in stat_measures:
                    values = rows_of_policy[column]
                    group_stats['Median ' + label] = np.round(values.median(), 2)
                    group_stats['Avg. ' + label] = np.round(values.mean(), 2)
                    group_stats['Std. dev. of ' + label] = np.round(values.std(), 2)
                    group_stats['Total ' + label] = np.round(values.sum(), 2)
                per_instance_stats[(instance_name, model, gamma_perc, policy)] = group_stats

In [None]:
# The tuple keys of per_instance_stats become a column MultiIndex; transpose so that the table has
# one row per (instance, model, gamma_perc, policy) and one column per statistic. With the
# statistics as rows, head() would only show 5 of the 16 statistics.
df_table1 = pd.DataFrame.from_dict(per_instance_stats).T
df_table1.index.names = ['InstanceName', 'Model', 'GammaPerc', 'RTCS_Policy']
df_table1.head()

### Table 1. Total cost considering all simulations for a specific CCP model and RTCS policy

In [None]:
# Sum the remaining columns over all scenarios and periods, per instance, model, gamma and RTCS policy
df_totals = df.drop(columns=['t', 'd', 'OptTimeSpent']).groupby(by=['InstanceName', 'Model', 'GammaPerc', 'Gamma', 'RTCS_Policy']).sum()
df_total_proc_time = df_totals.drop(columns=['ScenarioId', 'e_td', 'gap', 'ObjValue', 'cost'])
df_total_cost = df_totals.drop(columns=['ScenarioId', 'e_td', 'gap', 'ObjValue', 'RealProcTime']).reset_index()
# total simulation cost of the deterministic model
df_total_cost_det = df_total_cost[(df_total_cost['Model'] == 'deterministic')].drop(columns=['Model', 'GammaPerc', 'Gamma']).rename(columns={"cost": "cost(det)"})
# total simulation cost of the box model
df_total_cost_box = df_total_cost[(df_total_cost['Model'] == 'robust-box')].drop(columns=['Model', 'GammaPerc', 'Gamma']).rename(columns={"cost": "cost(box)"})
# total simulation cost of the budget model
df_total_cost_bud = df_total_cost[(df_total_cost['Model'] == 'robust-budget')].drop(columns=['Model']).rename(columns={"cost": "cost(bud)"})
# One column per GammaPerc value. The aggregation is passed by name: recent pandas versions
# emit a FutureWarning when a NumPy callable such as np.sum is given instead.
df_total_cost_bud_pivot = pd.pivot_table(df_total_cost_bud, values='cost(bud)', index=['InstanceName', 'RTCS_Policy'],
                                         columns=['GammaPerc'], aggfunc='sum')
df_total_cost_bud_pivot.columns = [('Cost(bud_' + str(gamma_perc_value) + ')') for gamma_perc_value in df_total_cost_bud_pivot.columns]
df_total_cost_bud_pivot = df_total_cost_bud_pivot.reset_index()


In [None]:
# Display the budget-model total costs, one column per GammaPerc value
df_total_cost_bud_pivot

#### Join the det, box and bud costs in the same dataframe for comparison

In [None]:
# Key columns shared by the det, box and bud total-cost tables
join_columns_total_cost = ['InstanceName', 'RTCS_Policy']
# merge() performs an inner join by default, so only (InstanceName, RTCS_Policy) pairs present in all three tables are kept
df_total_cost_join = df_total_cost_det.merge(df_total_cost_box, on=join_columns_total_cost, suffixes=('_det', '_box'))\
                        .merge(df_total_cost_bud_pivot, on=join_columns_total_cost, suffixes=('', '_bud'))
#df_total_cost_join.loc[(), 'Gamma'] = np.nan
#df_total_cost_join.loc[(), 'GammaPerc'] = np.nan
df_total_cost_join.head()

### Table 2. Cost of the most expensive scenario (worst simulation cost), grouped by CCP model and simulation parameters

In [None]:
# Step 1: total cost of each scenario; step 2: the largest of those totals per instance, model and policy
t2_scenario_keys = ['InstanceName', 'Model', 'ModelName', 'RTCS_Policy', 'ScenarioId']
t2_model_keys = ['InstanceName', 'Model', 'ModelName', 'RTCS_Policy']
t2_scenario_totals = df.drop(columns=['t', 'd', 'OptTimeSpent', 'ObjValue']).groupby(by=t2_scenario_keys).sum()
t2_scenario_totals = t2_scenario_totals.drop(columns=['e_td', 'gap', 'RealProcTime', 'GammaPerc', 'Gamma'])
df_t2 = t2_scenario_totals.groupby(by=t2_model_keys).max()

# Split the worst-case costs into the robust models and the deterministic baseline
t2_worst_costs = df_t2.reset_index()
df_rob = t2_worst_costs[t2_worst_costs['Model'].isin(['robust-budget', 'robust-box'])]
df_det = t2_worst_costs.drop(columns=['ModelName'])
df_det = df_det[df_det['Model'] == 'deterministic']

# Compare each robust model with the deterministic one for the same instance and policy
df_wins_t2 = df_rob.merge(df_det, on=['InstanceName', 'RTCS_Policy'], suffixes=('_rob', '_det'))
df_wins_t2 = df_wins_t2.drop(columns=['Model_det'])
# 1 when the robust worst-case cost is strictly smaller than the deterministic one
df_wins_t2['MaxRobCost_Smaller'] = (df_wins_t2['cost_rob'] < df_wins_t2['cost_det']).astype(int)

In [None]:
# Earlier plotting attempts, kept for reference:
#p = sns.countplot(data=df_wins,
#                  y = 'InstanceName',
#                  hue = 'Model_rob')
# grouped barplot
# g = sns.barplot(x="ModelName", y="rob_wins", hue="InstanceName", data=df_wins_t2, ci=None)
# Bar plot of the MaxRobCost_Smaller flag: one panel per instance, one bar group per ModelName, coloured by RTCS policy
g = sns.catplot(y="ModelName", x="MaxRobCost_Smaller",
                 col="InstanceName", hue="RTCS_Policy", 
                 palette="pastel", edgecolor=".6", # orient="h", height=1.5, aspect=4, 
                 data=df_wins_t2, kind="bar", ci=None)
g.set_xticklabels(rotation=90)

In [None]:
# Display Table 2 indexed by instance, model name and RTCS policy (df_wins_t2 itself is not modified)
df_wins_t2.set_index(['InstanceName', 'ModelName', 'RTCS_Policy'])

### Table 3. RTCS performance map (robust wins)

Number of scenarios where the robust RTCS obtained a cost smaller than or equal to that of the deterministic RTCS when simulating the same scenario.

In [None]:
# Total simulation cost of every scenario, per instance, model and RTCS policy
df_scenario = df.drop(columns=['t', 'd', 'OptTimeSpent', 'ObjValue'])
df_scenario['ModelName'] = df_scenario['Model']
scenario_budget_rows = df_scenario['Model'] == 'robust-budget'
df_scenario.loc[scenario_budget_rows, 'ModelName'] = df_scenario.loc[scenario_budget_rows, 'Model'] + '-'\
    + df_scenario.loc[scenario_budget_rows, 'GammaPerc'].astype(str)
df_scenario = df_scenario.groupby(by=['InstanceName', 'Model', 'ModelName', 'RTCS_Policy', 'ScenarioId']).sum()\
    .drop(columns=['gap', 'RealProcTime', 'GammaPerc', 'Gamma']).reset_index()

# simulation cost of the deterministic model, per scenario
df_cost_det = df_scenario[(df_scenario['Model'] == 'deterministic')]
# simulation cost of the box model, per scenario
df_cost_box = df_scenario[(df_scenario['Model'] == 'robust-box')]
# simulation cost of the budget model, per scenario
df_cost_bud = df_scenario[(df_scenario['Model'] == 'robust-budget')]

df_t3 = pd.concat([df_cost_det, df_cost_box, df_cost_bud])
# Select, for every (InstanceName, ScenarioId), the whole row with the smallest cost.
# A plain groupby().min() would take the minimum of each column independently and thus report the
# alphabetically first Model / ModelName / RTCS_Policy instead of the ones that achieved the minimum cost.
# On ties idxmin keeps the first row in df_t3 order (deterministic, then box, then budget).
cheapest_row_labels = df_t3.groupby(by=['InstanceName', 'ScenarioId'])['cost'].idxmin()
df_cheapest_policy_per_scenario = df_t3.loc[cheapest_row_labels].drop(columns=['e_td', 'Reoptimize'])\
    .set_index(['InstanceName', 'ScenarioId'])
df_cheapest_policy_per_scenario

In [None]:
# Count plot of the rows of df_cheapest_policy_per_scenario: one panel per instance,
# one bar group per ModelName, coloured by RTCS policy
g = sns.catplot(x="ModelName", 
                 col="InstanceName", hue="RTCS_Policy",
                 data=df_cheapest_policy_per_scenario.reset_index(), kind="count", ci=None)
g.set_xticklabels(rotation=90)

In [None]:
# Per-scenario cost of the robust models (target) versus the deterministic model.
# df_scenario already carries a plain integer index, so it is used directly: calling reset_index()
# again would only add spurious 'index_target' / 'index_det' columns to the merge result.
df_target = df_scenario.drop(columns=['e_td'])
df_target = df_target[df_target['Model'].isin(['robust-budget', 'robust-box'])]
df_det = df_scenario.drop(columns=['e_td', 'ModelName'])
df_det = df_det[df_det['Model'] == 'deterministic']
# Pair the costs obtained for the same instance, RTCS policy and scenario
df_wins_t3 = df_target.merge(df_det, on=['InstanceName', 'RTCS_Policy', 'ScenarioId'], suffixes=('_target', '_det'))\
    .drop(columns=['Model_det', 'Model_target'])
# 1 when the robust cost is smaller than or equal to the deterministic cost (ties count for the robust model)
df_wins_t3['Cost_Smaller'] = (df_wins_t3['cost_target'] <= df_wins_t3['cost_det']).astype(int)
df_wins_t3

In [None]:
# Inspect the columns and dtypes of the scenario-count table before merging it below
df_num_scenarios_per_instance.info()

In [None]:
df_wins_t3_grouped.info()

In [None]:
df_wins_t3_grouped = df_wins_t3.groupby(by=['InstanceName', 'ModelName', 'RTCS_Policy']).sum()  # 'ForecastType'
df_wins_t3_grouped_perc = df_wins_t3_grouped.reset_index().merge(df_num_scenarios_per_instance, on=['InstanceName', 'ModelName', 'RTCS_Policy'])
df_wins_t3_grouped_perc['Cost_Smaller_Perc'] = np.round((100 * df_wins_t3_grouped_perc['Cost_Smaller']) / df_wins_t3_grouped_perc['ScenarioCount'], 0).astype(int)
df_wins_t3_grouped_perc

In [None]:
# Bar plot of Cost_Smaller_Perc: one panel per instance, one bar group per ModelName, coloured by RTCS policy
g = sns.catplot(x="ModelName", y="Cost_Smaller_Perc",
                 col="InstanceName", hue="RTCS_Policy",
                 data=df_wins_t3_grouped_perc.reset_index(), kind="bar", ci=None, orient='v')
g.set_xticklabels(rotation=90)

### Table 4. Cheapest RTCS Strategy, per instance and model type

In [None]:
# Total cost of every scenario for each instance, model, gamma and RTCS policy
t4_policy_keys = ['InstanceName', 'Model', 'GammaPerc', 'Gamma', 'RTCS_Policy', 'ScenarioId']
df_group = df.drop(columns=['t', 'd', 'OptTimeSpent', 'ObjValue']).groupby(by=t4_policy_keys).sum()
df_group = df_group.drop(columns=['gap', 'RealProcTime'])
# Find the cheapest strategy for each model type: minimum over the RTCS policies of the same scenario
t4_model_keys = ['InstanceName', 'Model', 'GammaPerc', 'Gamma', 'ScenarioId']
df_cheapest = df_group.groupby(by=t4_model_keys).min()
df_cheapest = df_cheapest.drop(columns=['e_td', 'Reoptimize'])
df_cheapest

In [None]:
# Left side of the comparison: cheapest cost of every model. The robust-only filter below is disabled,
# so the deterministic rows are also compared against themselves.
df_target = df_cheapest.reset_index()
#df_rob = df_rob[(df_rob['Model'] == 'robust-budget') | (df_rob['Model'] == 'robust-box')]
# Right side: cheapest cost of the deterministic model
df_det = df_cheapest.reset_index()
df_det = df_det[df_det['Model'] == 'deterministic']
# Pair the costs obtained on the same instance and scenario
df_wins_cheapest = df_target.merge(df_det, on=['InstanceName', 'ScenarioId'], suffixes=('_target', '_det'))\
    .drop(columns=['Model_det', 'Gamma_target', 'Gamma_det', 'GammaPerc_det'])
# 1 when the target model is at least as cheap as the deterministic one (ties count as wins)
df_wins_cheapest['rob_wins'] = (df_wins_cheapest['cost_target'] <= df_wins_cheapest['cost_det']).astype(int)
#df_wins_cheapest['det_wins'] = (df_wins_cheapest['cost_rob'] > df_wins_cheapest['cost_det']).astype(int)
# Helper column: summing it per group yields the number of compared scenarios
df_wins_cheapest['#scenarios'] = 1

In [None]:
# Preview the per-scenario comparison against the deterministic model
df_wins_cheapest.head()

In [None]:
# Sum the wins and the scenario counter per instance, model and GammaPerc
df_wins_grouped = df_wins_cheapest.groupby(by=['InstanceName', 'Model_target', 'GammaPerc_target']).sum().drop(columns=['ScenarioId'])
# Percentage of scenarios in which the target model was at least as cheap as the deterministic one
df_wins_grouped['rob_wins_%'] = np.round(100 * df_wins_grouped['rob_wins'] / df_wins_grouped['#scenarios'], 2)
#df_wins_grouped['det_wins_%'] = np.round(100 * df_wins_grouped['det_wins'] / df_wins_grouped['#scenarios'], 2)
# Attach the scenario-count table (brings in ModelName and RTCS_Policy).
# NOTE(review): that table has one row per RTCS_Policy, so this merge appears to repeat every grouped row
# once per policy — confirm this is intended.
df_wins_grouped = df_wins_grouped.merge(df_num_scenarios_per_instance, left_on=['InstanceName', 'Model_target', 'GammaPerc_target'],
                                        right_on=['InstanceName', 'Model', 'GammaPerc'])
df_wins_grouped.head()

In [None]:
# Bar plot of rob_wins_%: one panel per instance, one bar group per ModelName, coloured by RTCS policy
g = sns.catplot(x="ModelName", y="rob_wins_%",
                 col="InstanceName", hue="RTCS_Policy",
                 data=df_wins_grouped.reset_index(), kind="bar", ci=None, orient='v')
g.set_xticklabels(rotation=90)

### Simulation cost of each scenario

In [None]:
# Sum of the remaining columns for every (instance, scenario, model, gamma, RTCS policy) combination
df_simulation_cost = df.drop(columns=['t', 'd', 'OptTimeSpent']).groupby(by=['InstanceName', 'ScenarioId', 'Model', 'GammaPerc', 'Gamma', 'RTCS_Policy']).sum()

### Figure. Split violin plot with the costs of each scenario, comparing Rob x Det

In [None]:
# Keep only the two models compared in the split violin plot: robust-box and deterministic
box_or_det_rows = df_scenario['ModelName'].isin(['robust-box', 'deterministic'])
df_box_vs_det = df_scenario[box_or_det_rows]
# Check which model names were actually selected
df_box_vs_det['ModelName'].unique()

In [None]:
# Set the default matplotlib figure size (in inches).
# NOTE(review): catplot is a figure-level function sized through height/aspect, so this setting
# may not affect the plot below — confirm.
sns.set(rc={'figure.figsize':(11.7,8.27)})
# Violin plot of the per-scenario cost by RTCS policy; split=True draws the two ModelName levels
# as the two halves of each violin
plot4 = sns.catplot(x="RTCS_Policy", y="cost", hue="ModelName",
            kind="violin", split=True,
            palette="pastel", data=df_box_vs_det)
plot4.set_xticklabels(rotation=90)

In [None]:
# Cost distribution of every model and RTCS policy, one row of violins per instance.
# catplot is a figure-level function that creates its own figure, so no plt.figure() call is made
# beforehand (it would only render an additional empty figure); the size is set via height/aspect.
sns.catplot(x="cost", y="RTCS_Policy", hue="ModelName", row="InstanceName",
            kind="violin", bw=.15, cut=0,
            data=df_scenario,
            height=25,  # height of each facet, in inches
            aspect=0.5)  # width = aspect * height, i.e. each facet is twice as high as it is wide

In [None]:
# Cost distribution of every model and RTCS policy for the 'spring' instance only.
# catplot is a figure-level function that creates its own figure, so no plt.figure() call is made
# beforehand (it would only render an additional empty figure); the size is set via height/aspect.
sns.catplot(x="cost", y="RTCS_Policy", hue="ModelName",
            kind="violin", bw=.15, cut=0,
            data=df_scenario[(df_scenario['InstanceName'] == 'spring')],
            height=25,  # height of the plot, in inches
            aspect=0.5)  # width = aspect * height, i.e. the plot is twice as high as it is wide

### TODO: Make a KDE distribution plot of the RTCS costs obtained in the simulations: robust-gamma vs. deterministic

### TODO: Build a table with the statistical measures (for each distribution used) of each simulation, including the expected value, standard deviation, 95th and 99th percentiles, and the maximum value observed empirically.

In [None]:
# Walk through every experiment folder / instance group / model / forecast type combination and print it.
# NOTE: the loop variables instance_group and model rebind the notebook-level names used in earlier cells.
for experiment_folder in experiment_folder_list:
    for instance_group in instance_group_list:
        # get_instance_list is provided by the star import of rccp_utils; presumably it returns the
        # instances that belong to the group — verify against rccp_utils.py
        instance_list = get_instance_list(project_folder, antoine_instances_folder, toy_instances_folder, instance_group)
        print(instance_group, instance_list)
        for model in simulated_model_list:
            for forecast_type in forecast_type_list:
                print(model, forecast_type)