In [None]:
import pandas as pd
import seaborn as sns
from pathlib import Path
from scipy.stats import t
import numpy as np

In [None]:
# Load the raw experiment results; every missing value is replaced by 0.
output_path = Path('../outputs/20240609-192217')

data = pd.read_csv(output_path / 'results.csv').fillna(0)

# Readable identifiers: agent + time limit, and the full benchmark configuration.
# All LP rows share the single label 'LP', whatever their time limit.
data['algorithm'] = data.agent + '_t' + data.time_limit.astype(str)
data.loc[data.agent == 'LP', 'algorithm'] = 'LP'
data['benchmark'] = data.env + '_' + data.instance + '_sp' + data.slide_prob.astype(str) + '_tp' + data.trap_prob.astype(str)

# Point estimate: the mean penalty does not exceed the bound c.
data['feasible'] = data.penalty_mean <= data.c

# One-sample t statistic of the mean penalty against the bound c.
penalty_gap = data['penalty_mean'] - data['c']
data['t'] = penalty_gap * np.sqrt(data['repetitions']) / data['penalty_std']

# With zero spread there is no sampling uncertainty, so the statistic is pushed
# to -inf / +inf and both confidence-based flags agree with the point estimate.
# (Forcing t = 0 here would report a run that never violates the bound as not
# 'feasible_high', and a run that always violates it as 'feasible_low'.)
zero_std = data['penalty_std'] == 0
data.loc[zero_std, 't'] = np.where(data.loc[zero_std, 'feasible'], -np.inf, np.inf)

# feasible_low (lenient): t does not exceed the 95% quantile of Student's t.
# feasible_high (strict): t does not exceed the 5% quantile of Student's t.
degrees_of_freedom = data['repetitions'] - 1
data['feasible_low'] = data['t'] <= t.ppf(0.95, degrees_of_freedom)
data['feasible_high'] = data['t'] <= t.ppf(0.05, degrees_of_freedom)

# LP rows are always flagged feasible for both confidence-based criteria.
data.loc[data.agent == 'LP', 'feasible_low'] = True
data.loc[data.agent == 'LP', 'feasible_high'] = True

data.head()

In [None]:
# The LP rows provide the reference ("real") reward for each (benchmark, c) pair.
lp_data = data.loc[data.agent == 'LP']

# Add LP accurate values
real_values = lp_data[['reward_mean', 'benchmark', 'c']]

# Default (inner) merge: rows without an LP reference for their (benchmark, c) are dropped.
extended_data = data.merge(real_values, on=['benchmark', 'c'], suffixes=('', '_real'))

# Shift rewards so the smallest observed mean reward becomes 0. The reference
# reward is moved by the same offset so both columns keep a common origin;
# get_table takes their minimum and their ratio, which is meaningless if only
# one of them is shifted.
# NOTE(review): the reference column was previously left unshifted - confirm
# this matches the intended normalisation (it is a no-op whenever m == 0).
m = extended_data['reward_mean'].min()

extended_data['reward_mean'] -= m
extended_data['reward_mean_real'] -= m
# Small offset applied to the reference reward, which is later used as a denominator.
extended_data['reward_mean_real'] += 0.00001

extended_data = extended_data.sort_values(by=['benchmark', 'algorithm', 'c'])
extended_data.head()

### Tables

In [None]:
# Silence every warning while the summary tables below are built; the filter
# is switched back to "default" at the end of this cell.
import warnings
warnings.filterwarnings("ignore")

# minimum of reward_mean and reward_mean_real
def get_table(data):
    """Aggregate feasibility rates and feasibility-gated rewards per algorithm.

    The input frame is left untouched: all helper columns are added to a
    private copy, so the function can safely be called on a filtered slice
    and calling it does not change the caller's frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'algorithm', 'reward_mean',
        'reward_mean_real', 'feasible', 'feasible_low' and 'feasible_high'.

    Returns
    -------
    pandas.DataFrame
        One row per algorithm, sorted by decreasing mean of 'feasible', with
        the per-algorithm means of:

        * 'feasible_low', 'feasible', 'feasible_high' - the feasibility flags;
        * 'cvl', 'cvm', 'cvh' - reward capped at the reference reward and
          zeroed where the matching flag is false, divided by the largest
          per-algorithm mean of that quantity;
        * 'cvln', 'cvmn', 'cvhn' - the same gated reward divided row-wise by
          'reward_mean_real'.
    """
    # Private copy: never write helper columns into the caller's frame/slice.
    scored = data.copy()

    # Cap each reward at the reference value of its row.
    scored['min_reward_mean'] = scored[['reward_mean', 'reward_mean_real']].min(axis=1)

    flag_for_score = {'cvl': 'feasible_low', 'cvm': 'feasible', 'cvh': 'feasible_high'}
    for score, flag in flag_for_score.items():
        # Reward counts only where the corresponding feasibility flag holds.
        gated = scored[flag] * scored['min_reward_mean']

        # Scale by the best per-algorithm average of the gated reward.
        best_average = gated.groupby(scored['algorithm']).mean().max()
        scored[score] = gated / best_average

        # Row-wise ratio against the reference reward.
        scored[score + 'n'] = gated / scored['reward_mean_real']

    summary_columns = [
        'feasible_low', 'feasible', 'feasible_high',
        'cvl', 'cvm', 'cvh',
        'cvln', 'cvmn', 'cvhn',
    ]
    per_algorithm = scored.groupby(['algorithm'])[summary_columns].mean()
    return per_algorithm.sort_values(by='feasible', ascending=False)

# Overall summary first, followed by one summary per environment.
tables = {'table.csv': get_table(extended_data)}
tables.update(
    (f'table_{env}.csv', get_table(extended_data.loc[extended_data.env == env]))
    for env in extended_data.env.unique()
)

# Write each summary next to the results file and echo it in the cell output.
for file_name, summary in tables.items():
    summary.to_csv(output_path / file_name)
    print('\n', file_name, '\n', summary)


# Switch the warning filter back on now that the tables are built.
warnings.filterwarnings("default")


### Plots

In [None]:
# import warnings
# warnings.filterwarnings("ignore")

# import matplotlib.pyplot as plt

# # Plot mean reward vs. c
# # Each benchmark is a subplot
# # Each algorithm is a line
# # Use `reward_std` for error bars


# for time in [t for t in extended_data['time_limit'].unique() if t >= 0]:
#     filtered_data = extended_data[(extended_data['time_limit'] == time) | (extended_data['time_limit'] == -1)]
#     filtered_data['reward_std'] /= np.sqrt(filtered_data['repetitions'])

#     default_palette = sns.color_palette()
#     algorithms = filtered_data['algorithm'].unique()
#     palette = {algorithm: default_palette[i] for i, algorithm in enumerate(algorithms)}


#     # Create a FacetGrid with the benchmark feature
#     g = sns.FacetGrid(filtered_data, col="benchmark", col_wrap=4, height=4, aspect=1.5)

#     # Do not show the plot in a notebook
#     plt.ioff()

#     # Iterate through each subplot to add error bars manually
#     for ax, (benchmark_value, subset) in zip(g.axes.flatten(), filtered_data.groupby('benchmark')):
#         sns.lineplot(x='c', y='reward_mean', hue='algorithm', palette=palette, err_style=None, data=subset, ax=ax)

#         non_feasible = subset[subset['feasible_low'] == False]
#         sns.scatterplot(x='c', y='reward_mean', hue='algorithm', palette=palette, marker='x', s=50, data=non_feasible, ax=ax)
#         ax.grid(True)

#         for alg in algorithms:
#             alg_data = subset[subset['algorithm'] == alg]
#             ax.fill_between(
#                 alg_data['c'],
#                 alg_data['reward_mean'] - alg_data['reward_std'],
#                 alg_data['reward_mean'] + alg_data['reward_std'],
#                 alpha=0.2,
#                 color=palette[alg]
#             )

#     # Adjust the titles and labels
#     g.set_titles(col_template="{col_name}")
#     g.set_axis_labels("C", "Mean Reward")

#     g.savefig(output_path / f'mean_reward_vs_c_t{time}.svg')


# warnings.filterwarnings("default")