In [1]:
import pandas as pd
import seaborn as sns
from pathlib import Path
from scipy.stats import binomtest
import numpy as np

In [12]:
# Load the data
output_path = Path('../outputs/20240601-181954-ContHallway')

lp_data = pd.read_csv(output_path / 'results_lp.csv', sep=';')
lp_data['mean_reward'] = lp_data['reward']
lp_data['std_reward'] = 0
lp_data['mean_penalty'] = lp_data['c']
# Set mean_penalty to 2 for infeasible solutions
lp_data.loc[lp_data['feasible'] == False, 'mean_penalty'] = 2
lp_data.loc[lp_data['time'] < 0, 'mean_reward'] = 0
lp_data['std_penalty'] = 0
lp_data['algorithm'] = 'LP'

# remove whatever is between second and third _
columns = lp_data['benchmark'].str.split('_', expand=True)
num_cols = columns.shape[1]
lp_data['filename'] = columns[0]
for i in range(1, num_cols-3):
    lp_data['filename'] = lp_data['filename'] + '_' + columns[i]
lp_data['p_slide'] = columns[num_cols-2].str.replace('slide', '').astype(float)
lp_data['p_trap'] = columns[num_cols-1].str.replace('trap', '').astype(float)
lp_data['benchmark'] = lp_data['filename'] + '_slide' + lp_data['p_slide'].astype(str) + '_trap' + lp_data['p_trap'].astype(str)

if 'computable' in lp_data.columns:
    lp_data.rename(columns={'computable': 'feasible'}, inplace=True)

lp_data['feasible_low'] = lp_data['feasible_high'] = lp_data['feasible_mean'] = lp_data['feasible']

lp_data.head()

Unnamed: 0,benchmark,c,feasible,reward,time,mean_reward,std_reward,mean_penalty,std_penalty,algorithm,filename,p_slide,p_trap,feasible_low,feasible_high,feasible_mean
0,final_13_slide0.0_trap0.1,0.0,True,5.598752,259.489775,5.598752,0,0.0,0,LP,final_13,0.0,0.1,True,True,True
1,final_14_slide0.0_trap0.1,0.0,False,0.0,-1.0,0.0,0,2.0,0,LP,final_14,0.0,0.1,False,False,False
2,final_15_slide0.0_trap0.1,0.0,False,0.0,-1.0,0.0,0,2.0,0,LP,final_15,0.0,0.1,False,False,False
3,final_16_slide0.0_trap0.1,0.0,True,5.473211,115.805864,5.473211,0,0.0,0,LP,final_16,0.0,0.1,True,True,True
4,final_17_slide0.0_trap0.1,0.0,True,4.69269,17.142534,4.69269,0,0.0,0,LP,final_17,0.0,0.1,True,True,True


In [13]:

import numpy as np
from scipy.stats import t

agent_data = None
agents = ['ParetoUCT', 'DualUCT', 'RAMCP', 'DualRAMCP']
for agent in agents:
    try:
        data = pd.read_csv(output_path / f'results_{agent}.csv', sep=';')
        data['agent'] = agent
        if agent_data is None:
            agent_data = data
        else:
            agent_data = pd.concat([agent_data, data])
    except FileNotFoundError:
        pass

agent_data['benchmark'] = agent_data['filename'] + \
    '_slide' + agent_data['p_slide'].astype(str) + \
    '_trap' + agent_data['p_trap'].astype(str)

agent_data['algorithm'] = agent_data['agent'] + '_t' + agent_data['time_limit'].astype(str)

if 'repetitions' not in agent_data.columns:
    agent_data['repetitions'] = 100

# t-statistic quantiles
agent_data['t'] = (agent_data['mean_penalty'] - agent_data['c']) * np.sqrt(agent_data['repetitions']) / agent_data['std_penalty']
agent_data.loc[agent_data['std_penalty'] == 0, 't'] = 0
agent_data['feasible_low'] = agent_data['t'] <= t(df=agent_data['repetitions']-1).ppf(0.95)
agent_data['feasible_high'] = agent_data['t'] <= t(df=agent_data['repetitions']-1).ppf(0.05)
agent_data['feasible_mean'] = agent_data['mean_penalty'] <= agent_data['c']

agent_data.head()

Unnamed: 0,filename,c,p_slide,p_trap,time_limit,mean_reward,std_reward,mean_penalty,std_penalty,feasible,mean_time_per_step,repetitions,agent,benchmark,algorithm,t,feasible_low,feasible_high,feasible_mean
0,final_13,0.0,0.0,0.1,5,5.97,0.171447,0.0,0.0,True,0.005064,100,ParetoUCT,final_13_slide0.0_trap0.1,ParetoUCT_t5,0.0,True,False,True
1,final_13,0.0,0.0,0.1,10,5.98,0.140705,0.0,0.0,True,0.010139,100,ParetoUCT,final_13_slide0.0_trap0.1,ParetoUCT_t10,0.0,True,False,True
2,final_13,0.0,0.0,0.1,25,6.0,0.0,0.0,0.0,True,0.025278,100,ParetoUCT,final_13_slide0.0_trap0.1,ParetoUCT_t25,0.0,True,False,True
3,final_13,0.0,0.0,0.1,50,6.0,0.0,0.0,0.0,True,0.050546,100,ParetoUCT,final_13_slide0.0_trap0.1,ParetoUCT_t50,0.0,True,False,True
4,final_14,0.0,0.0,0.1,5,1.0,0.0,0.0,0.0,True,0.005101,100,ParetoUCT,final_14_slide0.0_trap0.1,ParetoUCT_t5,0.0,True,False,True


In [14]:
# Merge the data
all_data = pd.concat([agent_data, lp_data], ignore_index=True)

# Add LP accurate values
real_values = lp_data[['mean_reward', 'benchmark', 'c']]

all_data = all_data.merge(real_values, on=['benchmark', 'c'], suffixes=('', '_real'))

m = all_data['mean_reward'].min()

all_data['mean_reward'] -= all_data['mean_reward']
all_data['mean_reward_real'] += 0.00001

all_data.sort_values(by=['benchmark', 'algorithm'], inplace=True)
all_data.head()

Unnamed: 0,filename,c,p_slide,p_trap,time_limit,mean_reward,std_reward,mean_penalty,std_penalty,feasible,...,agent,benchmark,algorithm,t,feasible_low,feasible_high,feasible_mean,reward,time,mean_reward_real
13,final_13,0.0,0.0,0.1,10.0,0.0,1.5e-05,0.167,0.089955,False,...,DualRAMCP,final_13_slide0.0_trap0.1,DualRAMCP_t10,18.564818,False,False,False,,,5.598762
591,final_13,0.1,0.0,0.1,10.0,0.0,1.325742,0.11,0.09266,True,...,DualRAMCP,final_13_slide0.0_trap0.1,DualRAMCP_t10,1.079216,True,False,False,,,6.349539
1169,final_13,0.2,0.0,0.1,10.0,0.0,0.451607,0.219,0.144736,True,...,DualRAMCP,final_13_slide0.0_trap0.1,DualRAMCP_t10,1.312736,True,False,False,,,7.100317
1747,final_13,0.3,0.0,0.1,10.0,0.0,0.422118,0.252,0.115889,True,...,DualRAMCP,final_13_slide0.0_trap0.1,DualRAMCP_t10,-4.141887,True,True,True,,,7.296156
2325,final_13,0.4,0.0,0.1,10.0,0.0,0.219058,0.359,0.192325,True,...,DualRAMCP,final_13_slide0.0_trap0.1,DualRAMCP_t10,-2.131808,True,True,True,,,7.328975


### Plot of a single instance

In [None]:
import matplotlib.pyplot as plt

# Plot meand reward vs. c
# Each benchmark is a subplot
# Each algorithm is a line
# Use `std_reward` for error bars


for time in [5, 10, 25, 50, 100]:
    filtered_data = agent_data[(agent_data['time_limit'] == time) | (agent_data['time_limit'].isnull())]

    # Create a FacetGrid with the benchmark feature
    g = sns.FacetGrid(filtered_data, col="benchmark", col_wrap=4, height=4, aspect=1.5)

    # Do not show the plot in a notebook
    plt.ioff()

    # Map the lineplot to each subplot
    g.map_dataframe(sns.lineplot, x='c', y='mean_reward', hue='algorithm', err_style='band', err_kws={'alpha': 0.2})

    # Iterate through each subplot to add error bars manually
    for ax, (benchmark_value, subset) in zip(g.axes.flatten(), filtered_data.groupby('benchmark')):
        for algorithm_value in subset['algorithm'].unique():
            subset_algo = subset[subset['algorithm'] == algorithm_value]
            ax.fill_between(
                subset_algo['c'],
                subset_algo['mean_reward'] - subset_algo['std_reward'],
                subset_algo['mean_reward'] + subset_algo['std_reward'],
                alpha=0.1
            )
            non_feasible = subset_algo[subset_algo['feasible'] == False]
            ax.scatter(non_feasible['c'], non_feasible['mean_reward'], color='red', marker='x', s=50, label='Non-feasible')
        ax.grid(True)

    # Adjust the titles and labels
    g.set_titles(col_template="{col_name}")
    g.set_axis_labels("C", "Mean Reward")
    g.add_legend(title='Algorithm')

    g.savefig(output_path / f'mean_reward_vs_c_t{time}.svg')




### Tables

In [15]:
# minimum of mean_reward and mean_reward_real
all_data['min_mean_reward'] = all_data[['mean_reward', 'mean_reward_real']].min(axis=1)
all_data['cvm'] = all_data['feasible_mean'] * all_data['min_mean_reward']
all_data['cvl'] = all_data['feasible_low'] * all_data['min_mean_reward']
all_data['cvh'] = all_data['feasible_high'] * all_data['min_mean_reward']

# for m in ['cvm', 'cvl', 'cvh']:
#     all_data[m] /= all_data.groupby('algorithm')[m].mean().max()

# all_data['cvm'] /= all_data['cvm'].max()
# all_data['cvl'] /= all_data['cvl'].max()
# all_data['cvh'] /= all_data['cvh'].max()

all_data['cvmn'] = all_data['feasible_mean'] * all_data['min_mean_reward'] / (all_data['mean_reward_real'])
all_data['cvln'] = all_data['feasible_low'] * all_data['min_mean_reward'] / (all_data['mean_reward_real'])
all_data['cvhn'] = all_data['feasible_high'] * all_data['min_mean_reward'] / (all_data['mean_reward_real'])

all_data.groupby(['algorithm'])[[
    'feasible_low', 'feasible_mean', 'feasible_high',
    'cvl', 'cvm', 'cvh',
    'cvln', 'cvmn', 'cvhn',
]].mean().sort_values(by='feasible_mean', ascending=False)

Unnamed: 0_level_0,feasible_low,feasible_mean,feasible_high,cvl,cvm,cvh,cvln,cvmn,cvhn
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
ParetoUCT_t5,1.0,0.921569,0.563725,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
ParetoUCT_t10,0.995098,0.916667,0.593137,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
ParetoUCT_t25,1.0,0.906863,0.534314,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
ParetoUCT_t50,1.0,0.887255,0.509804,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
LP,0.882353,0.882353,0.882353,-1e-05,-1e-05,-1e-05,0.009804,0.009804,0.009804
DualUCT_t50,0.936275,0.808824,0.52451,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
DualUCT_t25,0.936275,0.779412,0.436275,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
DualUCT_t10,0.901961,0.754902,0.372549,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
DualUCT_t5,0.921569,0.642157,0.303922,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0
RAMCP_t50,0.862745,0.598039,0.151961,-1e-05,-1e-05,0.0,0.009804,0.009804,0.0


In [7]:
all_data.loc[all_data.c <= 0.2].groupby(['algorithm'])[['feasible_low', 'feasible_mean', 'feasible_high', 'cvl', 'cvm', 'cvh']].mean().sort_values(by='feasible_mean', ascending=False)

Unnamed: 0_level_0,feasible_low,feasible_mean,feasible_high,cvl,cvm,cvh
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LP,0.993197,0.993197,0.993197,2.0,2.0,-inf
ParetoUCT_t5,0.996599,0.908163,0.391156,2.0,2.0,
ParetoUCT_t10,0.986395,0.887755,0.360544,2.0,2.0,
ParetoUCT_t50,0.993197,0.877551,0.346939,2.0,2.0,
ParetoUCT_t25,0.996599,0.860544,0.35034,2.0,2.0,
DualUCT_t50,0.911565,0.772109,0.329932,1.833333,1.666667,
DualUCT_t25,0.908163,0.744898,0.289116,1.666667,1.666667,
DualUCT_t10,0.901361,0.72449,0.241497,1.666667,1.666667,
RAMCP_t50,0.938776,0.690476,0.098639,2.0,2.0,
DualUCT_t5,0.880952,0.680272,0.173469,1.666667,1.666667,


In [8]:
all_data.loc[all_data.c >= 0.2].groupby(['algorithm'])[['feasible_low', 'feasible_mean', 'feasible_high', 'cvl', 'cvm', 'cvh']].mean().sort_values(by='feasible_mean', ascending=False)

Unnamed: 0_level_0,feasible_low,feasible_mean,feasible_high,cvl,cvm,cvh
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LP,1.0,1.0,1.0,0.0,0.0,
ParetoUCT_t5,1.0,0.982993,0.853741,0.0,0.0,
ParetoUCT_t10,0.996599,0.969388,0.802721,0.0,0.0,
ParetoUCT_t50,0.993197,0.945578,0.785714,0.0,0.0,
ParetoUCT_t25,0.996599,0.928571,0.795918,0.0,0.0,
DualUCT_t50,0.965986,0.853741,0.663265,0.0,0.0,
DualUCT_t25,0.969388,0.840136,0.585034,0.0,0.0,
DualUCT_t10,0.959184,0.792517,0.5,0.0,0.0,
DualUCT_t5,0.969388,0.721088,0.380952,0.0,0.0,
RAMCP_t50,0.795918,0.547619,0.357143,0.0,0.0,
