In [76]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy.stats import binomtest
from scipy.stats import t

In [126]:
# Load the LP results and bring them into the same column layout as the
# per-agent result tables (mean/std reward and penalty, algorithm, benchmark).
output_path = Path('../outputs/20240603-152411-RanomdContHallway')

lp_data = pd.read_csv(output_path / 'results_lp.csv', sep=';')

# Normalise the feasibility column name *before* its first use below;
# a file that only has 'computable' would otherwise fail on 'feasible'.
if 'computable' in lp_data.columns:
    lp_data = lp_data.rename(columns={'computable': 'feasible'})

# The LP yields a single value per instance, so it has no spread.
lp_data['mean_reward'] = lp_data['reward']
lp_data['std_reward'] = 0
lp_data['mean_penalty'] = lp_data['c']
# Set mean_penalty to 2 for infeasible solutions
lp_data.loc[lp_data['feasible'].eq(False), 'mean_penalty'] = 2
# NOTE(review): a negative time presumably marks a run without a usable
# solution -- confirm against the LP runner. Such rows get zero reward.
lp_data.loc[lp_data['time'] < 0, 'mean_reward'] = 0
lp_data['std_penalty'] = 0
lp_data['algorithm'] = 'LP'

# Benchmark names are '_'-separated; the last two segments are 'slide<p>' and
# 'trap<p>'. The file name is rebuilt from the leading segments; for names with
# more than three segments, the one directly before 'slide<p>' is dropped.
name_parts = lp_data['benchmark'].str.split('_', expand=True)
num_parts = name_parts.shape[1]
lp_data['filename'] = name_parts[0]
for i in range(1, num_parts - 3):
    # Accumulate into 'filename' (there is no 'file' column to append to).
    lp_data['filename'] = lp_data['filename'] + '_' + name_parts[i]
lp_data['p_slide'] = name_parts[num_parts - 2].str.replace('slide', '', regex=False).astype(float)
lp_data['p_trap'] = name_parts[num_parts - 1].str.replace('trap', '', regex=False).astype(float)
# Canonical benchmark key, built the same way as for the agent results.
lp_data['benchmark'] = (
    lp_data['filename']
    + '_slide' + lp_data['p_slide'].astype(str)
    + '_trap' + lp_data['p_trap'].astype(str)
)

# The LP has no sampling noise, so all three feasibility estimates coincide.
for feasibility_column in ('feasible_low', 'feasible_high', 'feasible_mean'):
    lp_data[feasibility_column] = lp_data['feasible']

lp_data.head()

Unnamed: 0,benchmark,c,feasible,reward,time,mean_reward,std_reward,mean_penalty,std_penalty,algorithm,filename,p_slide,p_trap,feasible_low,feasible_high,feasible_mean
0,map0-0.3-0.2_slide0.2_trap0.1,0.0,True,4.191245,25.074244,4.191245,0,0.0,0,LP,map0-0.3-0.2,0.2,0.1,True,True,True
1,map1-0.3-0.2_slide0.2_trap0.1,0.0,True,2.059145,18.771887,2.059145,0,0.0,0,LP,map1-0.3-0.2,0.2,0.1,True,True,True
2,map2-0.3-0.3_slide0.2_trap0.1,0.0,True,4.585093,34.245968,4.585093,0,0.0,0,LP,map2-0.3-0.3,0.2,0.1,True,True,True
3,map3-0.3-0.3_slide0.2_trap0.1,0.0,True,0.0,14.949799,0.0,0,0.0,0,LP,map3-0.3-0.3,0.2,0.1,True,True,True
4,map4-0.3-0.4_slide0.2_trap0.1,0.0,True,4.777878,16.687393,4.777878,0,0.0,0,LP,map4-0.3-0.4,0.2,0.1,True,True,True


In [127]:

# Load the per-agent result tables and derive feasibility estimates from the
# observed penalties.
agents = ['ParetoUCT', 'DualUCT', 'RAMCP', 'DualRAMCP']

agent_frames = []
for agent in agents:
    results_file = output_path / f'results_{agent}.csv'
    try:
        data = pd.read_csv(results_file, sep=';')
    except FileNotFoundError:
        # Best effort: a missing agent file is tolerated, but it is reported
        # so that a missing agent is not overlooked.
        warnings.warn(f'No results for agent {agent!r} at {results_file}; skipping.')
        continue
    data['agent'] = agent
    agent_frames.append(data)

if not agent_frames:
    raise FileNotFoundError(f'No agent result files found in {output_path}')

# Concatenate once; ignore_index avoids duplicate row labels across agents.
agent_data = pd.concat(agent_frames, ignore_index=True)

agent_data['benchmark'] = (
    agent_data['filename']
    + '_slide' + agent_data['p_slide'].astype(str)
    + '_trap' + agent_data['p_trap'].astype(str)
)

agent_data['algorithm'] = agent_data['agent'] + '_t' + agent_data['time_limit'].astype(str)

if 'repetitions' not in agent_data.columns:
    # NOTE(review): 100 is assumed when the file does not record the number
    # of repetitions -- confirm this matches how those runs were produced.
    agent_data['repetitions'] = 100

# t-statistic of the mean penalty against the threshold c
agent_data['t'] = (
    (agent_data['mean_penalty'] - agent_data['c'])
    * np.sqrt(agent_data['repetitions'])
    / agent_data['std_penalty']
)
# Zero spread makes the statistic inf/NaN; it is replaced by 0.
# NOTE(review): this also discards the sign of (mean_penalty - c) for
# zero-variance rows -- confirm that this is intended.
agent_data.loc[agent_data['std_penalty'] == 0, 't'] = 0

penalty_t_dist = t(df=agent_data['repetitions'] - 1)
# Lenient: t does not exceed the 95% quantile.
agent_data['feasible_low'] = agent_data['t'] <= penalty_t_dist.ppf(0.95)
# Strict: t lies at or below the 5% quantile.
agent_data['feasible_high'] = agent_data['t'] <= penalty_t_dist.ppf(0.05)
# Point estimate: the mean penalty itself respects the threshold.
agent_data['feasible_mean'] = agent_data['mean_penalty'] <= agent_data['c']

agent_data.head()

Unnamed: 0,filename,c,p_slide,p_trap,time_limit,mean_reward,std_reward,mean_penalty,std_penalty,mean_time_per_step,repetitions,agent,benchmark,algorithm,t,feasible_low,feasible_high,feasible_mean
0,map0-0.3-0.2,0.0,0.2,0.1,5,4.729957,0.566003,0.0,0.0,0.005071,100,ParetoUCT,map0-0.3-0.2_slide0.2_trap0.1,ParetoUCT_t5,0.0,True,False,True
1,map0-0.3-0.2,0.0,0.2,0.1,10,4.669962,0.587017,0.0,0.0,0.010142,100,ParetoUCT,map0-0.3-0.2_slide0.2_trap0.1,ParetoUCT_t10,0.0,True,False,True
2,map0-0.3-0.2,0.0,0.2,0.1,25,4.85996,0.348819,0.0,0.0,0.025253,100,ParetoUCT,map0-0.3-0.2_slide0.2_trap0.1,ParetoUCT_t25,0.0,True,False,True
3,map0-0.3-0.2,0.0,0.2,0.1,50,4.949986,0.219082,0.0,0.0,0.050354,100,ParetoUCT,map0-0.3-0.2_slide0.2_trap0.1,ParetoUCT_t50,0.0,True,False,True
4,map1-0.3-0.2,0.0,0.2,0.1,5,2.019982,0.140708,0.002,0.014071,0.005143,100,ParetoUCT,map1-0.3-0.2_slide0.2_trap0.1,ParetoUCT_t5,1.421411,True,False,False


In [145]:
# Merge the agent and LP results into one table
all_data = pd.concat([agent_data, lp_data], ignore_index=True)

# Attach the LP reward for the same (benchmark, c) as 'mean_reward_real'.
# The default inner join drops rows that have no LP counterpart.
real_values = lp_data[['mean_reward', 'benchmark', 'c']]
all_data = all_data.merge(real_values, on=['benchmark', 'c'], suffixes=('', '_real'))

# Shift the rewards by their global minimum. The previous code subtracted the
# column from itself, which zeroed every reward and left the minimum unused.
# NOTE(review): 'mean_reward_real' is not shifted by the same offset; this
# only matters when the minimum is non-zero -- confirm the intended behaviour.
reward_offset = all_data['mean_reward'].min()
all_data['mean_reward'] -= reward_offset
# Small constant so the reference reward can be used as a divisor.
all_data['mean_reward_real'] += 0.00001

all_data = all_data.sort_values(by=['benchmark', 'algorithm'])
all_data.head()

Unnamed: 0,filename,c,p_slide,p_trap,time_limit,mean_reward,std_reward,mean_penalty,std_penalty,mean_time_per_step,...,benchmark,algorithm,t,feasible_low,feasible_high,feasible_mean,feasible,reward,time,mean_reward_real
5,map0-0.3-0.2,0.0,0.2,0.1,10.0,1.999065,1.6e-05,0.0,0.0,0.010728,...,map0-0.3-0.2_slide0.2_trap0.1,DualUCT_t10,0.0,True,False,True,,,,4.191255
1279,map0-0.3-0.2,0.1,0.2,0.1,10.0,3.529419,1.298692,0.106,0.056533,0.010748,...,map0-0.3-0.2_slide0.2_trap0.1,DualUCT_t10,1.061331,True,False,False,,,,4.576946
2553,map0-0.3-0.2,0.2,0.2,0.1,10.0,4.089555,1.155762,0.19,0.088192,0.01072,...,map0-0.3-0.2_slide0.2_trap0.1,DualUCT_t10,-1.133893,True,False,True,,,,4.673017
3827,map0-0.3-0.2,0.35,0.2,0.1,10.0,4.329625,1.101681,0.293,0.131237,0.010676,...,map0-0.3-0.2_slide0.2_trap0.1,DualUCT_t10,-4.343276,True,True,True,,,,4.720945
5101,map0-0.3-0.2,0.5,0.2,0.1,10.0,4.549611,0.770533,0.443,0.134281,0.010622,...,map0-0.3-0.2_slide0.2_trap0.1,DualUCT_t10,-4.244838,True,True,True,,,,4.720945


### Plot of a single instance

In [None]:
# Plot mean reward vs. c
# Each benchmark is a subplot
# Each algorithm is a line
# Use `std_reward` for error bars

# Do not show the plots in the notebook; they are only written to disk
plt.ioff()

# Agent rows and LP rows together: the LP rows have no time limit (NaN) and are
# the only ones carrying the 'feasible' flag used for the markers below.
plot_data = pd.concat([agent_data, lp_data], ignore_index=True)

for time_limit in [5, 10, 25, 50, 100]:
    filtered_data = plot_data[(plot_data['time_limit'] == time_limit) | (plot_data['time_limit'].isnull())]
    if filtered_data.empty:
        continue

    # Pin the facet order so every axis can be matched to its benchmark
    # explicitly instead of relying on two independent orderings agreeing.
    benchmarks = list(filtered_data['benchmark'].unique())

    # Create a FacetGrid with the benchmark feature
    g = sns.FacetGrid(filtered_data, col="benchmark", col_order=benchmarks, col_wrap=4, height=4, aspect=1.5)

    # Map the lineplot to each subplot
    g.map_dataframe(sns.lineplot, x='c', y='mean_reward', hue='algorithm', err_style='band', err_kws={'alpha': 0.2})

    # Iterate through each subplot to add error bars manually
    for ax, benchmark_value in zip(g.axes.flatten(), benchmarks):
        subset = filtered_data[filtered_data['benchmark'] == benchmark_value]
        for algorithm_value in subset['algorithm'].unique():
            # Sort by c so the band is drawn left to right
            subset_algo = subset[subset['algorithm'] == algorithm_value].sort_values('c')
            ax.fill_between(
                subset_algo['c'],
                subset_algo['mean_reward'] - subset_algo['std_reward'],
                subset_algo['mean_reward'] + subset_algo['std_reward'],
                alpha=0.1
            )
            # Agent rows have NaN here, so only infeasible LP points are marked
            non_feasible = subset_algo[subset_algo['feasible'] == False]
            ax.scatter(non_feasible['c'], non_feasible['mean_reward'], color='red', marker='x', s=50, label='Non-feasible')
        ax.grid(True)

    # Adjust the titles and labels
    g.set_titles(col_template="{col_name}")
    g.set_axis_labels("C", "Mean Reward")
    g.add_legend(title='Algorithm')

    g.savefig(output_path / f'mean_reward_vs_c_t{time_limit}.svg')
    # Release the figure; otherwise one open figure per time limit accumulates
    plt.close(g.figure)




### Tables

In [150]:
# Reward credited to an algorithm: its own mean reward, capped at the LP
# reference value for the same benchmark and c.
all_data['min_mean_reward'] = all_data[['mean_reward', 'mean_reward_real']].min(axis=1)

# Score column -> feasibility flag that gates it
feasibility_flag = {'cvm': 'feasible_mean', 'cvl': 'feasible_low', 'cvh': 'feasible_high'}

# Gated reward: the capped reward multiplied by the feasibility flag
for metric, flag in feasibility_flag.items():
    all_data[metric] = all_data[flag] * all_data['min_mean_reward']

# Rescale each score by the largest per-algorithm average of that score
for metric in feasibility_flag:
    best_algorithm_mean = all_data.groupby('algorithm')[metric].mean().max()
    all_data[metric] = all_data[metric] / best_algorithm_mean

# Per-row variant ('...n'): gated reward relative to the LP reference value
for metric, flag in feasibility_flag.items():
    all_data[metric + 'n'] = all_data[flag] * all_data['min_mean_reward'] / (all_data['mean_reward_real'])

summary_columns = [
    'feasible_low', 'feasible_mean', 'feasible_high',
    'cvl', 'cvm', 'cvh',
    'cvln', 'cvmn', 'cvhn',
]
(
    all_data
    .groupby(['algorithm'])[summary_columns]
    .mean()
    .sort_values(by='feasible_mean', ascending=False)
)

Unnamed: 0_level_0,feasible_low,feasible_mean,feasible_high,cvl,cvm,cvh,cvln,cvmn,cvhn
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
LP,0.995918,0.995918,0.995918,1.0,1.0,1.0,0.995918,0.995918,0.995918
ParetoUCT_t5,0.997959,0.940816,0.604082,0.908563,0.874685,0.626219,0.898695,0.853867,0.554307
ParetoUCT_t10,0.989796,0.930612,0.581633,0.913033,0.875106,0.615646,0.902247,0.855573,0.542015
ParetoUCT_t50,0.995918,0.912245,0.55102,0.936572,0.879805,0.600055,0.926667,0.858569,0.525823
ParetoUCT_t25,0.997959,0.902041,0.567347,0.92855,0.865775,0.609025,0.919419,0.842139,0.536906
DualUCT_t50,0.942857,0.816327,0.485714,0.76419,0.673313,0.475976,0.562692,0.466843,0.425096
DualUCT_t25,0.938776,0.804082,0.42449,0.737914,0.642952,0.403538,0.537151,0.439233,0.360752
DualUCT_t10,0.934694,0.763265,0.361224,0.69413,0.570193,0.319409,0.508025,0.385181,0.287107
DualUCT_t5,0.92449,0.714286,0.279592,0.650484,0.505612,0.234328,0.473757,0.332786,0.211325
RAMCP_t50,0.865306,0.642857,0.244898,0.765928,0.570362,0.27693,0.655457,0.461938,0.239719


In [102]:
# Per-algorithm averages restricted to rows with c <= 0.2,
# ordered by the share of runs that are feasible on average.
low_c_rows = all_data[all_data['c'] <= 0.2]
low_c_columns = ['feasible_low', 'feasible_mean', 'feasible_high', 'cvl', 'cvm', 'cvh']
(
    low_c_rows
    .groupby(['algorithm'])[low_c_columns]
    .mean()
    .sort_values(by='feasible_mean', ascending=False)
)

Unnamed: 0_level_0,feasible_low,feasible_mean,feasible_high,cvl,cvm,cvh
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LP,0.993197,0.993197,0.993197,3.670117,3.670117,3.670117
ParetoUCT_t5,0.996599,0.908163,0.391156,3.317367,3.108441,1.582457
ParetoUCT_t10,0.986395,0.887755,0.360544,3.323988,3.070218,1.49587
ParetoUCT_t50,0.993197,0.877551,0.346939,3.394925,3.096585,1.468325
ParetoUCT_t25,0.996599,0.860544,0.35034,3.372261,3.025146,1.462096
DualUCT_t50,0.911565,0.772109,0.329932,2.447619,2.087812,1.166684
DualUCT_t25,0.908163,0.744898,0.289116,2.399044,1.972691,0.996791
DualUCT_t10,0.901361,0.72449,0.241497,2.265859,1.792277,0.734945
RAMCP_t50,0.938776,0.690476,0.098639,2.90658,2.088755,0.402313
DualUCT_t5,0.880952,0.680272,0.173469,2.137229,1.596495,0.516986


In [104]:
# Per-algorithm averages restricted to rows with c >= 0.2,
# ordered by the share of runs that are feasible on average.
high_c_rows = all_data[all_data['c'] >= 0.2]
high_c_columns = ['feasible_low', 'feasible_mean', 'feasible_high', 'cvl', 'cvm', 'cvh']
(
    high_c_rows
    .groupby(['algorithm'])[high_c_columns]
    .mean()
    .sort_values(by='feasible_mean', ascending=False)
)

Unnamed: 0_level_0,feasible_low,feasible_mean,feasible_high,cvl,cvm,cvh
algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LP,1.0,1.0,1.0,4.423177,4.423177,4.423177
ParetoUCT_t5,1.0,0.982993,0.853741,3.986731,3.934218,3.563987
ParetoUCT_t10,0.996599,0.969388,0.802721,4.026575,3.971781,3.448016
ParetoUCT_t50,0.993197,0.945578,0.785714,4.133191,3.993846,3.451526
ParetoUCT_t25,0.996599,0.928571,0.795918,4.090287,3.900346,3.463053
DualUCT_t50,0.965986,0.853741,0.663265,3.754367,3.394804,2.713755
DualUCT_t25,0.969388,0.840136,0.585034,3.603857,3.189351,2.325437
DualUCT_t10,0.959184,0.792517,0.5,3.352397,2.834394,1.860605
DualUCT_t5,0.969388,0.721088,0.380952,3.151487,2.43272,1.3377
RAMCP_t50,0.795918,0.547619,0.357143,3.305186,2.37195,1.663173
