# Evaluation Analysis

In [2]:
# imports
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go

In [37]:
# constants
INPUT_FILE = '../../results/hrl_evaluation_raw_results_20250811_124438.pickle'
BASLINE_FILE = '../../results/greedy_baseline_raw_results_20250811_121744.pickle'

metrics = [
    'mean_satisfied_ratio',
    'mean_gini_index',
    'mean_rebalanced_vehicles_manually',
    'mean_rebalanced_vehicles_incentives'
]
pretty_labels = {
        "mean_satisfied_ratio": "Satisfied Demand Ratio",
        "mean_gini_index": "Gini Coefficient",
        "mean_rebalanced_vehicles_manually": "Rebalanced Vehicles (Manual)",
        "mean_rebalanced_vehicles_incentives": "Rebalanced Vehicles (Incentives)",
    }

In [4]:
HE_RED = "#E2001A"
HE_BLUE = "#002D72"
HE_GRAY = "#5A5A5A"
HE_LIGHT_GRAY = "#D1D1D1"

pio.templates["esslingen_cd"] = go.layout.Template(
    layout=go.Layout(
        colorway=[HE_BLUE, HE_RED, HE_GRAY, "#2C74B3", "#A1C9F4"],
        font=dict(
            family="Arial, sans-serif",
            size=14,
            color=HE_BLUE  # Use the dark blue for text
        ),
        title=dict(
            font=dict(size=20, color=HE_BLUE)
        ),
        paper_bgcolor="white",
        plot_bgcolor="white",
        xaxis=dict(
            gridcolor=HE_LIGHT_GRAY,
            linecolor=HE_BLUE,
            zerolinecolor=HE_LIGHT_GRAY
        ),
        yaxis=dict(
            gridcolor=HE_LIGHT_GRAY,
            linecolor=HE_BLUE,
            zerolinecolor=HE_LIGHT_GRAY
        ),
    )
)

pio.templates.default = "esslingen_cd"

In [5]:
evaluation_data = pd.read_pickle(INPUT_FILE)
baseline_data = pd.read_pickle(BASLINE_FILE)

In [10]:
evaluation_data['model'] = evaluation_data['configuration'].str.replace(r'_(900|600|1200)$', '', regex=True)
evaluation_data.head()

Unnamed: 0,trial,seed,configuration,manual_rebalancing,incentive_rebalancing,fleet_size,mean_satisfied_ratio,mean_rebalanced_vehicles_manually,mean_rebalanced_vehicles_incentives,max_satisfied_ratio,min_satisfied_ratio,mean_gini_index,model
0,0,15795,HRL_Both_Enabled_900,True,True,900,0.603761,3.5325,55.085,1.0,0.232177,0.596866,HRL_Both_Enabled
1,0,15795,HRL_Manual_Only_900,True,False,900,0.840745,7.5425,0.0,1.0,0.309148,0.290671,HRL_Manual_Only
2,0,15795,HRL_Incentive_Only_900,False,True,900,0.54653,0.0,55.525,1.0,0.179811,0.679893,HRL_Incentive_Only
3,0,15795,HRL_Both_Enabled_600,True,True,600,0.487674,5.615,54.0525,1.0,0.164669,0.660453,HRL_Both_Enabled
4,0,15795,HRL_Manual_Only_600,True,False,600,0.723612,5.9125,0.0,1.0,0.215142,0.355335,HRL_Manual_Only


In [11]:
baseline_data['model'] = baseline_data['configuration'].apply(lambda x: f"Baseline_{x}")
baseline_data['model'] = baseline_data['model'].str.replace(r'_(900|600|1200)$', '', regex=True)
baseline_data.head()    

Unnamed: 0,trial,seed,configuration,manual_rebalancing,incentive_rebalancing,fleet_size,mean_satisfied_ratio,mean_rebalanced_vehicles_manually,mean_rebalanced_vehicles_incentives,max_satisfied_ratio,min_satisfied_ratio,mean_gini_index,model
0,0,15795,Both_Enabled_900,True,True,900,0.785096,0.02,35.865,1.0,0.271924,0.376491,Baseline_Both_Enabled
1,0,15795,Manual_Only_900,True,False,900,0.804218,0.02,0.0,1.0,0.285174,0.406844,Baseline_Manual_Only
2,0,15795,Incentive_Only_900,False,True,900,0.779028,0.0,36.685,1.0,0.277603,0.384144,Baseline_Incentive_Only
3,0,15795,No_Rebalancing_900,False,False,900,0.804529,0.0,0.0,1.0,0.297792,0.392109,Baseline_No_Rebalancing
4,0,15795,Both_Enabled_600,True,True,600,0.682681,0.14,46.3425,1.0,0.2,0.427208,Baseline_Both_Enabled


In [16]:
full_results = pd.concat([evaluation_data, baseline_data], axis=0, ignore_index=True)
full_results.head()

Unnamed: 0,trial,seed,configuration,manual_rebalancing,incentive_rebalancing,fleet_size,mean_satisfied_ratio,mean_rebalanced_vehicles_manually,mean_rebalanced_vehicles_incentives,max_satisfied_ratio,min_satisfied_ratio,mean_gini_index,model
0,0,15795,HRL_Both_Enabled_900,True,True,900,0.603761,3.5325,55.085,1.0,0.232177,0.596866,HRL_Both_Enabled
1,0,15795,HRL_Manual_Only_900,True,False,900,0.840745,7.5425,0.0,1.0,0.309148,0.290671,HRL_Manual_Only
2,0,15795,HRL_Incentive_Only_900,False,True,900,0.54653,0.0,55.525,1.0,0.179811,0.679893,HRL_Incentive_Only
3,0,15795,HRL_Both_Enabled_600,True,True,600,0.487674,5.615,54.0525,1.0,0.164669,0.660453,HRL_Both_Enabled
4,0,15795,HRL_Manual_Only_600,True,False,600,0.723612,5.9125,0.0,1.0,0.215142,0.355335,HRL_Manual_Only


## Bar Charts with error bars

In [None]:
agg = (full_results.copy().groupby(['fleet_size', 'model'])[metrics]
         .agg(['mean', 'std'])
         .reset_index())
agg = agg[agg['model'] != 'Baseline_Incentive_Only']
agg = agg[agg['model'] != 'Baseline_Manual_Only']
agg.reset_index(inplace=True)
agg.head(15)

Unnamed: 0_level_0,index,fleet_size,model,mean_satisfied_ratio,mean_satisfied_ratio,mean_gini_index,mean_gini_index,mean_rebalanced_vehicles_manually,mean_rebalanced_vehicles_manually,mean_rebalanced_vehicles_incentives,mean_rebalanced_vehicles_incentives
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,mean,std,mean,std,mean,std,mean,std
0,0,600,Baseline_Both_Enabled,0.684941,0.007384,0.434619,0.007346,0.24575,0.159634,46.281625,0.701642
1,3,600,Baseline_No_Rebalancing,0.69558,0.008171,0.470092,0.007953,0.0,0.0,0.0,0.0
2,4,600,HRL_Both_Enabled,0.470152,0.024945,0.669418,0.028578,5.204375,0.370039,53.8865,0.298828
3,5,600,HRL_Incentive_Only,0.495697,0.007579,0.694556,0.010298,0.0,0.0,54.564625,0.403505
4,6,600,HRL_Manual_Only,0.717566,0.037089,0.364393,0.025103,6.6575,1.200745,0.0,0.0
5,7,900,Baseline_Both_Enabled,0.787557,0.007757,0.380673,0.005681,0.02,0.0,35.756625,0.89942
6,10,900,Baseline_No_Rebalancing,0.797221,0.008036,0.40591,0.007701,0.0,0.0,0.0,0.0
7,11,900,HRL_Both_Enabled,0.598466,0.019052,0.605241,0.021342,3.919,0.67495,55.449375,0.424766
8,12,900,HRL_Incentive_Only,0.540115,0.007976,0.683456,0.008119,0.0,0.0,55.796375,0.350319
9,13,900,HRL_Manual_Only,0.838526,0.002923,0.297057,0.004442,7.48525,0.202146,0.0,0.0


In [34]:
def plot_fleet_barchart(data: pd.DataFrame, metric: str):
    agg_data = (
        data.groupby("model")[metric]
        .agg(mean="mean", std="std")
        .reset_index()
    )
    x = agg_data["model"].astype(str)
    y = agg_data["mean"]
    err = agg_data["std"].fillna(0.0)

    y_title = pretty_labels.get(metric, metric)

    y_is_ratio = metric in {"mean_satisfied_ratio", "mean_gini_index"}

    fig = go.Figure(
        data=[
            go.Bar(
                x=x,
                y=y,
                error_y=dict(type="data", array=err, visible=True)
            )
        ]
    )

    fleet_str = None
    if "fleet_size" in agg_data.columns:
        uniq = agg_data["fleet_size"].unique()
        if len(uniq) == 1:
            fleet_str = str(uniq[0])

    title = f"{y_title} — Fleet size: {fleet_str}" if fleet_str else y_title

    fig.update_layout(
        title=title,
        xaxis_title="Model",
        yaxis_title=y_title,
        bargap=0.25,
        template="plotly",
        height=420,
        width=700,
    )

    if y_is_ratio:
        fig.update_yaxes(tickformat=".0%")  # display as percentage

    return fig


In [36]:
plot_fleet_barchart(full_results[full_results['fleet_size'] == 900], 'mean_satisfied_ratio').show()

## Line Charts across fleet size

## Boxplots

## Scatter Plots

## Pair Plots 