In [1]:
import pandas as pd
from pathlib import Path
import json
from pprint import pprint
import math
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm

# Root directory of the benchmarking results; it is passed to
# get_scenarios_df, which globs "*/*.json" beneath it.
DATA_DIR = Path("data/benchmarking")

In [2]:
def get_in_road_percentage(steps_df: pd.DataFrame) -> float:
    """Return the share of steps during which the vehicle was on the road.

    Despite the name, the result is a fraction in ``[0, 1]``, not a value in
    ``[0, 100]``.

    Args:
        steps_df: One row per step, with an ``out_of_road`` column.

    Returns:
        The normalized count of rows whose ``out_of_road`` value is ``False``,
        as computed by ``value_counts(normalize=True)``. ``0.0`` is returned
        when no row is ``False`` (the vehicle was never on the road).

    Raises:
        KeyError: If ``steps_df`` has no ``out_of_road`` column.
    """
    shares = steps_df["out_of_road"].value_counts(normalize=True)
    # ``False`` is missing from the index when every step is out of road;
    # that case means 0 % on-road rather than an error.
    return float(shares.get(False, 0.0))

In [3]:
def get_n_crashes(steps_df: pd.DataFrame) -> int:
    """Count crash episodes so that one crash is not counted multiple times.

    A crash episode is a run of consecutive steps whose ``crash`` flag is
    ``True``. Each run counts once, however many steps it spans, including
    runs that begin on the first step or are still ongoing on the last one.

    Args:
        steps_df: One row per step, with a ``crash`` column of flags.

    Returns:
        The number of crash episodes. ``0`` is returned when the frame has no
        ``crash`` column, matching the previous behaviour of this function.
    """
    if "crash" not in steps_df.columns:
        return 0

    # Comparing with True yields a clean boolean mask (missing values -> False).
    in_crash = steps_df["crash"].eq(True)

    # An episode starts on every step that is in crash while the previous step
    # was not. ``fill_value=False`` treats the step "before the first one" as
    # crash-free, so a crash present from step 0 is counted as well.
    previously_in_crash = in_crash.shift(fill_value=False).astype(bool)
    episode_starts = in_crash & ~previously_in_crash

    return int(episode_starts.sum())

In [4]:
def process_steps(steps_infos: list) -> dict:
    """Condense the per-step records of one scenario into summary values.

    Args:
        steps_infos: List of per-step records; it is turned into a DataFrame
            with one row per step.

    Returns:
        A dict holding values read from the last step (``arrive_dest``,
        ``max_step``, ``route_completion``, ``env_seed``) under their output
        names, plus ``in_road_percentage`` and ``n_crashes`` computed over
        all steps.
    """
    steps_df = pd.DataFrame(steps_infos)
    final_step = steps_df.iloc[-1]

    # Output key -> column of the final step it is read from.
    final_step_fields = {
        "termination.arrive_dest": "arrive_dest",
        "termination.timeout": "max_step",
        "route_completion": "route_completion",
        "seed": "env_seed",
    }

    summary = {
        out_key: final_step[column] for out_key, column in final_step_fields.items()
    }
    summary["in_road_percentage"] = get_in_road_percentage(steps_df)
    summary["n_crashes"] = get_n_crashes(steps_df)
    return summary

In [5]:
def get_scenarios_df(dir: Path) -> pd.DataFrame:
    """Load every scenario JSON below ``dir`` into one DataFrame.

    Files are matched with ``dir.glob("*/*.json")``, i.e. one directory level
    below ``dir``. The name of each file's parent directory must consist of
    four ``_``-separated tokens; the second is parsed as ``decision_repeat``
    (int) and the fourth as ``dt`` (float).

    Args:
        dir: Root directory containing one sub-directory per configuration.

    Returns:
        One row per JSON file. Each row holds the file's top-level values
        (without ``steps_infos``), the parsed ``decision_repeat`` and ``dt``,
        and the summary returned by ``process_steps``.

    Raises:
        ValueError: If a parent directory name does not have four
            ``_``-separated tokens or its numeric tokens cannot be parsed.
    """
    # Sorted so that row order does not depend on filesystem enumeration order.
    paths = sorted(dir.glob("*/*.json"))

    scenarios = []
    for file_path in tqdm(paths):
        with open(file_path, "r", encoding="utf-8") as f:
            scenario_data = json.load(f)

        # ``parent.name`` works with any path separator, unlike splitting the
        # string form of the path on "/".
        config_name = file_path.parent.name
        tokens = config_name.split("_")
        if len(tokens) != 4:
            raise ValueError(
                f"Cannot parse decision_repeat and dt from directory name "
                f"{config_name!r} ({file_path})"
            )
        _, dr, _, dt = tokens
        scenario_data["decision_repeat"] = int(dr)
        scenario_data["dt"] = float(dt)

        # Replace the raw per-step records with their summary.
        steps_infos = scenario_data.pop("steps_infos")
        scenario_data.update(process_steps(steps_infos))
        scenarios.append(scenario_data)
    return pd.DataFrame(scenarios)

In [None]:
# Parse every scenario JSON under DATA_DIR into one DataFrame (one row per file).
df = get_scenarios_df(DATA_DIR)

In [7]:
# Index every run by its configuration (dt, decision_repeat) and its seed.
INDEX_LEVELS = ["dt", "decision_repeat", "seed"]

# Guard so this cell can be re-run: once these columns have become the index,
# calling set_index with them again would raise KeyError.
if list(df.index.names) != INDEX_LEVELS:
    # verify_integrity=True makes pandas raise if two rows share the same key.
    df = df.set_index(INDEX_LEVELS, verify_integrity=True)

df = df.sort_index()

In [None]:
# Inspect the 20 runs with the highest crash counts.
df["n_crashes"].sort_values(ascending=False).head(20)

In [9]:
# NOTE: the values used in this formula (e.g. the 0.9 per-crash factor) feed
# directly into the driving-score error derived from this column, so changing
# them here changes the reported error.
CRASH_PENALTY_BASE = 0.9

# Each crash multiplies the score by CRASH_PENALTY_BASE.
crash_penalty = CRASH_PENALTY_BASE ** df["n_crashes"]

df["driving_score"] = (
    df["route_completion"] * df["in_road_percentage"] * crash_penalty
)

In [None]:
# Reference ("oracle") configuration, given as (dt, decision_repeat) -- the
# first two index levels of df.
ORACLE_CONFIG = (0.02, 5)
oracle_ds = df.xs(ORACLE_CONFIG)["driving_score"]


def _three_decimals(x):
    """Format a float with three decimals for pandas display."""
    return "%.3f" % x


pd.set_option("display.float_format", _three_decimals)
oracle_ds.describe()

In [None]:
# Absolute deviation of each run's driving score from the oracle score.
# NOTE(review): oracle_ds is presumably indexed by seed only, so the
# subtraction aligns on the shared "seed" index level -- confirm.
score_gap = df["driving_score"].sub(oracle_ds)
df["driving_score_error"] = score_gap.abs()
df

In [None]:
# Three HLS colours, used as the hue palette for `dt` in the plots.
palette = sns.color_palette("hls", n_colors=3)

# Distinct decision_repeat values, used as x tick positions.
decision_repeat_values = df.index.get_level_values("decision_repeat")
x_ticks = decision_repeat_values.unique().to_list()
x_ticks

## Boxplots


In [None]:
# Distribution of `total_time` per decision_repeat, one box per dt value.
ax = sns.boxplot(
    data=df,
    x="decision_repeat",
    y="total_time",
    hue="dt",
    palette=palette,
)
# Anchor the y axis at zero.
ax.set_ylim(bottom=0)
ax.set_ylabel("System time [s]")
# NOTE(review): x ticks are left to seaborn here, presumably because the
# boxplot x axis is categorical -- confirm before setting them from x_ticks.
ax.set_xlabel("Decision repeat")

## Original plotting


In [None]:
# Mean of every numeric column per (dt, decision_repeat) configuration.
numeric_df = df.select_dtypes(include="number")
agg = numeric_df.groupby(by=["dt", "decision_repeat"]).mean()
agg

In [None]:
# Mean `total_time` of each configuration against decision_repeat, coloured by dt.
ax = sns.scatterplot(
    data=agg,
    x="decision_repeat",
    y="total_time",
    hue="dt",
    palette=palette,
)
# Anchor the y axis at zero.
ax.set_ylim(bottom=0)
ax.set_xlabel("Decision repeat")
ax.set_ylabel("System time [s]")
ax.set_xticks(x_ticks)

In [None]:
# Mean driving-score error of each configuration against decision_repeat,
# coloured by dt.
ax = sns.scatterplot(
    data=agg,
    x="decision_repeat",
    y="driving_score_error",
    hue="dt",
    palette=palette,
)
ax.set_xticks(x_ticks)
ax.set_xlabel("Decision repeat")
ax.set_ylabel("Driving Score Error")