In [1]:
import pandas as pd
from pathlib import Path
import json
from pprint import pprint
import math
from tqdm import tqdm
import seaborn as sns

In [2]:
def get_in_road_percentage(steps_df: pd.DataFrame) -> float:
    """Return the fraction of steps whose ``out_of_road`` flag is ``False``.

    Args:
        steps_df: Per-step records; must contain an ``out_of_road`` column.

    Returns:
        The share of (non-null) rows with ``out_of_road == False``, between
        0 and 1. Returns ``0.0`` when no row carries ``False`` (for example
        when every step is flagged as out of road) instead of raising.

    Raises:
        KeyError: If ``steps_df`` has no ``out_of_road`` column.
    """
    shares = steps_df["out_of_road"].value_counts(normalize=True)

    # `value_counts` only lists labels that actually occur, so a plain
    # `.at[False]` would raise KeyError when the flag is never False.
    if False not in shares.index:
        return 0.0

    return float(shares.at[False])

In [3]:
def get_n_crashes(steps_df: pd.DataFrame) -> int:
    """Count crash episodes, so a crash spanning several steps counts once.

    A crash episode is a maximal run of consecutive steps whose ``crash``
    flag is truthy. Episodes are counted by their first step, which keeps
    the count correct when an episode starts on the very first step or is
    still ongoing on the last one.

    Args:
        steps_df: Per-step records; must contain a ``crash`` column.
            Values are coerced with ``astype(bool)`` (assumes the column
            holds plain booleans -- TODO confirm there are no missing values).

    Returns:
        Number of crash episodes; ``0`` when ``crash`` is never truthy.

    Raises:
        KeyError: If ``steps_df`` has no ``crash`` column.
    """
    crash = steps_df["crash"].astype(bool)

    # Flag of the preceding step; the first step has no predecessor, so it
    # is treated as "not crashing" and a crash at step 0 starts an episode.
    previous = crash.shift(1, fill_value=False)

    # An episode starts wherever the flag switches from False to True.
    episode_starts = crash & ~previous

    return int(episode_starts.sum())

In [4]:
def process_steps(steps_infos: list) -> dict:
    """Condense the per-step records of one scenario into summary values.

    Args:
        steps_infos: Sequence of per-step records accepted by
            ``pd.DataFrame``; the last record supplies the end-of-scenario
            fields.

    Returns:
        Dict with the last step's ``arrive_dest``, ``max_step``,
        ``route_completion`` and ``env_seed`` values (stored under the keys
        ``termination.arrive_dest``, ``termination.timeout``,
        ``route_completion`` and ``seed``), plus ``in_road_percentage`` and
        ``n_crashes`` computed over all steps.
    """
    frame = pd.DataFrame(steps_infos)
    final_step = frame.iloc[-1]

    # Output key -> column of the final step it is copied from.
    final_step_fields = {
        "termination.arrive_dest": "arrive_dest",
        "termination.timeout": "max_step",
        "route_completion": "route_completion",
        "seed": "env_seed",
    }

    summary = {key: final_step[column] for key, column in final_step_fields.items()}
    summary["in_road_percentage"] = get_in_road_percentage(frame)
    summary["n_crashes"] = get_n_crashes(frame)
    return summary

In [5]:
def get_scenarios_df(dir: Path):
    """Load every scenario JSON one directory level below ``dir``.

    Each file matching ``*/*.json`` under ``dir`` is parsed, tagged with the
    ``decision_repeat`` and ``dt`` values encoded in its parent directory
    name, and has its ``steps_infos`` list replaced by the summary values
    from ``process_steps``.

    Args:
        dir: Root directory of a benchmark run. Sub-directory names must
            split on ``"_"`` into exactly four parts, with the second being
            an integer (decision repeat) and the fourth a float (dt).

    Returns:
        ``pd.DataFrame`` with one row per scenario file.

    Raises:
        ValueError: If a file is not valid JSON (the message names the
            file), or if a directory name does not match the expected
            four-part pattern.
    """
    scenarios = []
    for file_path in tqdm(list(dir.glob("*/*.json"))):
        try:
            with open(file_path, "r") as f:
                scenario_data = json.load(f)
        except json.JSONDecodeError as exc:
            # Name the offending file in the error itself rather than only
            # printing it, and keep the parser error as the cause.
            raise ValueError(f"Invalid JSON in scenario file: {file_path}") from exc

        # Use pathlib for the parent directory name instead of splitting the
        # string on "/", which depends on the path separator; a separate
        # local also avoids rebinding the `dir` parameter.
        run_name = file_path.parent.name
        _, dr, _, dt = run_name.split("_")
        scenario_data["decision_repeat"] = int(dr)
        scenario_data["dt"] = float(dt)

        steps_infos = scenario_data.pop("steps_infos")
        scenario_data.update(process_steps(steps_infos))
        scenarios.append(scenario_data)

    return pd.DataFrame(scenarios)

In [6]:
# Load the two benchmark runs into one-row-per-scenario frames.
df_1, df_2 = (
    get_scenarios_df(Path(run_dir))
    for run_dir in ("data/benchmarking", "data/benchmarking_2")
)

100%|██████████| 1200/1200 [00:03<00:00, 355.37it/s]
100%|██████████| 1200/1200 [00:03<00:00, 360.70it/s]


In [7]:
def process(df):
    """Index scenarios by (dt, decision_repeat, seed) and drop timing columns.

    The index is built with ``verify_integrity=True``, so a duplicated
    (dt, decision_repeat, seed) combination raises ``ValueError``. The
    result is sorted by that index and no longer contains the timestamp and
    duration columns.
    """
    index_keys = ["dt", "decision_repeat", "seed"]
    timing_columns = [
        "start_ts",
        "initialized_ts",
        "scenario_done_ts",
        "init_time",
        "scenario_time",
        "total_time",
    ]

    indexed = df.set_index(index_keys, verify_integrity=True)
    ordered = indexed.sort_index()
    return ordered.drop(columns=timing_columns)

In [8]:
# Re-index both runs and drop their timing columns.
# NOTE: this rebinds df_1/df_2, so the cell cannot be re-run on its own --
# `process` expects "dt", "decision_repeat" and "seed" as columns, and after
# this cell they live in the index. Re-run the loading cell first.
df_1, df_2 = process(df_1), process(df_2)

In [9]:
# Show only the entries where the two benchmark runs disagree
# ("self" is df_1, "other" is df_2); an empty result means the runs match.
df_1.compare(df_2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,termination.arrive_dest,termination.arrive_dest,route_completion,route_completion,in_road_percentage,in_road_percentage,n_crashes,n_crashes
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,self,other,self,other,self,other,self,other
dt,decision_repeat,seed,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0.04,20,50,False,True,0.562882,0.994,0.990385,1.0,2.0,3.0
