In [103]:
import pandas as pd
from pathlib import Path
import json
from pprint import pprint
import math
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm

# Shorthand for building MultiIndex slicers passed to `.loc`
idx = pd.IndexSlice

# Root directory that is searched recursively for the recorded `*.json` traces
DATA_DIR = Path("data/random_search")

In [104]:
def get_in_road_percentage(steps_df: pd.DataFrame) -> float:
    """Return the fraction of recorded steps in which the vehicle was on the road.

    Args:
        steps_df: One row per step; must contain an ``out_of_road`` column.

    Returns:
        Share of (non-missing) rows whose ``out_of_road`` value is ``False``.
        ``0.0`` when no such row exists, e.g. the vehicle was off the road for
        the whole episode or the frame has no rows.
    """
    shares = steps_df["out_of_road"].value_counts(normalize=True)
    # `.at[False]` raises KeyError when `False` never occurs; `.get` falls back to 0.
    return float(shares.get(False, 0.0))

In [105]:
def get_n_sidewalk_crashes(steps_df: pd.DataFrame) -> int:
    """Count sidewalk-crash episodes so that one crash is not counted multiple times.

    A crash episode is a maximal run of consecutive steps whose
    ``crash_sidewalk`` flag is truthy. Each run counts once, including runs
    that touch the first or the last recorded step.

    Args:
        steps_df: One row per step, in recorded order, with a ``crash_sidewalk``
            column of boolean-like values.

    Returns:
        Number of crash episodes; ``0`` when there are none or when the
        ``crash_sidewalk`` column is absent.
    """
    # A missing column has always been reported as "no crashes"; keep that contract.
    if "crash_sidewalk" not in steps_df.columns:
        return 0

    crashed = steps_df["crash_sidewalk"].astype(bool)

    # An episode starts wherever the flag is True and the previous step's flag was
    # False. The step before the first row is treated as "not crashed", so a trace
    # that begins inside a crash is still counted (halving the number of flag
    # changes missed that case).
    episode_starts = crashed & ~crashed.shift(1, fill_value=False)

    return int(episode_starts.sum())

In [106]:
def process_steps(steps_infos: list) -> dict:
    """Condense the per-step records of one run into a few summary values.

    Args:
        steps_infos: Per-step records that ``pd.DataFrame`` can turn into one
            row per step. The last record supplies the termination flags, the
            route completion and the seed.

    Returns:
        A dict with the keys ``termination.arrive_dest``,
        ``termination.timeout``, ``route_completion``, ``seed``,
        ``in_road_percentage`` and ``n_sidewalk_crashes``.

    Raises:
        ValueError: If ``steps_infos`` contains no steps.
    """
    steps_df = pd.DataFrame(steps_infos)

    # Fail with a clear message instead of an opaque IndexError from `.iloc[-1]`
    if len(steps_df) == 0:
        raise ValueError("steps_infos is empty: cannot summarise a run without steps")

    last_step = steps_df.iloc[-1]

    return {
        "termination.arrive_dest": last_step["arrive_dest"],
        "termination.timeout": last_step["max_step"],
        "route_completion": last_step["route_completion"],
        "seed": last_step["env_seed"],
        "in_road_percentage": get_in_road_percentage(steps_df),
        "n_sidewalk_crashes": get_n_sidewalk_crashes(steps_df),
    }

In [107]:
def get_random_search_traces():
    paths = list(DATA_DIR.rglob("*.json"))

    scenarios = []
    for file_path in tqdm(paths):
        with open(file_path, "r") as f:
            scenario_data = json.load(f)

        steps_infos = scenario_data.pop("steps_infos")
        scenario_data.update(process_steps(steps_infos))

        match file_path.parts:
            case (_, _, repetition, it, fid, _):
                # print(f"{rep =} {it= } {fid=}")
                scenario_data["repetition"] = int(rep)
                scenario_data["iteration"] = int(it)

                _, dr, _, dt = fid.split("_")
                scenario_data["decision_repeat"] = int(dr)
                scenario_data["dt"] = float(dt)

            case _:
                raise ValueError("Can't parse path!")

        scenarios.append(scenario_data)
    return pd.DataFrame(scenarios)

In [92]:
# Parse all trace files into a single frame, one row per JSON file
df = get_random_search_traces()

  0%|          | 0/612 [00:00<?, ?it/s]

100%|██████████| 612/612 [00:02<00:00, 298.80it/s]


In [93]:
# Key every run by its configuration and its position in the search, then sort.
# `verify_integrity=True` makes pandas raise if two rows end up with the same key.
df = df.set_index(
    keys=["decision_repeat", "dt", "repetition", "iteration"],
    verify_integrity=True,
).sort_index()
# df

In [94]:
# Calculate the driving score.
# NOTE: changing the values used here can change the % error in the analysis.


def calculate_driving_score(
    df,
    sidewalk_crash_penalty: float = 0.65,
    vehicle_crash_penalty: float = 0.60,
):
    """Add a ``driving_score`` column to ``df`` and return ``df``.

    The score is::

        route_completion * in_road_percentage
            * sidewalk_crash_penalty ** n_sidewalk_crashes
            * vehicle_crash_penalty ** n_crashed_vehicles

    Args:
        df: Frame with the columns ``route_completion``,
            ``in_road_percentage``, ``n_sidewalk_crashes`` and
            ``n_crashed_vehicles``. It is modified in place.
        sidewalk_crash_penalty: Factor applied once per sidewalk crash.
        vehicle_crash_penalty: Factor applied once per crashed vehicle.

    Returns:
        The same ``df`` object, now carrying the ``driving_score`` column.
    """
    df["driving_score"] = (
        df["route_completion"]
        * df["in_road_percentage"]
        * sidewalk_crash_penalty ** df["n_sidewalk_crashes"]
        * vehicle_crash_penalty ** df["n_crashed_vehicles"]
    )
    return df


# Add the `driving_score` column to every run
df = calculate_driving_score(df)

In [102]:
# checking if time budgeting worked
# df["total_time"].groupby(["decision_repeat", "dt", "repetition"]).sum()

In [128]:
def get_solutions_of_iteration(curr_it, df):
    """Return, for every search run, the best solution found up to ``curr_it``.

    A run is one ``(decision_repeat, dt, repetition)`` combination and its
    solution is the row with the lowest ``driving_score`` among iterations
    ``0 .. curr_it`` (both included).

    Args:
        curr_it: Last iteration to take into account.
        df: Frame with a sorted MultiIndex whose four levels are
            ``decision_repeat``, ``dt``, ``repetition`` and the iteration, and
            with a ``driving_score`` column.

    Returns:
        Frame indexed by ``(decision_repeat, dt, repetition, it)`` with ``it``
        set to ``curr_it`` and two columns: ``solution_idx`` (full index tuple
        of the minimising row) and ``solution_score`` (its driving score).
    """
    # `.loc` label slices include their end point, so `:curr_it` already covers
    # iterations 0..curr_it; `: curr_it + 1` also pulled in the following one.
    window = df.loc[pd.IndexSlice[:, :, :, :curr_it], :]

    # Build the per-run grouping once and reuse it for both aggregations
    scores_by_run = window.groupby(["decision_repeat", "dt", "repetition"])[
        "driving_score"
    ]

    solutions = pd.concat(
        [
            # index of the minimum driving score -> the solution
            scores_by_run.idxmin().rename("solution_idx"),
            # value of the minimum driving score
            scores_by_run.min().rename("solution_score"),
        ],
        axis=1,
    )
    solutions["it"] = curr_it
    solutions = solutions.reset_index().set_index(
        ["decision_repeat", "dt", "repetition", "it"]
    )

    return solutions

In [None]:
NUM_OF_ITERAITON = 25

# One block of solutions per iteration, stacked on top of each other.
# `get_solutions_of_iteration` requires the frame as its second argument.
sols_df = pd.concat(
    [get_solutions_of_iteration(it, df) for it in range(NUM_OF_ITERAITON)], axis=0
)

# NOTE(review): `solution_idx` holds the full index tuple of `df`. With the index
# built in this notebook (decision_repeat, dt, repetition, iteration) position -1
# is the iteration and position -2 the repetition, and there is no route id level.
# Confirm that the two lookups below still pick the intended fields.
# at what iteration did we find the solution
sols_df["solution_it"] = sols_df["solution_idx"].apply(lambda x: x[-2])
# what is the route_id of the solution
sols_df["solution_route_id"] = sols_df["solution_idx"].apply(lambda x: x[-1])

# we don't need that once unpacked
sols_df = sols_df.drop("solution_idx", axis=1)

# add data from verification
# NOTE(review): `oracle_df` is not defined in the cells shown here; it has to be
# loaded before this cell can run.
sols_df = sols_df.join(oracle_df, on="solution_route_id")
sols_df = sols_df.sort_index()

In [129]:
# Keep only the two columns used from here on.
# NOTE(review): this rebinds `df`, so any cell that needs the dropped columns has
# to run before this one.
df = df[["total_time", "driving_score"]]

In [134]:
# Spot check: best solution of every run for curr_it=43
get_solutions_of_iteration(43, df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,solution_idx,solution_score
decision_repeat,dt,repetition,it,Unnamed: 4_level_1,Unnamed: 5_level_1
5,0.02,0,43,"(5, 0.02, 0, 7)",0.357
5,0.02,1,43,"(5, 0.02, 1, 8)",0.357
5,0.03,0,43,"(5, 0.03, 0, 11)",0.091
5,0.03,1,43,"(5, 0.03, 1, 13)",0.357
5,0.04,0,43,"(5, 0.04, 0, 14)",0.01
5,0.04,1,43,"(5, 0.04, 1, 12)",0.163
10,0.02,0,43,"(10, 0.02, 0, 7)",0.339
10,0.02,1,43,"(10, 0.02, 1, 0)",0.214
10,0.03,0,43,"(10, 0.03, 0, 4)",0.061
10,0.03,1,43,"(10, 0.03, 1, 13)",0.164


In [131]:
# Scratch copy of the two aggregations done inside `get_solutions_of_iteration`.
# NOTE(review): `slice` is not assigned in any cell shown here, so on a fresh
# kernel the name refers to Python's builtin `slice` and `.groupby` fails. The
# cell only works if a DataFrame named `slice` is left over from earlier kernel
# state; define it explicitly or drop this cell.
# get index of minimum driving score -> solution
solutions_idx = (
    slice.groupby(
        [
            "decision_repeat",
            "dt",
            "repetition",
        ]
    )["driving_score"]
    .idxmin()
    .rename("solution_idx")
)


# get value of minimum driving score
solutions_score = (
    slice.groupby(
        [
            "decision_repeat",
            "dt",
            "repetition",
        ]
    )["driving_score"]
    .min()
    .rename("solution_score")
)


solutions_score

decision_repeat  dt     repetition
5                0.020  0            0.357
                        1            0.357
                 0.030  0            0.091
                        1            0.357
                 0.040  0            0.010
                        1            0.163
10               0.020  0            0.339
                        1            0.214
                 0.030  0            0.061
                        1            0.164
                 0.040  0            0.029
                        1            0.045
15               0.020  0            0.124
                        1            0.215
                 0.030  0            0.044
                        1            0.025
                 0.040  0            0.031
                        1            0.054
20               0.020  0            0.115
                        1            0.012
                 0.030  0            0.018
                        1            0.010
                 0.