In [None]:
import os
import pandas as pd
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy.stats import ttest_ind

In [None]:
def get_infractions(record):
    """Count how many events of each infraction type a record contains.

    Args:
        record: Mapping with an ``"infractions"`` entry that maps an
            infraction name to a sized collection of events.

    Returns:
        dict: One entry per tracked infraction type (always in the same
        order), holding the number of events found; types that do not appear
        in the record stay at 0.

    Raises:
        KeyError: If the record has no ``"infractions"`` entry or lists an
            infraction name that is not one of the tracked types.
    """
    tracked_types = (
        'collisions_layout',
        'collisions_pedestrian',
        'collisions_vehicle',
        'outside_route_lanes',
        'red_light',
        'route_dev',
        'route_timeout',
        'stop_infraction',
        'vehicle_blocked',
    )
    counts = dict.fromkeys(tracked_types, 0)
    for name, events in record["infractions"].items():
        # Reading counts[name] first keeps unknown names a hard error.
        counts[name] = counts[name] + len(events)
    return counts

In [None]:
def get_route_df(models_path, n_routes=10):
    """Load per-route evaluation results for several models into one DataFrame.

    Every entry of ``models_path`` is expected to look like
    ``<...>/<model>__m<number>/<study>__<suffix>`` and to contain one
    sub-directory per run named ``run<k>``, each holding an
    ``eval_results.json`` file. The model name stored in the frame is
    ``<first "__" token of the parent directory>__<first "__" token of the
    last directory>``; the model number is the text after ``__m`` in the
    parent directory.

    Runs are skipped (with a printed message) when their results file is
    missing or when it does not contain exactly ``n_routes`` records.

    Args:
        models_path: Iterable of directory paths using ``/`` as separator.
        n_routes: Number of route records a run must have to be included.

    Returns:
        pandas.DataFrame: One row per (model, run, route) with the model
        identifiers, the three score columns and the infraction counts. All
        columns except ``model_name`` hold floats. The frame is empty (but
        has all columns) when no run qualifies.

    Raises:
        FileNotFoundError: If one of the model directories does not exist.
    """
    columns = [
        'model_name', 'model_number', 'run', 'route', 'score_composed', 'score_penalty', 'score_route',
        'collisions_layout', 'collisions_pedestrian', 'collisions_vehicle', 'outside_route_lanes',
        'red_light', 'route_dev', 'route_timeout', 'stop_infraction', 'vehicle_blocked'
    ]

    # Rows are collected in a plain list and turned into a DataFrame once at
    # the end; enlarging a DataFrame row by row copies it on every insert.
    rows = []

    for model_path in models_path:
        # Both identifiers depend only on the path, so parse them once per model.
        path_parts = model_path.split("/")
        model_name = f"{path_parts[-2].split('__')[0]}__{path_parts[-1].split('__')[0]}"
        model_number = float(path_parts[-2].split("__m")[1])

        # Sorted so the row order does not depend on the file system.
        for run in sorted(os.listdir(model_path)):
            results_path = f"{model_path}/{run}/eval_results.json"
            # Stray files and run folders that produced no results are
            # skipped, in the same way as incomplete runs below.
            if not os.path.isfile(results_path):
                print(f"No results file at {results_path}")
                continue

            with open(results_path) as f:
                results = json.load(f)
            records = results["_checkpoint"]["records"]
            if len(records) != n_routes:
                print(f"No results in {results_path} | {len(records)} ")
                continue

            run_number = float(int(run.split('run')[1]))
            for idx, record in enumerate(records):
                row = [model_name, model_number, run_number, float(idx)]
                # NOTE(review): relies on the order of the "scores" entries
                # matching the three score columns above - confirm against
                # the files that are actually loaded.
                row.extend(float(value) for value in record["scores"].values())
                row.extend(float(count) for count in get_infractions(record).values())
                rows.append(row)

    return pd.DataFrame(rows, columns=columns)

In [None]:
def plot_dist(stop_inf_no_pl, stop_inf_pl):
    """Draw the two samples as overlaid histograms and as box plots.

    The left panel shows a 10-bin histogram with a KDE curve for each sample
    (blue for "No Pl", red for "Pl"); the right panel shows one box per
    sample. The figure is displayed and nothing is returned.

    Args:
        stop_inf_no_pl: Values of the "No Pl" sample.
        stop_inf_pl: Values of the "Pl" sample.
    """
    # Assembled up front, before any figure is created.
    samples = pd.DataFrame({'No Pl': stop_inf_no_pl, 'Pl': stop_inf_pl})

    fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: both histograms share the same axes.
    overlays = (
        (stop_inf_no_pl, 'blue', 'No Pl'),
        (stop_inf_pl, 'red', 'Pl'),
    )
    for values, colour, name in overlays:
        sns.histplot(values, kde=True, color=colour, label=name, bins=10, ax=hist_ax)
    hist_ax.legend()
    hist_ax.set_title('Histogram of No Pl and Pl')

    # Right panel: one box per DataFrame column.
    sns.boxplot(data=samples, ax=box_ax)
    box_ax.set_title('Box Plot of No Pl and Pl')

    fig.tight_layout()
    plt.show()

In [None]:
def ttest(stop_inf_no_pl, stop_inf_pl, print_results=False):
    """Compare two samples with an independent two-sample t-test.

    The test itself is ``scipy.stats.ttest_ind`` with its default settings.
    In addition, the relative change of the "Pl" mean with respect to the
    "No Pl" mean is reported in percent:
    ``(mean(No Pl) - mean(Pl)) / mean(No Pl) * 100``, so a positive value
    means the "Pl" mean is lower.

    The two samples may have different sizes.

    Args:
        stop_inf_no_pl: Numeric values of the "No Pl" sample.
        stop_inf_pl: Numeric values of the "Pl" sample.
        print_results: When true, print the statistic, the p-value, a note if
            the p-value is below 0.05, both means and the improvement.

    Returns:
        tuple: ``(t_statistic, p_value, improvement)``. ``improvement`` is
        ``inf``/``-inf``/``nan`` when the "No Pl" mean is zero.
    """
    t_statistic, p_value = ttest_ind(stop_inf_no_pl, stop_inf_pl)

    # Each mean is taken on its own sample so the samples need not have the
    # same length (a shared DataFrame would require that). Series.mean skips
    # missing values, as the DataFrame aggregate did.
    mean_no_pl = np.float64(pd.Series(stop_inf_no_pl, dtype=float).mean())
    mean_pl = np.float64(pd.Series(stop_inf_pl, dtype=float).mean())

    # A zero baseline mean yields inf/nan; that value is kept, only the
    # NumPy runtime warning is silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        improvement = (mean_no_pl - mean_pl) / mean_no_pl * 100

    if print_results:
        print(f"t-statistic: {t_statistic}")
        print(f"P-value: {p_value}")
        if p_value < 0.05:
            print("There is a significant difference!")
        print("---")
        print(f"No Pl: {mean_no_pl}")
        print(f"Pl: {mean_pl}")
        print(f"Improvement: {improvement:.2f}%")

    return t_statistic, p_value, improvement

# Table

In [None]:
# Build the comparison table: for every study (number of epochs) one row for
# the "pl" treatment (labelled "T4PC") and one for the "nopl" baseline
# (labelled "Base"). Each cell is "mean$\pm$std" over the per-run averages and
# is set in bold when the t-test gives p < 0.05 and that treatment is the
# better one.
attributes = ["score_composed", "collisions_layout", "collisions_pedestrian", "collisions_vehicle", "outside_route_lanes", "red_light", "route_dev", "route_timeout", "stop_infraction", "vehicle_blocked"]
# Running sums of the per-study means; turned into averages after the loop.
avgs = {
    "pl": {},
    "nopl": {}
}
for att in attributes:
    avgs["pl"][att] = 0
    avgs["nopl"][att] = 0

results_df = pd.DataFrame(columns=["Treatment", *attributes])
studies = ["e3", "e5", "e7"]
base_path = "./results_summary_interfuser/"
# Number of studies that actually contributed to `avgs`.
n_studies_done = 0
for study in studies:
    n_epochs = int(study.split("e")[1])
    model_df = {}
    for model_type in ["pl", "nopl"]:
        if model_type == "nopl":
            model_name = "finetuned_baseline"
        else:
            model_name = "nc2f__lm1e3__4prop"
        study_paths = []
        for m in range(1,6):
            study_path = base_path + f"{model_name}__m{m}/{study}__original"
            study_paths.append(study_path)
        route_df = get_route_df(study_paths)
        if route_df.empty:
            continue
        # One row per (model, run): every metric averaged over that run's routes.
        model_df[model_type] = route_df.groupby(['model_name', 'model_number', 'run']).agg(['mean'])

    # Both treatments are needed for a comparison; a study where one of them
    # produced no usable rows is reported and left out of the table instead of
    # failing with a KeyError further down.
    missing_treatments = [t for t in ("pl", "nopl") if t not in model_df]
    if missing_treatments:
        print(f"Skipping study {study}: no usable results for {missing_treatments}")
        continue
    n_studies_done += 1

    row1 = [f"T4PC {n_epochs}"]
    row2 = [f"Base {n_epochs}"]
    for att in attributes:
        pl_runs = model_df["pl"][att]["mean"]
        nopl_runs = model_df["nopl"][att]["mean"]
        pl_data = pl_runs.tolist()
        nopl_data = nopl_runs.tolist()
        # Both samples are trimmed to the same number of runs before testing.
        min_data = min(len(nopl_data), len(pl_data))
        statistics, p_value, improvement = ttest(nopl_data[:min_data], pl_data[:min_data])

        pl_mean = pl_runs.mean()
        nopl_mean = nopl_runs.mean()
        s1 = f"{pl_mean:.2f}$\\pm${pl_runs.std():.2f}"
        avgs["pl"][att] += pl_mean
        s2 = f"{nopl_mean:.2f}$\\pm${nopl_runs.std():.2f}"
        avgs["nopl"][att] += nopl_mean
        # `improvement` is positive when the "pl" mean is lower; for the
        # composed score the sign is flipped so that a higher "pl" mean
        # counts as the improvement.
        if att == "score_composed":
            improvement = improvement * -1
        if p_value < 0.05:
            if improvement > 0:
                s1 = "\\textbf{" + s1 + "}"
            else:
                s2 = "\\textbf{" + s2 + "}"
        row1.append(s1)
        row2.append(s2)
    results_df.loc[len(results_df)] = row1
    results_df.loc[len(results_df)] = row2

# Average over the studies that were actually summed; with no completed study
# the sums are left at 0.
if n_studies_done:
    for att in attributes:
        avgs["pl"][att] /= n_studies_done
        avgs["nopl"][att] /= n_studies_done

In [None]:
# Display the comparison table with its original (attribute-name) column labels.
results_df

In [None]:
# Full table: every metric column gets a LaTeX header (\makecell label plus an
# arrow marking the column direction); `results_df` itself is left untouched.
latex_headers_extended = {
    'score_composed': r'\makecell{Driving \\ Score} $\uparrow$',
    'collisions_pedestrian': r'\makecell{Collision \\ Pedestrians} $\downarrow$',
    'collisions_vehicle': r'\makecell{Collision \\ Vehicles} $\downarrow$',
    'collisions_layout': r'\makecell{Collision \\ Layout} $\downarrow$',
    'red_light': r'\makecell{Red Light \\ Infraction} $\downarrow$',
    'stop_infraction': r'\makecell{Stop Sign \\ Infraction} $\downarrow$',
    'outside_route_lanes': r'\makecell{Outside \\ Lanes} $\downarrow$',
    'route_dev': r'\makecell{Route \\ Deviation} $\downarrow$',
    'route_timeout': r'\makecell{Route \\ Timeout} $\downarrow$',
    'vehicle_blocked': r'\makecell{Vehicle \\ Blocked} $\downarrow$',
}
# rename() without inplace returns a new frame, so no explicit copy is needed.
results_df_extended = results_df.rename(columns=latex_headers_extended)
results_df_extended

In [None]:
# Print the full table as LaTeX source, without the index column;
# escape=False is passed so the LaTeX markup in headers and cells is not escaped.
print(results_df_extended.to_latex(index=False, escape=False))

In [None]:
# Short table: only the treatment label and four selected metrics, with the
# selected metric columns renamed to their LaTeX headers.
short_columns = ["Treatment", "score_composed", "collisions_pedestrian", "red_light", "route_timeout"]
latex_headers_short = {
    'score_composed': r'\makecell{Driving \\ Score} $\uparrow$',
    'collisions_pedestrian': r'\makecell{Collision \\ Pedestrians} $\downarrow$',
    'red_light': r'\makecell{Red Light \\ Infraction} $\downarrow$',
    'route_timeout': r'\makecell{Route \\ Timeout} $\downarrow$',
}
results_df_short = (
    results_df
    .reindex(columns=short_columns)
    .rename(columns=latex_headers_short)
)
results_df_short

In [None]:
# Print the short table as LaTeX source, without the index column;
# escape=False is passed so the LaTeX markup in headers and cells is not escaped.
print(results_df_short.to_latex(index=False, escape=False))