In [2]:
import os, sys
import dotenv
dotenv.load_dotenv('../.env')

import numpy as np
import torch
import pandas as pd
from scipy.stats import kendalltau, pearsonr, spearmanr
import pandas as pd
from torchvision import transforms

import wandb
from collections import defaultdict
import json
from pathlib import Path
from tqdm import tqdm
from copy import deepcopy
import warnings

sys.path.append('../')
from common import OnnxModel
from metrics.metrics import calculate_open_loop_metrics, calculate_closed_loop_metrics
from dataloading.nvidia import NvidiaElvaDataset, NvidiaDataset

WANDB_ENTITY = os.getenv('WANDB_ENTITY')
WANDB_PROJECT = os.getenv('WANDB_PROJECT')

In [3]:
def wandb_model_path_parse(model_path):
    '''Turns a W&B `model_path` config value into a bare model name.

    Every occurrence of '_models/' and of '.onnx' is removed from the string;
    anything else is left untouched.
    '''
    without_models_dir = model_path.replace('_models/', '')
    model_name = without_models_dir.replace('.onnx', '')
    return model_name

def add_wandb_metrics(df):
    '''Adds eval metrics from W&B to the table, such as best validation MAE and offline Elva metrics.

    Every run of the `WANDB_ENTITY/WANDB_PROJECT` project is visited:

    * A run whose id appears in `df['training_run']` fills `val_mae` (the lowest
      logged 'mae', NaN when the run logged none) and `model_type` (from the run
      config) on the matching rows.
    * A finished run tagged 'offline-elva-evaluation' that has a summary 'mae'
      fills `elva_mae`, `elva_whiteness` and `elva_expert_whiteness` on the rows
      whose `model_name` equals the name parsed from the run's `model_path` config.
      Missing whiteness values become NaN.

    `df` is modified in place and is also returned. Before returning,
    `real_interventions` is passed through `convert_dtypes()`.
    '''
    api = wandb.Api()
    runs = api.runs(f'{WANDB_ENTITY}/{WANDB_PROJECT}')

    for run in runs:

        # training run
        trained_by_this_run = df['training_run'] == run.id
        if trained_by_this_run.any():
            history = run.history(keys=['mae'])
            # NaN (rather than a huge sentinel number) marks a run that never logged
            # 'mae', so a missing value cannot be mistaken for a real score later on.
            best_mae = history['mae'].min() if 'mae' in history else np.nan

            df.loc[trained_by_this_run, 'val_mae'] = best_mae
            df.loc[trained_by_this_run, 'model_type'] = run.config['model_type']

        # evaluation run
        if run.state == 'finished' and 'offline-elva-evaluation' in run.tags:
            elva_mae = run.summary.get('mae', None)
            if elva_mae is None: continue
            evaluated_model = df['model_name'] == wandb_model_path_parse(run.config['model_path'])
            df.loc[evaluated_model, 'elva_mae'] = elva_mae
            # one run without whiteness values must not abort the whole table
            df.loc[evaluated_model, 'elva_whiteness'] = run.summary.get('whiteness', np.nan)
            df.loc[evaluated_model, 'elva_expert_whiteness'] = run.summary.get('expert_whiteness', np.nan)

    df['real_interventions'] = df['real_interventions'].convert_dtypes()
    return df

In [4]:
# ONLINE METRICS

# (x, y) position that frame positions (position_x, position_y) are compared against
# to recognise the place where a drive changes track direction.
track_direction_change_location = np.array((-9683.68050786, -1542.68155186))
# Folder that every drive name used in this notebook is resolved against.
root_path = Path("/data/Bolt/end-to-end/drives-ebm-paper/")
# Reference recordings whose frames are passed as the expert frames to
# calculate_closed_loop_metrics: `expert_ds` for forward drives, `expert_back_ds` for backward drives.
expert_ds = NvidiaDataset([root_path.joinpath('2021-10-26-10-49-06_e2e_rec_ss20_elva_eval_chunk')])
expert_back_ds = NvidiaDataset([root_path.joinpath('2021-10-26-11-08-59_e2e_rec_ss20_elva_back_eval_chunk')])

def are_locations_close(loc_a, loc_b, threshold=50):
    '''Tells whether the norm of `loc_a - loc_b` is strictly below `threshold`.'''
    offset = loc_a - loc_b
    separation = np.linalg.norm(offset)
    return separation < threshold

def get_closest_frame_by_loc(df, target_loc):
    '''Returns the row of `df` whose (position_x, position_y) lies nearest to `target_loc`.

    `target_loc` must broadcast against an (n_rows, 2) array, e.g. a 2-element array.
    Side effect: the per-row distances are stored in `df['distance_to_target']`,
    so the returned row carries that value as well.
    '''
    locations = df[['position_x', 'position_y']].to_numpy().astype(np.float32)
    # axis=1 gives one Euclidean distance per row; without it the norm collapses the
    # whole matrix into a single number and idxmin would always pick the first row.
    df['distance_to_target'] = np.linalg.norm(locations - target_loc, axis=1)
    return df.loc[df['distance_to_target'].idxmin()]

def get_closest_row_idx_by_timestamp(df, dt):
    '''Returns the index label of the row whose 'index' time is nearest to `dt`.

    Side effect: the parsed times are stored in `df['timestamp']`.
    '''
    df['timestamp'] = pd.to_datetime(df['index'])
    time_gap = (df['timestamp'] - dt).abs()
    return time_gap.idxmin()

def get_longest_intervention_periods(df):
    '''Finds the intervention (non-autonomous) periods of a drive, longest first.

    A period starts at the last autonomous row before 'autonomous' turns False and
    ends at the last non-autonomous row before it turns True again. Only complete
    periods are reported: a leading stretch that begins in manual mode and a
    trailing intervention that never returns to autonomy are left out.

    Returns `(starts, ends)`: two equally long arrays of 'row_id' values, ordered
    by decreasing `ends - starts`.
    Side effect: the shifted flag is stored in `df['autonomous_next']`.
    '''
    df['autonomous_next'] = df['autonomous'].shift(-1)
    autonomous = df['autonomous']
    autonomous_next = df['autonomous_next']

    # The last row has no successor (NaN after the shift), so it matches neither mask.
    leaves_autonomy = (autonomous & (autonomous_next == False)).to_numpy(dtype=bool)
    regains_autonomy = ((autonomous == False) & (autonomous_next == True)).to_numpy(dtype=bool)
    start_pos = np.flatnonzero(leaves_autonomy)
    end_pos = np.flatnonzero(regains_autonomy)

    # A drive that begins in manual mode yields an end without a start; drop it so
    # that every start is paired with the end that follows it.
    if len(start_pos) and len(end_pos) and end_pos[0] < start_pos[0]:
        end_pos = end_pos[1:]
    # A drive that finishes during an intervention yields a start without an end.
    n_periods = min(len(start_pos), len(end_pos))

    row_ids = df['row_id'].to_numpy()
    starts = row_ids[start_pos[:n_periods]]
    ends = row_ids[end_pos[:n_periods]]

    # argsort is ascending; reverse it so that the longest period comes first
    longest_idxs = np.argsort(ends - starts, kind='stable')[::-1]
    return (starts[longest_idxs], ends[longest_idxs])

def split_back_forth_drive_into_two(dataset):
    '''Splits one recording that covers both track directions into two datasets.

    The candidate intervention periods come from `get_longest_intervention_periods`
    and are checked in the order they are returned; the first one whose first or
    last frame is close to `track_direction_change_location` is the split point.

    Returns `(dataset_forward, dataset_backward)`, two deep copies of `dataset`
    with `frames` and `vehicle_cmd_frames` replaced by the respective part, or
    `None` (after printing a message) when no such period is found.
    '''
    all_frames = dataset.frames
    all_cmds = dataset.vehicle_cmd_frames
    position_cols = ['position_x', 'position_y']

    def near_direction_change(row_id):
        # position of the frame carrying this row_id vs. the turnaround spot
        position = all_frames[all_frames['row_id'] == row_id][position_cols].to_numpy()
        return are_locations_close(position, track_direction_change_location)

    # look for the intervention period that sits at the track direction change
    for forward_end, forward_start in zip(*get_longest_intervention_periods(all_frames)):
        if near_direction_change(forward_end) or near_direction_change(forward_start):
            break
    else:
        print('Couldn\'t find the longest intervention in the track direction change location')
        return None

    # frames up to the start of that period go forward, frames after its end go backward
    forward_frames = all_frames[all_frames['row_id'] <= forward_end]
    backward_frames = all_frames[all_frames['row_id'] > forward_start]

    # cut the vehicle commands at the rows closest in time to the two frame boundaries
    forward_last_ts = pd.to_datetime(forward_frames.iloc[-1]['index'])
    backward_first_ts = pd.to_datetime(backward_frames.iloc[0]['index'])
    forward_cmd_stop = get_closest_row_idx_by_timestamp(all_cmds, forward_last_ts)
    backward_cmd_start = get_closest_row_idx_by_timestamp(all_cmds, backward_first_ts)

    # put both halves back into dataset objects
    dataset_forward = deepcopy(dataset)
    dataset_forward.frames = forward_frames
    dataset_forward.vehicle_cmd_frames = all_cmds.iloc[:forward_cmd_stop]

    dataset_backward = deepcopy(dataset)
    dataset_backward.frames = backward_frames
    dataset_backward.vehicle_cmd_frames = all_cmds.iloc[backward_cmd_start:]

    return dataset_forward, dataset_backward

def add_online_metrics(df):
    '''Adds closed-loop (on-track) metrics of every drive to the table.

    `row['drive']` lists the recording folder(s) of one evaluation, relative to `root_path`:

    * 1 entry  - one recording with both directions, split with
                 `split_back_forth_drive_into_two` (two listed unfinished
                 single-direction drives are used as forward-only);
    * 2 entries - forward and backward recording;
    * 3 entries - forward recording, its continuation, and the backward recording.

    Forward and backward metrics come from `calculate_closed_loop_metrics` against
    `expert_ds` / `expert_back_ds`. When both directions exist, 'distance' is summed
    and every other metric is averaged. `distance_per_intervention` is the combined
    distance divided by `row['real_interventions']`.

    A single recording that cannot be split is skipped with a message and its
    metric cells stay empty. `df` is modified in place and also returned.
    Warnings raised while processing are suppressed.
    '''
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for i, row in tqdm(df.iterrows(), total=len(df)):
            drives = row['drive']
            ds_forward = None
            ds_backward = None
            forward_metrics = {}
            backward_metrics = {}
            if len(drives) == 1:
                ds_combined = NvidiaDataset([root_path / drives[0]])
                if drives[0] not in ['2022-09-09-11-47-04', '2022-09-09-10-51-33-mdn-1-s1']: # unfinished single direction drive
                    split = split_back_forth_drive_into_two(ds_combined)
                    if split is None:
                        # no direction change found: unpacking None would raise a TypeError
                        print(f'Skipping online metrics for {drives[0]}: could not split it into forward and backward parts')
                        continue
                    ds_forward, ds_backward = split
                else:
                    ds_forward = ds_combined
            elif len(drives) == 2:
                ds_forward = NvidiaDataset([root_path / drives[0]])
                ds_backward = NvidiaDataset([root_path / drives[1]])
            elif len(drives) == 3:
                assert '2022-08-31-15-18-55_elva_classifier_512_forward_continued' in drives[1]
                ds_forward = NvidiaDataset([root_path / drives[0], root_path / drives[1]])
                ds_backward = NvidiaDataset([root_path / drives[2]])

            if ds_forward:
                forward_metrics = calculate_closed_loop_metrics(ds_forward.frames, expert_ds.frames, ds_forward.vehicle_cmd_frames)
            if ds_backward:
                backward_metrics = calculate_closed_loop_metrics(ds_backward.frames, expert_back_ds.frames, ds_backward.vehicle_cmd_frames)

            # The computed 'interventions' entry is not written to the table
            # (presumably because 'real_interventions' is used instead - confirm).
            # pop() removes it from each dict independently, whether or not the other has it.
            forward_metrics.pop('interventions', None)
            backward_metrics.pop('interventions', None)

            metrics = {**forward_metrics}
            additive_metrics = ['distance']
            for k, v in backward_metrics.items():
                if k not in metrics:
                    # metric reported for the backward drive only: nothing to combine it with
                    metrics[k] = v
                    continue
                metrics[k] = metrics[k] + v
                if k not in additive_metrics:
                    metrics[k] = metrics[k] / 2

            metrics['distance_per_intervention'] = metrics['distance'] / row['real_interventions']
            # plain lists instead of dict views as the column indexer and the values
            df.loc[i, list(metrics.keys())] = list(metrics.values())
            
    return df

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f32e4a3cfd0>
)
/data/Bolt/end-to-end/drives-ebm-paper/2021-10-26-10-49-06_e2e_rec_ss20_elva_eval_chunk: length=13858, filtered=2
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f32e4a44ac0>
)
/data/Bolt/end-to-end/drives-ebm-paper/2021-10-26-11-08-59_e2e_rec_ss20_elva_back_eval_chunk: length=14249, filtered=1


In [4]:
# # columns = ['model_name', 'model_class', 'total_MAE', 'same_track_MAE', 'vista_interventions', 'real_interventions']
# columns = ['training_run', 'model_name', 'real_interventions', 'drive']

# # build a dataframe with the results
# rows = [
#     ['3ftnqxcb', 'ebm-512-s1', 9, ('e2e-ebm-512-s1-day1-forward_2022-09-20-11-00-41', 'e2e-ebm-512-s1-day1-backward_2022-09-20-11-11-14')],
#     ['3ftnqxcb', 'ebm-512-s1', 4, ('ebm-paper-ebm-512-s1-forward_2022-09-21-10-51-39', 'ebm-paper-ebm-512-s1-backward_2022-09-21-11-02-14')],
#     ['3ftnqxcb', 'ebm-512-s1', 1, ('ebm-paper-classifier-512-forward_2022-09-22-11-47-18', 'ebm-paper-classifier-512-backward_2022-09-22-11-58-03')],
#     ['3ftnqxcb', 'ebm-512-s1', 2, ('ebm-paper-ebm-512-s1-forward_2022-09-23-11-18-29', 'ebm-paper-ebm-512-s1-backwards_2022-09-23-11-07-13')],

#     ['3jk7cnqa', 'ebm-normal-1-s1', 5, ('e2e-ebm-normal-1-s1-day1-forward_2022-09-20-13-37-56', 'e2e-ebm-normal-1-s1-day1-backward_2022-09-20-13-48-42')],
#     ['3jk7cnqa', 'ebm-normal-1-s1', 20, ('ebm-paper-ebm-normal-1-s1-forward_2022-09-21-11-59-17', 'ebm-paper-ebm-normal-1-s1-backward_2022-09-21-12-10-33')], # RUINED BY RAIN
#     ['3jk7cnqa', 'ebm-normal-1-s1', 2, ('ebm-paper-ebm-normal-1-s1-forward_2022-09-22-12-15-22', 'ebm-paper-ebm-normal-1-s1-backward_2022-09-22-12-25-57')],
#     ['3jk7cnqa', 'ebm-normal-1-s1', 3, ('ebm-paper-ebm-normal-1-s1-forward_2022-09-23-12-28-45', 'ebm-paper-ebm-normal-1-s1-backwards_2022-09-23-12-17-08')],

#     ['2jvl4yhn', 'ebm-spatial-0-s2', 5, ('e2e-ebm-spatial-0-s2-day1-forward_2022-09-20-12-44-32', 'e2e-ebm-spatial-0-s2-day1-backward_2022-09-20-12-55-04')],
#     ['2jvl4yhn', 'ebm-spatial-0-s2', 5, ('ebm-paper-ebm-spatial-0-s2-forward_2022-09-21-11-13-54', 'ebm-paper-ebm-spatial-0-s2-backward_2022-09-21-11-24-29')],
#     ['2jvl4yhn', 'ebm-spatial-0-s2', 8, ('ebm-paper-ebm-spatial-0-s2-forward_2022-09-22-11-25-33', 'ebm-paper-ebm-spatial-0-s2-backward_2022-09-22-11-36-02')],
#     ['2jvl4yhn', 'ebm-spatial-0-s2', 4, ('ebm-paper-ebm-spatial-0-s2-forward_2022-09-23-11-43-09', 'ebm-paper-ebm-spatial-0-s2-backwards_2022-09-23-11-31-39')],

#     ['bxd5wtqk', 'mae-s2', 2, ('e2e-mae-s2-day1-forward_2022-09-20-11-25-14', 'e2e-mae-s2-day1-backward_2022-09-20-11-35-49')],
#     ['bxd5wtqk', 'mae-s2', 2, ('ebm-paper-mae-s2-forward_2022-09-21-12-22-57', 'ebm-paper-mae-s2-backward_2022-09-21-12-34-47')],
#     ['bxd5wtqk', 'mae-s2', 5, ('ebm-paper-mae-s2-forward_2022-09-22-10-02-03', 'ebm-paper-mae-s2-backward_2022-09-22-10-26-02')],
#     ['bxd5wtqk', 'mae-s2', 1, ('ebm-paper-mae-s2-forward_2022-09-23-10-31-24', 'ebm-paper-mae-s2-backward_2022-09-23-10-19-55')],

#     ['3g3wwx73', 'classifier-512', 7, ('e2e-classifier-512-day1-forward_2022-09-20-12-20-47', 'e2e-classifier-512-day1-backward_2022-09-20-12-31-35')],
#     ['3g3wwx73', 'classifier-512', 1, ('ebm-paper-classifier-512-forward_2022-09-21-11-36-50', 'ebm-paper-classifier-512-backward_2022-09-21-11-47-38')],
#     ['3g3wwx73', 'classifier-512', 7, ('ebm-paper-classifier-512-forward_2022-09-22-11-47-18', 'ebm-paper-classifier-512-backward_2022-09-22-11-58-03')],
#     ['3g3wwx73', 'classifier-512', 1, ('ebm-paper-classifier-512-forward_2022-09-23-12-05-48', 'ebm-paper-classifier-512-backwards_2022-09-23-11-54-32')],

#     ['1hbbr6dm', 'mdn-5-s1', 10, ('e2e-mdn-5-s1-day1-forward_2022-09-20-14-00-44', 'e2e-mdn-5-s1-day1-backward_2022-09-20-14-13-08')], # RUINED BY RAIN
#     ['1hbbr6dm', 'mdn-5-s1', 1,  ('ebm-paper-mdn-5-s1-forward_2022-09-21-10-29-09', 'ebm-paper-mdn-5-s1-backward_2022-09-21-10-39-43')],
#     ['1hbbr6dm', 'mdn-5-s1', 5, ('ebm-paper-mdn-5-s1-forward_2022-09-22-10-38-10', 'ebm-paper-mdn-5-s1-backward_2022-09-22-10-49-36')],
#     ['1hbbr6dm', 'mdn-5-s1', 5, ('ebm-paper-mdn-5-s1-forward_2022-09-23-10-55-27', 'ebm-paper-mdn-5-s1-backwards_2022-09-23-10-43-34')],
# ]

# df = pd.DataFrame(rows, columns=columns)
# df.loc[df['drive'] == ('e2e-ebm-512-s1-day1-forward_2022-09-20-11-00-41', 'e2e-ebm-512-s1-day1-backward_2022-09-20-11-11-14'), 'comment'] = 'strict Tambet\'s first drive of the day'
# df.loc[df['drive'] == ('ebm-paper-ebm-normal-1-s1-forward_2022-09-21-11-59-17', 'ebm-paper-ebm-normal-1-s1-backward_2022-09-21-12-10-33'), 'comment'] = 'ruined by rain'
# df.loc[df['drive'] == ('e2e-mdn-5-s1-day1-forward_2022-09-20-14-00-44', 'e2e-mdn-5-s1-day1-backward_2022-09-20-14-13-08'), 'comment'] = 'ruined by rain'
# df.loc[df['drive'] == ('ebm-paper-mae-s2-forward_2022-09-22-10-02-03', 'ebm-paper-mae-s2-backward_2022-09-22-10-26-02'), 'comment'] = 'sunny'
# df.loc[df['drive'] == ('ebm-paper-classifier-512-forward_2022-09-22-11-47-18', 'ebm-paper-classifier-512-backward_2022-09-22-11-58-03'), 'comment'] = 'sunny'
# df.loc[df['drive'] == ('e2e-classifier-512-day1-forward_2022-09-20-12-20-47', 'e2e-classifier-512-day1-backward_2022-09-20-12-31-35'), 'comment'] = 'sunny'

# # show df without column "training_run"
# df.drop(columns=['training_run', 'drive'])

In [5]:
# Schema of the results table:
#   training_run       - compared with W&B run ids in add_wandb_metrics
#   model_name         - compared with the model name parsed from an evaluation run's model_path
#   real_interventions - intervention count of the drive (presumably counted during the drive - confirm)
#   drive              - recording folder name(s), resolved against root_path in add_online_metrics
columns = ['training_run', 'model_name', 'real_interventions', 'drive']

# build a dataframe with the results
# Rows that are commented out are left out of the table; two of them are annotated
# as ruined by rain, the reason for the others is not recorded in this cell.
rows = [
    # ['3ftnqxcb', 'ebm-512-s1', 9, ('e2e-ebm-512-s1-day1-forward_2022-09-20-11-00-41', 'e2e-ebm-512-s1-day1-backward_2022-09-20-11-11-14')],
    ['3ftnqxcb', 'ebm-512-s1', 4, ('ebm-paper-ebm-512-s1-forward_2022-09-21-10-51-39', 'ebm-paper-ebm-512-s1-backward_2022-09-21-11-02-14')],
    ['3ftnqxcb', 'ebm-512-s1', 1, ('ebm-paper-ebm-512-s1-forward_2022-09-22-11-02-58', 'ebm-paper-ebm-512-s1-backward_2022-09-22-11-13-52')],
    ['3ftnqxcb', 'ebm-512-s1', 2, ('ebm-paper-ebm-512-s1-forward_2022-09-23-11-18-29', 'ebm-paper-ebm-512-s1-backwards_2022-09-23-11-07-13')],

    ['3jk7cnqa', 'ebm-normal-1-s1', 5, ('e2e-ebm-normal-1-s1-day1-forward_2022-09-20-13-37-56', 'e2e-ebm-normal-1-s1-day1-backward_2022-09-20-13-48-42')],
    # ['3jk7cnqa', 'ebm-normal-1-s1', 20, ('ebm-paper-ebm-normal-1-s1-forward_2022-09-21-11-59-17', 'ebm-paper-ebm-normal-1-s1-backward_2022-09-21-12-10-33')], # RUINED BY RAIN
    ['3jk7cnqa', 'ebm-normal-1-s1', 2, ('ebm-paper-ebm-normal-1-s1-forward_2022-09-22-12-15-22', 'ebm-paper-ebm-normal-1-s1-backward_2022-09-22-12-25-57')],
    ['3jk7cnqa', 'ebm-normal-1-s1', 3, ('ebm-paper-ebm-normal-1-s1-forward_2022-09-23-12-28-45', 'ebm-paper-ebm-normal-1-s1-backwards_2022-09-23-12-17-08')],

    ['2jvl4yhn', 'ebm-spatial-0-s2', 5, ('e2e-ebm-spatial-0-s2-day1-forward_2022-09-20-12-44-32', 'e2e-ebm-spatial-0-s2-day1-backward_2022-09-20-12-55-04')],
    ['2jvl4yhn', 'ebm-spatial-0-s2', 5, ('ebm-paper-ebm-spatial-0-s2-forward_2022-09-21-11-13-54', 'ebm-paper-ebm-spatial-0-s2-backward_2022-09-21-11-24-29')],
    # ['2jvl4yhn', 'ebm-spatial-0-s2', 8, ('ebm-paper-ebm-spatial-0-s2-forward_2022-09-22-11-25-33', 'ebm-paper-ebm-spatial-0-s2-backward_2022-09-22-11-36-02')],
    ['2jvl4yhn', 'ebm-spatial-0-s2', 4, ('ebm-paper-ebm-spatial-0-s2-forward_2022-09-23-11-43-09', 'ebm-paper-ebm-spatial-0-s2-backwards_2022-09-23-11-31-39')],

    ['bxd5wtqk', 'mae-s2', 2, ('e2e-mae-s2-day1-forward_2022-09-20-11-25-14', 'e2e-mae-s2-day1-backward_2022-09-20-11-35-49')],
    ['bxd5wtqk', 'mae-s2', 2, ('ebm-paper-mae-s2-forward_2022-09-21-12-22-57', 'ebm-paper-mae-s2-backward_2022-09-21-12-34-47')],
    # ['bxd5wtqk', 'mae-s2', 5, ('ebm-paper-mae-s2-forward_2022-09-22-10-02-03', 'ebm-paper-mae-s2-backward_2022-09-22-10-26-02')],
    ['bxd5wtqk', 'mae-s2', 1, ('ebm-paper-mae-s2-forward_2022-09-23-10-31-24', 'ebm-paper-mae-s2-backward_2022-09-23-10-19-55')],

    # ['3g3wwx73', 'classifier-512', 7, ('e2e-classifier-512-day1-forward_2022-09-20-12-20-47', 'e2e-classifier-512-day1-backward_2022-09-20-12-31-35')],
    ['3g3wwx73', 'classifier-512', 1, ('ebm-paper-classifier-512-forward_2022-09-21-11-36-50', 'ebm-paper-classifier-512-backward_2022-09-21-11-47-38')],
    ['3g3wwx73', 'classifier-512', 7, ('ebm-paper-classifier-512-forward_2022-09-22-11-47-18', 'ebm-paper-classifier-512-backward_2022-09-22-11-58-03')],
    ['3g3wwx73', 'classifier-512', 1, ('ebm-paper-classifier-512-forward_2022-09-23-12-05-48', 'ebm-paper-classifier-512-backwards_2022-09-23-11-54-32')],

    # ['1hbbr6dm', 'mdn-5-s1', 10, ('e2e-mdn-5-s1-day1-forward_2022-09-20-14-00-44', 'e2e-mdn-5-s1-day1-backward_2022-09-20-14-13-08')], # RUINED BY RAIN
    ['1hbbr6dm', 'mdn-5-s1', 1,  ('ebm-paper-mdn-5-s1-forward_2022-09-21-10-29-09', 'ebm-paper-mdn-5-s1-backward_2022-09-21-10-39-43')],
    ['1hbbr6dm', 'mdn-5-s1', 5, ('ebm-paper-mdn-5-s1-forward_2022-09-22-10-38-10', 'ebm-paper-mdn-5-s1-backward_2022-09-22-10-49-36')],
    ['1hbbr6dm', 'mdn-5-s1', 5, ('ebm-paper-mdn-5-s1-forward_2022-09-23-10-55-27', 'ebm-paper-mdn-5-s1-backwards_2022-09-23-10-43-34')],
]

# one table row per evaluation; the last expression displays the table
df = pd.DataFrame(rows, columns=columns)
df

Unnamed: 0,training_run,model_name,real_interventions,drive
0,3ftnqxcb,ebm-512-s1,4,(ebm-paper-ebm-512-s1-forward_2022-09-21-10-51...
1,3ftnqxcb,ebm-512-s1,1,(ebm-paper-ebm-512-s1-forward_2022-09-22-11-02...
2,3ftnqxcb,ebm-512-s1,2,(ebm-paper-ebm-512-s1-forward_2022-09-23-11-18...
3,3jk7cnqa,ebm-normal-1-s1,5,(e2e-ebm-normal-1-s1-day1-forward_2022-09-20-1...
4,3jk7cnqa,ebm-normal-1-s1,2,(ebm-paper-ebm-normal-1-s1-forward_2022-09-22-...
5,3jk7cnqa,ebm-normal-1-s1,3,(ebm-paper-ebm-normal-1-s1-forward_2022-09-23-...
6,2jvl4yhn,ebm-spatial-0-s2,5,(e2e-ebm-spatial-0-s2-day1-forward_2022-09-20-...
7,2jvl4yhn,ebm-spatial-0-s2,5,(ebm-paper-ebm-spatial-0-s2-forward_2022-09-21...
8,2jvl4yhn,ebm-spatial-0-s2,4,(ebm-paper-ebm-spatial-0-s2-forward_2022-09-23...
9,bxd5wtqk,mae-s2,2,"(e2e-mae-s2-day1-forward_2022-09-20-11-25-14, ..."


In [6]:
# Mean number of interventions per model, fewest first.
# The numeric column is selected explicitly: the table also holds text and tuple
# columns, and taking mean() over those raises a TypeError in pandas >= 2.0.
df.groupby('model_name')[['real_interventions']].mean().sort_values(by='real_interventions')

Unnamed: 0_level_0,real_interventions
model_name,Unnamed: 1_level_1
mae-s2,1.666667
ebm-512-s1,2.333333
classifier-512,3.0
ebm-normal-1-s1,3.333333
mdn-5-s1,3.666667
ebm-spatial-0-s2,4.666667


In [7]:
# Save the table as built from the row list (before any metrics are added), without the index column.
df.to_csv('ebm-experiments-final.csv', index=False)

In [14]:
# Add the W&B metrics and the closed-loop drive metrics to the table, then save it.
# Both helpers modify `df` in place and return it. In the recorded run the
# closed-loop step took roughly 25 s per table row.
df = add_wandb_metrics(df)
df = add_online_metrics(df)
df.to_csv('ebm-experiments-final-results.csv', index=False)
df

0it [00:00, ?it/s]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94ce9b8a00>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-512-s1-forward_2022-09-21-10-51-39: length=17873, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94ce9b8c70>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-512-s1-backward_2022-09-21-11-02-14: length=18309, filtered=0


1it [00:25, 25.02s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a96c10>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-512-s1-forward_2022-09-22-11-02-58: length=17899, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a10e80>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-512-s1-backward_2022-09-22-11-13-52: length=18671, filtered=0


2it [00:50, 25.15s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d09b5490>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-512-s1-forward_2022-09-23-11-18-29: length=17934, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0b01be0>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-512-s1-backwards_2022-09-23-11-07-13: length=18310, filtered=0


3it [01:15, 25.26s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f95f4271910>
)
/data/Bolt/end-to-end/drives-ebm-paper/e2e-ebm-normal-1-s1-day1-forward_2022-09-20-13-37-56: length=18183, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f95e57a1c10>
)
/data/Bolt/end-to-end/drives-ebm-paper/e2e-ebm-normal-1-s1-day1-backward_2022-09-20-13-48-42: length=18696, filtered=0


4it [01:40, 25.14s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0b01ca0>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-normal-1-s1-forward_2022-09-22-12-15-22: length=17960, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0c3d220>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-normal-1-s1-backward_2022-09-22-12-25-57: length=19279, filtered=0


5it [02:05, 24.96s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0c3d220>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-normal-1-s1-forward_2022-09-23-12-28-45: length=18014, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0aa6a90>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-normal-1-s1-backwards_2022-09-23-12-17-08: length=18364, filtered=0


6it [02:29, 24.77s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a96c10>
)
/data/Bolt/end-to-end/drives-ebm-paper/e2e-ebm-spatial-0-s2-day1-forward_2022-09-20-12-44-32: length=17939, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0aa6130>
)
/data/Bolt/end-to-end/drives-ebm-paper/e2e-ebm-spatial-0-s2-day1-backward_2022-09-20-12-55-04: length=18375, filtered=0


7it [02:53, 24.62s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f95f5dc0c70>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-spatial-0-s2-forward_2022-09-21-11-13-54: length=17839, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a96c10>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-spatial-0-s2-backward_2022-09-21-11-24-29: length=18328, filtered=0


8it [03:18, 24.61s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0b2ac10>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-spatial-0-s2-forward_2022-09-23-11-43-09: length=17909, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0af0640>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-ebm-spatial-0-s2-backwards_2022-09-23-11-31-39: length=18294, filtered=0


9it [03:46, 25.56s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f95e57a1c10>
)
/data/Bolt/end-to-end/drives-ebm-paper/e2e-mae-s2-day1-forward_2022-09-20-11-25-14: length=18017, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94cf25eac0>
)
/data/Bolt/end-to-end/drives-ebm-paper/e2e-mae-s2-day1-backward_2022-09-20-11-35-49: length=18347, filtered=0


10it [04:11, 25.48s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94cf97a130>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mae-s2-forward_2022-09-21-12-22-57: length=18787, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a58340>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mae-s2-backward_2022-09-21-12-34-47: length=18355, filtered=0


11it [04:37, 25.59s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d09f4a90>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mae-s2-forward_2022-09-23-10-31-24: length=17905, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a634c0>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mae-s2-backward_2022-09-23-10-19-55: length=18382, filtered=0


12it [05:03, 25.77s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0c3d220>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-classifier-512-forward_2022-09-21-11-36-50: length=18101, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0ad8dc0>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-classifier-512-backward_2022-09-21-11-47-38: length=18289, filtered=0


13it [05:29, 25.77s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a63220>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-classifier-512-forward_2022-09-22-11-47-18: length=18334, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94cf25e9d0>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-classifier-512-backward_2022-09-22-11-58-03: length=18259, filtered=0


14it [05:54, 25.57s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94de849490>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-classifier-512-forward_2022-09-23-12-05-48: length=17966, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0ad8250>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-classifier-512-backwards_2022-09-23-11-54-32: length=18349, filtered=0


15it [06:19, 25.52s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a96c10>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mdn-5-s1-forward_2022-09-21-10-29-09: length=17849, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0a63220>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mdn-5-s1-backward_2022-09-21-10-39-43: length=18234, filtered=0


16it [06:46, 25.98s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0aa6a90>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mdn-5-s1-forward_2022-09-22-10-38-10: length=17961, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d09a3970>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mdn-5-s1-backward_2022-09-22-10-49-36: length=18523, filtered=0


17it [07:11, 25.62s/it]

[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0ad8250>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mdn-5-s1-forward_2022-09-23-10-55-27: length=18415, filtered=0
[NvidiaDataset] Using default transform: Compose(
    <dataloading.nvidia.Normalize object at 0x7f94d0aa6a90>
)
/data/Bolt/end-to-end/drives-ebm-paper/ebm-paper-mdn-5-s1-backwards_2022-09-23-10-43-34: length=18635, filtered=0


18it [07:36, 25.34s/it]


Unnamed: 0,training_run,model_name,real_interventions,drive,elva_mae,elva_whiteness,elva_expert_whiteness,val_mae,model_type,traj_mae,traj_rmse,traj_max,traj_failure_rate,distance,distance_per_intervention,whiteness,cmd_whiteness,expert_whiteness
0,3ftnqxcb,ebm-512-s1,4,(ebm-paper-ebm-512-s1-forward_2022-09-21-10-51...,7.487386,191.060638,17.679368,8.607146,pilotnet-ebm,0.502133,0.617828,1.716365,10.592712,8479.229138,2119.807285,35.247937,176.928088,17.655094
1,3ftnqxcb,ebm-512-s1,1,(ebm-paper-ebm-512-s1-forward_2022-09-22-11-02...,7.487386,191.060638,17.679368,8.607146,pilotnet-ebm,0.449918,0.564381,1.713574,8.77301,8535.219556,8535.219556,32.34042,96.937671,17.655094
2,3ftnqxcb,ebm-512-s1,2,(ebm-paper-ebm-512-s1-forward_2022-09-23-11-18...,7.487386,191.060638,17.679368,8.607146,pilotnet-ebm,0.482874,0.592681,1.718401,9.072609,8519.657455,4259.828728,28.570325,223.588494,17.655094
3,3jk7cnqa,ebm-normal-1-s1,5,(e2e-ebm-normal-1-s1-day1-forward_2022-09-20-1...,6.974703,152.191879,17.679368,8.43754,pilotnet-ebm,0.412358,0.524989,1.721364,5.836352,8473.096473,1694.619295,49.922412,119.390706,17.655094
4,3jk7cnqa,ebm-normal-1-s1,2,(ebm-paper-ebm-normal-1-s1-forward_2022-09-22-...,6.974703,152.191879,17.679368,8.43754,pilotnet-ebm,0.407956,0.512668,1.708277,5.597071,8473.696547,4236.848274,38.956632,137.220901,17.655094
5,3jk7cnqa,ebm-normal-1-s1,3,(ebm-paper-ebm-normal-1-s1-forward_2022-09-23-...,6.974703,152.191879,17.679368,8.43754,pilotnet-ebm,0.447816,0.552821,1.596786,7.919993,8520.976775,2840.325592,34.206305,77.275104,17.655094
6,2jvl4yhn,ebm-spatial-0-s2,5,(e2e-ebm-spatial-0-s2-day1-forward_2022-09-20-...,6.909929,125.816818,17.679368,8.738031,pilotnet-ebm,0.436406,0.54832,1.640132,6.90959,8444.219201,1688.84384,27.796907,56.328374,17.655094
7,2jvl4yhn,ebm-spatial-0-s2,5,(ebm-paper-ebm-spatial-0-s2-forward_2022-09-21...,6.909929,125.816818,17.679368,8.738031,pilotnet-ebm,0.442618,0.559457,1.509779,8.546063,8442.245854,1688.449171,46.829478,57.148334,17.655094
8,2jvl4yhn,ebm-spatial-0-s2,4,(ebm-paper-ebm-spatial-0-s2-forward_2022-09-23...,6.909929,125.816818,17.679368,8.738031,pilotnet-ebm,0.464354,0.583141,1.689208,8.90586,8466.304693,2116.576173,33.723343,56.861922,17.655094
9,bxd5wtqk,mae-s2,2,"(e2e-mae-s2-day1-forward_2022-09-20-11-25-14, ...",7.196932,54.846653,17.679368,7.828012,pilotnet,0.400974,0.504417,1.597662,5.848605,8516.51089,4258.255445,26.69053,37.836591,17.655094


In [6]:
def model_name_to_label(name):
    '''Maps an internal model name to the human-readable label used in result tables.

    Args:
        name: model name as stored in the `model_name` column, e.g. 'ebm-512-s1'.

    Returns:
        The display label for that model.

    Raises:
        KeyError: if `name` is not one of the known model names.
    '''
    labels = {
        'ebm-normal-1-s1': 'EBM (temporal regularization)',
        'ebm-spatial-0-s2': 'EBM (spatial targets)',
        'mae-s2': 'MAE',
        # was misspelled 'Classifation', which ended up verbatim in the LaTeX table
        'classifier-512': 'Classification',
        'mdn-5-s1': 'MDN',
        'ebm-512-s1': 'EBM',
    }
    return labels[name]

In [9]:
# Scratch cell: a bare set literal holding the 'W_{cmd}' column label; it only echoes that value.
{'W_{cmd}'}

{'W_{cmd}'}

In [10]:
# Per-drive results table for the paper: one row per drive with the model label,
# the number of interventions and the two whiteness metrics.
df_loaded = pd.read_csv('ebm-experiments-final-results.csv')

# Rename to the presentation names, then keep only the columns the table needs
# (in display order). Selecting the columns makes an explicit `drop` of all the
# other metrics unnecessary.
df_loaded = df_loaded.rename(columns={'model_name': 'Model (session)', 'real_interventions': 'Interventions',
                                      'whiteness': 'W_{eff}', 'cmd_whiteness': 'W_{cmd}'})
df_loaded = df_loaded[['Model (session)', 'Interventions', 'W_{eff}', 'W_{cmd}']]
display(df_loaded)

formatters = {
    'Model (session)': model_name_to_label,
    'W_{eff}': lambda a: f'{a:.2f}°/s',
    'W_{cmd}': lambda a: f'{a:.2f}°/s',
}
# The whiteness headers are LaTeX subscripts, so they are emitted in math mode with
# escaping disabled; with escaping on they come out as the literal text `W\_\{eff\}`.
# None of the cell values contain LaTeX special characters, so escape=False is safe here.
# `bold_rows` was dropped: it only affects row labels, which index=False omits.
latex_header = ['Model (session)', 'Interventions', '$W_{eff}$', '$W_{cmd}$']
print(df_loaded.to_latex(index=False, float_format='%.2f', formatters=formatters,
                         header=latex_header, escape=False))

Unnamed: 0,Model (session),Interventions,W_{eff},W_{cmd}
0,ebm-512-s1,4,35.247937,176.928088
1,ebm-512-s1,1,32.34042,96.937671
2,ebm-512-s1,2,28.570325,223.588494
3,ebm-normal-1-s1,5,49.922412,119.390706
4,ebm-normal-1-s1,2,38.956632,137.220901
5,ebm-normal-1-s1,3,34.206305,77.275104
6,ebm-spatial-0-s2,5,27.796907,56.328374
7,ebm-spatial-0-s2,5,46.829478,57.148334
8,ebm-spatial-0-s2,4,33.723343,56.861922
9,mae-s2,2,26.69053,37.836591


\begin{tabular}{lrrr}
\toprule
              Model (session) &  Interventions &  W\_\{eff\} &   W\_\{cmd\} \\
\midrule
                          EBM &              4 & 35.25°/s & 176.93°/s \\
                          EBM &              1 & 32.34°/s &  96.94°/s \\
                          EBM &              2 & 28.57°/s & 223.59°/s \\
EBM (temporal regularization) &              5 & 49.92°/s & 119.39°/s \\
EBM (temporal regularization) &              2 & 38.96°/s & 137.22°/s \\
EBM (temporal regularization) &              3 & 34.21°/s &  77.28°/s \\
        EBM (spatial targets) &              5 & 27.80°/s &  56.33°/s \\
        EBM (spatial targets) &              5 & 46.83°/s &  57.15°/s \\
        EBM (spatial targets) &              4 & 33.72°/s &  56.86°/s \\
                          MAE &              2 & 26.69°/s &  37.84°/s \\
                          MAE &              2 & 29.65°/s &  75.34°/s \\
                          MAE &              1 & 26.28°/s &  33.10°/s \\
     

  print(df_loaded.to_latex(index=False, float_format='%.2f', formatters=formatters, bold_rows=True))


In [17]:
# Aggregate the per-drive results per model: mean, median and the best drive.
df = pd.read_csv('ebm-experiments-final-results.csv')
group_keys = ['model_type', 'model_name']
grouped_df = df.groupby(group_keys)

# numeric_only=True: the frame also holds string columns (`training_run`, `drive`).
# Older pandas silently drops them from mean/median, newer pandas raises a TypeError.
mean_vals = grouped_df.mean(numeric_only=True).sort_values(by=['real_interventions'])
median_vals = grouped_df.median(numeric_only=True).sort_values(by=['real_interventions'])

# "Best" drive per model = the one with the fewest interventions.
# NOTE(review): `first()` picks the first non-null value per column, so this equals
# the top row of each group only as long as the metric columns contain no NaNs.
sorted_df = df.sort_values(by=['real_interventions'])
min_vals = sorted_df.groupby(group_keys).first().sort_values(by=['real_interventions'])

# Recompute distance-per-intervention from the aggregated columns (ratio of the
# aggregates) instead of keeping the aggregate of the per-drive ratios.
mean_vals['distance_per_intervention'] = mean_vals['distance'] / mean_vals['real_interventions']
median_vals['distance_per_intervention'] = median_vals['distance'] / median_vals['real_interventions']
min_vals['distance_per_intervention'] = min_vals['distance'] / min_vals['real_interventions']

print('Mean:')
display(mean_vals)
print('Median:')
display(median_vals)
print('Best of each model:')
display(min_vals)

Mean:


Unnamed: 0_level_0,Unnamed: 1_level_0,real_interventions,elva_mae,elva_whiteness,elva_expert_whiteness,val_mae,traj_mae,traj_rmse,traj_max,traj_failure_rate,distance,distance_per_intervention,whiteness,cmd_whiteness,expert_whiteness
model_type,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
pilotnet,mae-s2,1.666667,7.196932,54.846653,17.679368,7.828012,0.428689,0.531593,1.619112,5.803563,8547.130941,5128.278565,27.537596,48.761025,17.655094
pilotnet-ebm,ebm-512-s1,2.333333,7.487386,191.060638,17.679368,8.607146,0.478308,0.59163,1.716113,9.479444,8511.368716,3647.72945,32.052894,165.818084,17.655094
pilotnet-classifier,classifier-512,3.0,8.117841,208.457916,17.679368,9.925803,0.430762,0.538518,1.681085,6.740301,8506.755036,2835.585012,45.773931,210.602684,17.655094
pilotnet-ebm,ebm-normal-1-s1,3.333333,6.974703,152.191879,17.679368,8.43754,0.42271,0.530159,1.675475,6.451138,8489.256598,2546.77698,41.02845,111.295571,17.655094
pilotnet-mdn,mdn-5-s1,3.666667,6.73499,74.41684,17.679368,9.0706,0.433356,0.542339,1.693785,6.551073,8456.163521,2306.226415,25.599471,35.490186,17.655094
pilotnet-ebm,ebm-spatial-0-s2,4.666667,6.909929,125.816818,17.679368,8.738031,0.447793,0.563639,1.61304,8.120504,8450.923249,1810.912125,36.116576,56.779543,17.655094


Median:


Unnamed: 0_level_0,Unnamed: 1_level_0,real_interventions,elva_mae,elva_whiteness,elva_expert_whiteness,val_mae,traj_mae,traj_rmse,traj_max,traj_failure_rate,distance,distance_per_intervention,whiteness,cmd_whiteness,expert_whiteness
model_type,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
pilotnet-classifier,classifier-512,1.0,8.117841,208.457916,17.679368,9.925803,0.424417,0.527431,1.715849,5.54812,8506.076813,8506.076813,41.047364,182.392543,17.655094
pilotnet,mae-s2,2.0,7.196932,54.846653,17.679368,7.828012,0.440934,0.540331,1.597662,5.848605,8558.516801,4279.2584,26.69053,37.836591,17.655094
pilotnet-ebm,ebm-512-s1,2.0,7.487386,191.060638,17.679368,8.607146,0.482874,0.592681,1.716365,9.072609,8519.657455,4259.828728,32.34042,176.928088,17.655094
pilotnet-ebm,ebm-normal-1-s1,3.0,6.974703,152.191879,17.679368,8.43754,0.412358,0.524989,1.708277,5.836352,8473.696547,2824.565516,38.956632,119.390706,17.655094
pilotnet-ebm,ebm-spatial-0-s2,5.0,6.909929,125.816818,17.679368,8.738031,0.442618,0.559457,1.640132,8.546063,8444.219201,1688.84384,33.723343,56.861922,17.655094
pilotnet-mdn,mdn-5-s1,5.0,6.73499,74.41684,17.679368,9.0706,0.427198,0.5317,1.718781,5.782046,8433.066474,1686.613295,25.315615,35.46087,17.655094


Best of each model:


Unnamed: 0_level_0,Unnamed: 1_level_0,training_run,real_interventions,drive,elva_mae,elva_whiteness,elva_expert_whiteness,val_mae,traj_mae,traj_rmse,traj_max,traj_failure_rate,distance,distance_per_intervention,whiteness,cmd_whiteness,expert_whiteness
model_type,model_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
pilotnet,mae-s2,bxd5wtqk,1,('ebm-paper-mae-s2-forward_2022-09-23-10-31-24...,7.196932,54.846653,17.679368,7.828012,0.440934,0.540331,1.559173,5.361873,8566.365133,8566.365133,26.277066,33.102612,17.655094
pilotnet-classifier,classifier-512,3g3wwx73,1,('ebm-paper-classifier-512-forward_2022-09-23-...,8.117841,208.457916,17.679368,9.925803,0.424417,0.527431,1.715849,5.54812,8560.665268,8560.665268,34.107032,162.274624,17.655094
pilotnet-ebm,ebm-512-s1,3ftnqxcb,1,('ebm-paper-ebm-512-s1-forward_2022-09-22-11-0...,7.487386,191.060638,17.679368,8.607146,0.449918,0.564381,1.713574,8.77301,8535.219556,8535.219556,32.34042,96.937671,17.655094
pilotnet-mdn,mdn-5-s1,1hbbr6dm,1,('ebm-paper-mdn-5-s1-forward_2022-09-21-10-29-...,6.73499,74.41684,17.679368,9.0706,0.427198,0.5317,1.718781,4.938918,8517.875823,8517.875823,25.315615,33.621627,17.655094
pilotnet-ebm,ebm-normal-1-s1,3jk7cnqa,2,('ebm-paper-ebm-normal-1-s1-forward_2022-09-22...,6.974703,152.191879,17.679368,8.43754,0.407956,0.512668,1.708277,5.597071,8473.696547,4236.848274,38.956632,137.220901,17.655094
pilotnet-ebm,ebm-spatial-0-s2,2jvl4yhn,4,('ebm-paper-ebm-spatial-0-s2-forward_2022-09-2...,6.909929,125.816818,17.679368,8.738031,0.464354,0.583141,1.689208,8.90586,8466.304693,2116.576173,33.723343,56.861922,17.655094


In [70]:
# Mean metrics per model for the non-EBM baselines, fewest interventions first.
# numeric_only=True keeps the string columns (`training_run`, `drive`, `model_type`)
# out of the mean explicitly instead of relying on pandas dropping them silently.
(
    df[~df['model_name'].str.contains('ebm')]
    .groupby('model_name')
    .mean(numeric_only=True)
    .sort_values(by='real_interventions')
)

Unnamed: 0_level_0,real_interventions,elva_mae,elva_whiteness,elva_expert_whiteness,val_mae,traj_mae,traj_rmse,traj_max,traj_failure_rate,distance,distance_per_intervention,whiteness,cmd_whiteness,expert_whiteness
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
mae-s2,1.6667,7.1969,54.8467,17.6794,7.828,0.4287,0.5316,1.6191,5.8036,8547.1309,5701.293,27.5376,48.761,17.6551
classifier-512,3.0,8.1178,208.4579,17.6794,9.9258,0.4308,0.5385,1.6811,6.7403,8506.755,6091.4627,45.7739,210.6027,17.6551
mdn-5-s1,3.6667,6.735,74.4168,17.6794,9.0706,0.4334,0.5423,1.6938,6.5511,8456.1635,3962.6663,25.5995,35.4902,17.6551


### Correlation study

In [47]:
def calculate_pvalues(df, method='pearson'):
    '''Computes the matrix of pairwise correlation p-values between numeric columns.

    Rows containing any missing value are dropped first, then only the numeric
    (and boolean) columns are kept.

    Args:
        df: input DataFrame.
        method: 'pearson' (default), 'spearman' or 'kendall'.

    Returns:
        Square float DataFrame whose index and columns are the numeric column
        names; entry [c, r] is the p-value of the chosen test between columns
        r and c, rounded to 4 decimals.

    Raises:
        ValueError: if `method` is not one of the supported names.
    '''
    corr_fns = {'pearson': pearsonr, 'spearman': spearmanr, 'kendall': kendalltau}
    if method not in corr_fns:
        # previously any unknown name silently fell back to Spearman
        raise ValueError(f'Unknown correlation method {method!r}, expected one of {sorted(corr_fns)}')
    corr_fn = corr_fns[method]

    # public equivalent of the private `_get_numeric_data()`
    numeric = df.dropna().select_dtypes(include=['number', 'bool'])
    cols = numeric.columns
    pvalues = pd.DataFrame(index=cols, columns=cols, dtype=float)
    for r in cols:
        for c in cols:
            # single .loc write: chained `pvalues[r][c] = ...` may write to a temporary copy
            pvalues.loc[c, r] = round(corr_fn(numeric[r], numeric[c])[1], 4)
    return pvalues

def kendall_pval(x, y):
    '''Returns only the p-value of Kendall's tau test between `x` and `y`.'''
    _statistic, p_value = kendalltau(x, y)
    return p_value

def pearsonr_pval(x, y):
    '''Returns only the p-value of the Pearson correlation test between `x` and `y`.'''
    _statistic, p_value = pearsonr(x, y)
    return p_value

def spearmanr_pval(x, y):
    '''Returns only the p-value of the Spearman rank correlation test between `x` and `y`.'''
    _statistic, p_value = spearmanr(x, y)
    return p_value

#### On-policy

In [60]:
# Keep only drives whose model name contains neither 'normal' nor 'spatial';
# one regex alternation replaces the two successive filters.
dff = df[~df['model_name'].str.contains('normal|spatial')]
dff

Unnamed: 0,training_run,model_name,real_interventions,drive,elva_mae,elva_whiteness,elva_expert_whiteness,val_mae,model_type,traj_mae,traj_rmse,traj_max,traj_failure_rate,distance,distance_per_intervention,whiteness,cmd_whiteness,expert_whiteness
0,3ftnqxcb,ebm-512-s1,4,(ebm-paper-ebm-512-s1-forward_2022-09-21-10-51...,7.4874,191.0606,17.6794,8.6071,pilotnet-ebm,0.5021,0.6178,1.7164,10.5927,8479.2291,2119.8073,35.2479,176.9281,17.6551
1,3ftnqxcb,ebm-512-s1,1,(ebm-paper-classifier-512-forward_2022-09-22-1...,7.4874,191.0606,17.6794,8.6071,pilotnet-ebm,0.4739,0.5898,1.6061,9.6901,8453.523,8453.523,62.1674,287.1409,17.6551
2,3ftnqxcb,ebm-512-s1,2,(ebm-paper-ebm-512-s1-forward_2022-09-23-11-18...,7.4874,191.0606,17.6794,8.6071,pilotnet-ebm,0.4829,0.5927,1.7184,9.0726,8519.6575,4259.8287,28.5703,223.5885,17.6551
9,bxd5wtqk,mae-s2,2,"(e2e-mae-s2-day1-forward_2022-09-20-11-25-14, ...",7.1969,54.8467,17.6794,7.828,pilotnet,0.401,0.5044,1.5977,5.8486,8516.5109,4258.2554,26.6905,37.8366,17.6551
10,bxd5wtqk,mae-s2,2,"(ebm-paper-mae-s2-forward_2022-09-21-12-22-57,...",7.1969,54.8467,17.6794,7.828,pilotnet,0.4442,0.55,1.7005,6.2002,8558.5168,4279.2584,29.6452,75.3439,17.6551
11,bxd5wtqk,mae-s2,1,"(ebm-paper-mae-s2-forward_2022-09-23-10-31-24,...",7.1969,54.8467,17.6794,7.828,pilotnet,0.4409,0.5403,1.5592,5.3619,8566.3651,8566.3651,26.2771,33.1026,17.6551
12,3g3wwx73,classifier-512,1,(ebm-paper-classifier-512-forward_2022-09-21-1...,8.1178,208.4579,17.6794,9.9258,pilotnet-classifier,0.394,0.4983,1.7213,4.9827,8506.0768,8506.0768,41.0474,182.3925,17.6551
13,3g3wwx73,classifier-512,7,(ebm-paper-classifier-512-forward_2022-09-22-1...,8.1178,208.4579,17.6794,9.9258,pilotnet-classifier,0.4739,0.5898,1.6061,9.6901,8453.523,1207.6461,62.1674,287.1409,17.6551
14,3g3wwx73,classifier-512,1,(ebm-paper-classifier-512-forward_2022-09-23-1...,8.1178,208.4579,17.6794,9.9258,pilotnet-classifier,0.4244,0.5274,1.7158,5.5481,8560.6653,8560.6653,34.107,162.2746,17.6551
15,1hbbr6dm,mdn-5-s1,1,(ebm-paper-mdn-5-s1-forward_2022-09-21-10-29-0...,6.735,74.4168,17.6794,9.0706,pilotnet-mdn,0.4272,0.5317,1.7188,4.9389,8517.8758,8517.8758,25.3156,33.6216,17.6551


In [61]:
# Correlate the on-policy metrics with the number of interventions, using one
# data point per model (the mean over that model's drives).
target_cols = ['real_interventions']
candidate_cols = ['traj_mae', 'whiteness', 'cmd_whiteness']

# Computed once and reused for both coefficients. numeric_only=True keeps the
# string columns out of the mean explicitly (newer pandas raises otherwise).
on_policy_means = dff.dropna().groupby('model_name').mean(numeric_only=True)

print('pearson')
display(on_policy_means.corr('pearson').loc[candidate_cols, target_cols])
print('spearman')
display(on_policy_means.corr('spearman').loc[candidate_cols, target_cols])

pearson


Unnamed: 0,real_interventions
traj_mae,-0.1932
whiteness,-0.0259
cmd_whiteness,-0.0732


spearman


Unnamed: 0,real_interventions
traj_mae,0.4
whiteness,-0.2
cmd_whiteness,-0.4


#### Off-policy

In [56]:
# NOTE: this changes the float display format for every cell executed afterwards.
pd.set_option('display.float_format', lambda x: '%.4f' % x)

# Correlate the off-policy metrics with the number of interventions, using one
# data point per model (the mean over that model's drives).
target_cols = ['real_interventions']
candidate_cols = ['val_mae', 'elva_mae', 'elva_whiteness']
# numeric_only=True keeps the string columns out of the mean explicitly
# (newer pandas raises a TypeError otherwise).
name_grouping = df.dropna().groupby('model_name').mean(numeric_only=True)

print('pearson')
# .copy() so the extra column is added to an independent frame, not to a slice of
# the correlation matrix.
pearson_corr = name_grouping.corr('pearson').loc[candidate_cols, target_cols].copy()
pearson_pvals = name_grouping.corr(pearsonr_pval).loc[candidate_cols, target_cols]
# Assign a Series (aligned on the metric names); there is a single target column.
pearson_corr['p values'] = pearson_pvals[target_cols[0]]
display(pearson_corr)

print('spearman')
spearman_corr = name_grouping.corr('spearman').loc[candidate_cols, target_cols].copy()
spearman_pvals = name_grouping.corr(spearmanr_pval).loc[candidate_cols, target_cols]
spearman_corr['p values'] = spearman_pvals[target_cols[0]]
display(spearman_corr)


pearson




Unnamed: 0,real_interventions,p values
val_mae,0.3825,0.4542
elva_mae,-0.4099,0.4196
elva_whiteness,0.0616,0.9077


spearman




Unnamed: 0,real_interventions,p values
val_mae,0.4857,0.3287
elva_mae,-0.7143,0.1108
elva_whiteness,-0.0286,0.9572


### Plots

In [26]:
import matplotlib.pyplot as plt
from metrics.metrics import calculate_lateral_errors

lat_errors = calculate_lateral_errors(model_ds.frames[:-1000], expert_ds.frames, only_autonomous=True)