In [66]:
%matplotlib inline
import matplotlib.pyplot as plt
import glob
import numpy as np
import seaborn as sns
import pandas as pd

from collections import OrderedDict

In [67]:
def pretty_dataset_name(dataset_name):
    """Return the human-readable label for a dataset key.

    The keys 'eth', 'hotel', 'univ', 'zara1' and 'zara2' map to fixed
    display labels; any other value is handed back unchanged.
    """
    known_labels = (
        ('eth', 'ETH - Univ'),
        ('hotel', 'ETH - Hotel'),
        ('univ', 'UCY - Univ'),
        ('zara1', 'UCY - Zara 1'),
        ('zara2', 'UCY - Zara 2'),
    )
    # Ordered equality scan (not a dict lookup) so that unhashable inputs
    # simply fall through to the final return instead of raising.
    for key, label in known_labels:
        if dataset_name == key:
            return label
    return dataset_name

# Displacement Error Analyses

In [68]:
# Load every per-experiment error CSV into one long-format frame.
# glob.glob yields paths in arbitrary (filesystem-dependent) order, so sort them:
# otherwise the concatenated row order, and the row labels assigned by
# ignore_index=True, can differ from one machine or run to the next.
error_csv_paths = sorted(glob.glob('plots/data/*_errors.csv'))
if not error_csv_paths:
    # pd.concat([]) would fail with an opaque "No objects to concatenate";
    # name the real problem instead.
    raise FileNotFoundError(
        "No files match 'plots/data/*_errors.csv' relative to the current working directory."
    )
errors_df = pd.concat([pd.read_csv(csv_path) for csv_path in error_csv_paths], ignore_index=True)

In [69]:
errors_df.head()

Unnamed: 0,data_precondition,dataset,method,run,node,sample,error_type,error_value
0,prev,zara1,our_most_likely,0,Pedestrian/1,0,mse,0.406025
1,prev,zara1,our_most_likely,0,Pedestrian/1,0,fse,0.800336
2,prev,zara1,our_most_likely,0,Pedestrian/4,0,mse,0.71897
3,prev,zara1,our_most_likely,0,Pedestrian/4,0,fse,1.85283
4,prev,zara1,our_most_likely,0,Pedestrian/7,0,mse,0.853513


In [70]:
# Dataset keys, in the order they are iterated over and reported.
dataset_names = [
    'eth',
    'hotel',
    'univ',
    'zara1',
    'zara2',
]

In [71]:
# One frame per method, each restricted to rows whose data_precondition is 'all'.
sgan_err_df = errors_df.loc[
    errors_df['data_precondition'].eq('all') & errors_df['method'].eq('sgan')
]
our_ml_err_df = errors_df.loc[
    errors_df['data_precondition'].eq('all') & errors_df['method'].eq('our_most_likely')
]
our_full_err_df = errors_df.loc[
    errors_df['data_precondition'].eq('all') & errors_df['method'].eq('our_full')
]

In [72]:
sgan_err_df.head()

Unnamed: 0,data_precondition,dataset,method,run,node,sample,error_type,error_value
10902,curr,zara2,sgan,0,Pedestrian/2,0,mse,0.294134
10903,curr,zara2,sgan,0,Pedestrian/2,0,fse,0.598337
10904,curr,zara2,sgan,0,Pedestrian/4,0,mse,0.300409
10905,curr,zara2,sgan,0,Pedestrian/4,0,fse,0.611891
10906,curr,zara2,sgan,0,Pedestrian/5,0,mse,0.491559


In [73]:
sgan_err_df.dtypes

data_precondition     object
dataset               object
method                object
run                    int64
node                  object
sample                 int64
error_type            object
error_value          float64
dtype: object

In [74]:
def _best_sample_error_summary(method_err_df, dataset_name, sample_ids):
    """Summarise, per error type, the lowest-total-error sample of every run.

    Parameters
    ----------
    method_err_df : pandas.DataFrame
        Error rows for a single method. Must contain the columns 'dataset',
        'run', 'sample', 'error_type' and 'error_value'.
    dataset_name : str
        Value of the 'dataset' column to keep.
    sample_ids : list of int
        Values of the 'sample' column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per error_type with the columns 'run', 'sample', 'sum',
        'count' and 'best_of_100_mean_error'. 'sum' and 'count' are totals of
        error_value and of row counts over the selected (run, sample) groups;
        'best_of_100_mean_error' is their ratio. 'run' and 'sample' are plain
        sums of the selected ids and carry no meaning of their own.
    """
    dataset_df = method_err_df[method_err_df['dataset'] == dataset_name]
    subsamp_df = dataset_df[dataset_df['sample'].isin(sample_ids)]

    # Total error and number of rows for every (run, sample, error_type).
    sample_errs_df = (
        subsamp_df
        .groupby(['run', 'sample', 'error_type'])['error_value']
        .agg(['sum', 'count'])
        .reset_index()
    )

    # For each (run, error_type) keep the sample with the smallest total error.
    # idxmin returns index *labels*, so select with .loc rather than .iloc.
    best_row_labels = sample_errs_df.groupby(['run', 'error_type'])['sum'].idxmin()
    best_sample_errs_df = sample_errs_df.loc[best_row_labels]

    # Pool the selected groups per error type and take the overall mean.
    described_errs = best_sample_errs_df.groupby(['error_type']).sum().reset_index()
    described_errs['best_of_100_mean_error'] = described_errs['sum'] / described_errs['count']
    return described_errs


# Only sample 0 is evaluated, so the "best of" selection below is effectively
# best-of-1 even though the result column is named 'best_of_100_mean_error'.
# To compare several samples, list their ids here (use a seeded generator if
# they are drawn at random, so the numbers are reproducible).
random_subsamples = [0]

for dataset_name in dataset_names:
    print(dataset_name)

    described_sgan_errs = _best_sample_error_summary(sgan_err_df, dataset_name, random_subsamples)
    described_our_ml_errs = _best_sample_error_summary(our_ml_err_df, dataset_name, random_subsamples)
    described_our_full_errs = _best_sample_error_summary(our_full_err_df, dataset_name, random_subsamples)

    print('-- SGAN --')
    print(described_sgan_errs)

    print('-- OUR ML --')
    print(described_our_ml_errs)

    print('-- OUR FULL --')
    print(described_our_full_errs)

    print()

eth
-- SGAN --
  error_type   run  sample          sum  count  best_of_100_mean_error
0        fse  3486       0  1795.980696    546                3.289342
1        mse  3486       0   986.883064    546                1.807478
-- OUR ML --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        fse  3486       0  895.717482    546                1.640508
1        mse  3486       0  455.663316    546                0.834548
-- OUR FULL --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        fse  3486       0  877.443685    546                1.607040
1        mse  3486       0  439.357486    546                0.804684

hotel
-- SGAN --
  error_type   run  sample          sum  count  best_of_100_mean_error
0        fse  5050       0  1258.237438    568                2.215207
1        mse  5050       0   627.116946    568                1.104079
-- OUR ML --
  error_type   run  sample         sum  count  best_of_100_mean_error
0        f