In [122]:
import pandas as pd
import numpy as np
from functools import reduce
import os

# Evaluation

The main aim of this notebook is to allow users to create summary tables from the LIBRA benchmarking results. An example can be found below.

In [131]:
# Folder holding the benchmarking CSVs; one file per (method, usecase) pair,
# named `results_libra_{method}_{usecase}.csv`.
data_loc = '../results_benchmarking/'

usecases = ['economics', 'nature', 'finance', 'human']
methods = ['multistep', 'onestep']

# Columns that identify a row in every intermediate table and in the final one.
merge_keys = ['metrics', 'method']


def _merge_on_keys(left, right):
    """Inner-join two summary tables on the shared `merge_keys` columns."""
    return pd.merge(left, right, on=merge_keys)


usecase_results = []
for usecase in usecases:
    usecase_tables = []
    for method in methods:
        res = pd.read_csv(os.path.join(data_loc, f'results_libra_{method}_{usecase}.csv'))
        # The first CSV column carries the row label; give it a stable name.
        res.columns = ['method_metrics'] + list(res.columns[1:])

        # Row labels are '_'-separated; the first three tokens are used as
        # metric, model and method (presumably `<metric>_<model>_<method>...`
        # -- TODO confirm against the benchmarking output).
        label_tokens = res['method_metrics'].str.split('_')
        res['metrics'] = label_tokens.str[0]
        res['model'] = label_tokens.str[1]
        res['method'] = label_tokens.str[2]

        # Average within each (method, model, metric) group, then across the
        # remaining value columns. `numeric_only=True` keeps the string label
        # column out of the mean: pandas >= 2.0 raises a TypeError on it
        # instead of silently dropping it as older versions did.
        res = (
            res.groupby(['method', 'model', 'metrics'])
            .mean(numeric_only=True)
            .mean(axis=1)
            .reset_index()
        )
        res.columns = list(res.columns[:-1]) + [f'average_on_{usecase}']

        # One narrow table per model: metric, that model's average, and the
        # method taken from the file name (not from the row label).
        model_parts = []
        for model in res.model.unique():
            # .loc/.rename/.assign build a new frame, so no column is written
            # onto a filtered slice (avoids SettingWithCopyWarning).
            part = (
                res.loc[res.model == model, ['metrics', f'average_on_{usecase}']]
                .rename(columns={f'average_on_{usecase}': f'{usecase}_{model}'})
                .assign(method=method)
            )
            model_parts.append(part)

        # Put the models side by side: one column per model.
        parts = reduce(_merge_on_keys, model_parts)
        usecase_tables.append(parts)

    # Stack the per-method tables of this usecase on top of each other.
    current_usecase = pd.concat(usecase_tables)
    usecase_results.append(current_usecase)

# Put the usecases side by side and keep the key columns first; every
# `{usecase}_{model}` column contains an underscore, the key columns do not.
results = reduce(_merge_on_keys, usecase_results)
results = results[merge_keys + [col for col in results.columns if '_' in col]]

In [132]:
# Emit one LaTeX table per usecase: the two key columns followed by every
# column whose name contains the usecase.
key_cols = ['metrics', 'method']
for usecase in usecases:
    usecase_cols = [name for name in results.columns if usecase in name]
    # Round for presentation and show infinite scores as a dash.
    table = results[key_cols + usecase_cols].round(2).replace(np.inf, '-')
    print(table.to_latex(index=False, bold_rows=True))

\begin{tabular}{llrrrrr}
\toprule
metrics &           method &  economics\_DeepAR &  economics\_LSTM &  economics\_NBeats &  economics\_NP &  economics\_TFT \\
\midrule
   mase & multi\_step\_ahead &              1.95 &           26.89 &              2.73 &         66.78 &          55.42 \\
   moas & multi\_step\_ahead &              0.07 &            0.06 &              0.05 &          0.64 &           0.02 \\
   moes & multi\_step\_ahead &              0.39 &            0.19 &              0.32 &          0.41 &           0.09 \\
   muas & multi\_step\_ahead &              0.05 &            0.30 &              0.06 &          1.29 &           0.71 \\
   mues & multi\_step\_ahead &              0.61 &            0.81 &              0.68 &          0.59 &           0.91 \\
  smape & multi\_step\_ahead &             10.54 &           46.34 &             10.44 &       2281.54 &         131.75 \\
   mase &   one\_step\_ahead &              2.97 &           29.45 &              2.23 &     