In [1]:
import os
import io
import numpy as np
import pandas as pd
import torch
import sys
from dataclasses import dataclass
import argparse
# Absolute location of the sibling Fit_and_Predict folder, resolved from the
# current working directory.
current_dir = os.getcwd()
parallel_folder = os.path.abspath(os.path.join(current_dir, '../Fit_and_Predict'))

models_path = "../Fit_and_Predict/models"
# Result CSVs whose file name mentions neither HNAM nor TFT.
results_stat = [r for r in os.listdir(models_path) if r.endswith('.csv') and 'HNAM' not in r and 'TFT' not in r]

# Read every file first and concatenate once: calling pd.concat inside the loop
# re-copies all rows gathered so far on each iteration.
stat_frames = [pd.read_csv(os.path.join(models_path, result)) for result in results_stat]
df_stat = pd.concat(stat_frames, axis=0) if stat_frames else pd.DataFrame()


# One row per model, one column per dataset; duplicate (model, dataset) pairs
# are combined with pivot_table's default aggregation (the mean).
df_stat_pivot = df_stat.pivot_table(index='model', columns='dataset', values='training_time')




def _epochs_trained(checkpoint_path):
    """Return the number of epochs logged for the run owning *checkpoint_path*.

    The run directory is everything in the path before the first occurrence of
    'checkpoint'; its metrics.csv is read from below '../Fit_and_Predict/'.
    The count is the largest value in the 'epoch' column plus one (the column
    is presumably zero-based -- TODO confirm against the logger used).
    """
    run_dir = checkpoint_path.split('checkpoint')[0]
    metrics = pd.read_csv('../Fit_and_Predict/' + run_dir + 'metrics.csv')
    return metrics['epoch'].max() + 1


# Result CSVs whose file name mentions HNAM or TFT.
results_nn = [r for r in os.listdir(models_path) if r.endswith('.csv') and ('HNAM' in r or 'TFT' in r)]
# Read every file first and concatenate once instead of growing the frame
# inside the loop.
nn_frames = [pd.read_csv(os.path.join(models_path, result)) for result in results_nn]
df_nn = pd.concat(nn_frames, axis=0) if nn_frames else pd.DataFrame()
# Keep only rows whose path names version_0 .. version_4. The negative
# lookahead rejects a following digit, so version_10, version_42, ... are no
# longer matched by accident. .copy() makes the filtered frame independent, so
# the column assignments below do not write into a slice of the unfiltered one.
df_nn = df_nn[df_nn['path'].str.contains(r'version_[0-4](?!\d)')].copy()
# The second and third '/'-separated path components are used as dataset and
# model name respectively.
df_nn['model'] = df_nn['path'].apply(lambda x: x.split('/')[2])
df_nn['dataset'] = df_nn['path'].apply(lambda x: x.split('/')[1])
df_nn = df_nn.reset_index(drop=True)
df_nn['epochs'] = [_epochs_trained(p) for p in df_nn['path']]
df_nn['time_per_epoch'] = df_nn['seconds_training'] / df_nn['epochs']


# Total training seconds are summed over the kept runs; time per epoch is
# averaged over them.
df_nn_pivot = df_nn.pivot_table(index='model', columns='dataset', values='seconds_training', aggfunc='sum')
time_per_epoch_pivot = df_nn.pivot_table(index='model', columns='dataset', values='time_per_epoch', aggfunc='mean')


# Stack the two total-training-time pivots, then append the per-epoch pivot
# underneath; the keys become the outer index level of the combined table.
total_times = pd.concat([df_stat_pivot, df_nn_pivot], axis=0)
times = pd.concat(
    [total_times, time_per_epoch_pivot],
    keys=['Training Time', 'per Epoch'],
    axis=0,
)
# Whole seconds only.
times = times.round(0).astype(int)

# Relabel ARIMA as SARIMAX and ETS as ETSX in the index.
times = times.rename(index={'ARIMA': 'SARIMAX', 'ETS': 'ETSX'})

# Drop the axis titles so neither the column name nor the index names are shown.
times.columns.name = None
times.index.names = [None, None]

# Fix the column order.
times = times[['Favorita', 'Walmart']]


display(times)
def custom_format(x):
    """Format a table cell as text, with ',' as the thousands separator.

    Integers (built-in or NumPy) are rendered like '108,828' and floats like
    '1,234.5'. Any other value falls back to ``str(x)``, so the formatter
    always returns a string instead of implicitly returning ``None``.
    """
    if isinstance(x, (int, np.integer)):
        # int() so NumPy integer scalars format exactly like built-in ints.
        return f'{int(x):,}'
    if isinstance(x, (float, np.floating)):
        return f'{float(x):,}'
    return str(x)


# Render the table as LaTeX. The column spec is derived from the frame: one
# right-aligned column per index level plus one centred column per data
# column. The previous hard-coded 'rrccc' declared one column more than the
# table contains.
latex_string = times.to_latex(
    caption='Training Times in Seconds',
    label='tab:training_times',
    position='ht',
    column_format='r' * times.index.nlevels + 'c' * times.shape[1],
    multicolumn_format='c',
    # One formatter per data column.
    formatters=[custom_format] * times.shape[1],
)

print(latex_string)

Unnamed: 0,Unnamed: 1,Favorita,Walmart
Training Time,SARIMAX,8660,6559
Training Time,ETSX,491,241
Training Time,Lasso,234,140
Training Time,Prophet,3029,1474
Training Time,HNAM,17913,15751
Training Time,TFT,108828,55964
per Epoch,HNAM,68,78
per Epoch,TFT,470,328


\begin{table}[ht]
\caption{Training Times in Seconds}
\label{tab:training_times}
\begin{tabular}{rrccc}
\toprule
 &  & Favorita & Walmart \\
\midrule
\multirow[t]{6}{*}{Training Time} & SARIMAX & 8,660 & 6,559 \\
 & ETSX & 491 & 241 \\
 & Lasso & 234 & 140 \\
 & Prophet & 3,029 & 1,474 \\
 & HNAM & 17,913 & 15,751 \\
 & TFT & 108,828 & 55,964 \\
\cline{1-4}
\multirow[t]{2}{*}{per Epoch} & HNAM & 68 & 78 \\
 & TFT & 470 & 328 \\
\cline{1-4}
\bottomrule
\end{tabular}
\end{table}

