In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
from glob import glob

from IPython.display import display
import numpy as np
import pandas as pd
from tqdm import tqdm

from common.utils import median_ensemble

In [22]:
from common.summary_utils import EnsembleStatistics
from common.metrics import smape_1, smape_2, nd, mape

# FRED
from resources.fred.dataset import FredDataset, FredMeta
from resources.fred.evaluator import FredEvaluator
from common.timeseries import TimeseriesBundle

class FredStatistics:
    """Evaluation helper for the FRED dataset.

    Construction loads the dataset located at ``FredMeta.dataset_path``, takes
    its standard split, and wraps the target part in a ``FredEvaluator``.
    """

    def __init__(self, **kwargs):
        """Load and split the data; ``kwargs`` are forwarded to ``FredEvaluator``."""
        dataset = FredDataset(FredMeta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = FredEvaluator(self.target, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        (Presumably the NaNs are padding for shorter horizons -- confirm.)
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)

# M4
from resources.m4.dataset import M4Dataset, M4Meta
from resources.m4.evaluator import M4Evaluator

class M4Statistics:
    """Evaluation helper for the M4 dataset.

    Construction loads the dataset located at ``M4Meta.dataset_path``, takes
    its standard split, and wraps the target part in an ``M4Evaluator``.
    """

    def __init__(self, **kwargs):
        """Load and split the data; ``kwargs`` are forwarded to ``M4Evaluator``."""
        dataset = M4Dataset(M4Meta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = M4Evaluator(self.target, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        (Presumably the NaNs are padding for shorter horizons -- confirm.)
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)
    
# M3
from resources.m3.dataset import M3Dataset, M3Meta
from resources.m3.evaluator import M3Evaluator

class M3Statistics:
    """Evaluation helper for the M3 dataset.

    Construction loads the dataset located at ``M3Meta.dataset_path``, takes
    its standard split, and wraps the target part in an ``M3Evaluator``.
    """

    def __init__(self, **kwargs):
        """Load and split the data; ``kwargs`` are forwarded to ``M3Evaluator``."""
        dataset = M3Dataset(M3Meta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = M3Evaluator(self.target, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        (Presumably the NaNs are padding for shorter horizons -- confirm.)
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)

    
# TOURISM
from resources.tourism.dataset import TourismDataset, TourismMeta
from resources.tourism.evaluator import TourismEvaluator

class TourismStatistics:
    """Evaluation helper for the tourism dataset.

    Construction loads the dataset located at ``TourismMeta.dataset_path``,
    takes its standard split, and wraps the target part in a
    ``TourismEvaluator``.
    """

    def __init__(self, **kwargs):
        """Load and split the data; ``kwargs`` are forwarded to ``TourismEvaluator``."""
        dataset = TourismDataset(TourismMeta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = TourismEvaluator(self.target, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        (Presumably the NaNs are padding for shorter horizons -- confirm.)
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)
    
# ELECTRICITY
from resources.electricity.dataset import ElectricityDataset, ElectricityMeta
from resources.electricity.evaluator import ElectricityEvaluator

class ElectricityStatisticsDeepAR:
    """Evaluation helper for the electricity dataset using ``ElectricityMeta.deepar_split``.

    The cached dataset is split with a callable that applies
    ``split_by_time(ElectricityMeta.deepar_split)`` to its argument (presumably
    each time series -- confirm).  The second part is split again with
    ``split(24 * 7)`` and only the first piece is kept as the target.
    """

    def __init__(self, **kwargs):
        """Build training/target data and an ``ElectricityEvaluator`` (``precision=5``).

        ``kwargs`` are forwarded to ``ElectricityEvaluator`` in addition to
        ``precision=5``.
        """
        cached = ElectricityDataset(ElectricityMeta.dataset_path).load_cache()
        self.training, held_out = cached.split(
            lambda series: series.split_by_time(ElectricityMeta.deepar_split))
        # 24 * 7: presumably one week of hourly steps -- confirm against the dataset.
        self.target, _ = held_out.split(lambda series: series.split(24 * 7))
        self.evaluator = ElectricityEvaluator(self.target, precision=5, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)
    
class ElectricityStatisticsDeepFact:
    """Evaluation helper for the electricity dataset using ``ElectricityMeta.deepfact_split``.

    The cached dataset is split with a callable that applies
    ``split_by_time(ElectricityMeta.deepfact_split)`` to its argument
    (presumably each time series -- confirm).  The second part is split again
    with ``split(24 * 7)`` and only the first piece is kept as the target.
    """

    def __init__(self, **kwargs):
        """Build training/target data and an ``ElectricityEvaluator`` (``precision=5``).

        ``kwargs`` are forwarded to ``ElectricityEvaluator`` in addition to
        ``precision=5``.
        """
        cached = ElectricityDataset(ElectricityMeta.dataset_path).load_cache()
        self.training, held_out = cached.split(
            lambda series: series.split_by_time(ElectricityMeta.deepfact_split))
        # 24 * 7: presumably one week of hourly steps -- confirm against the dataset.
        self.target, _ = held_out.split(lambda series: series.split(24 * 7))
        self.evaluator = ElectricityEvaluator(self.target, precision=5, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)
    
class ElectricityStatisticsLastWindow:
    """Evaluation helper for the electricity dataset using its standard split.

    Construction loads the dataset located at ``ElectricityMeta.dataset_path``,
    takes its standard split, and wraps the target part in an
    ``ElectricityEvaluator`` created with ``precision=5``.
    """

    def __init__(self, **kwargs):
        """Load and split the data; ``kwargs`` are forwarded to ``ElectricityEvaluator``."""
        dataset = ElectricityDataset(ElectricityMeta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = ElectricityEvaluator(self.target, precision=5, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)
    
# TRAFFIC
from resources.traffic.dataset import TrafficDataset, TrafficMeta
from resources.traffic.evaluator import TrafficEvaluator

class TrafficStatisticsDeepAR:
    """Evaluation helper for the traffic dataset using ``TrafficMeta.deepar_split``.

    The cached dataset is split with a callable that applies
    ``split_by_time(TrafficMeta.deepar_split)`` to its argument (presumably
    each time series -- confirm).  The second part is split again with
    ``split(24 * 7)`` and only the first piece is kept as the target.
    """

    def __init__(self, **kwargs):
        """Build training/target data and a ``TrafficEvaluator`` (``precision=5``).

        ``kwargs`` are forwarded to ``TrafficEvaluator`` in addition to
        ``precision=5``.
        """
        cached = TrafficDataset(TrafficMeta.dataset_path).load_cache()
        self.training, held_out = cached.split(
            lambda series: series.split_by_time(TrafficMeta.deepar_split))
        # 24 * 7: presumably one week of hourly steps -- confirm against the dataset.
        self.target, _ = held_out.split(lambda series: series.split(24 * 7))
        self.evaluator = TrafficEvaluator(self.target, precision=5, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)

class TrafficStatisticsDeepFact:
    """Evaluation helper for the traffic dataset using ``TrafficMeta.deepfact_split``.

    The cached dataset is split with a callable that applies
    ``split_by_time(TrafficMeta.deepfact_split)`` to its argument (presumably
    each time series -- confirm).  The second part is split again with
    ``split(24 * 7)`` and only the first piece is kept as the target.
    """

    def __init__(self, **kwargs):
        """Build training/target data and a ``TrafficEvaluator`` (``precision=5``).

        ``kwargs`` are forwarded to ``TrafficEvaluator`` in addition to
        ``precision=5``.
        """
        cached = TrafficDataset(TrafficMeta.dataset_path).load_cache()
        self.training, held_out = cached.split(
            lambda series: series.split_by_time(TrafficMeta.deepfact_split))
        # 24 * 7: presumably one week of hourly steps -- confirm against the dataset.
        self.target, _ = held_out.split(lambda series: series.split(24 * 7))
        self.evaluator = TrafficEvaluator(self.target, precision=5, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)
    
class TrafficStatisticsLastWindow:
    """Evaluation helper for the traffic dataset using its standard split.

    Construction loads the dataset located at ``TrafficMeta.dataset_path``,
    takes its standard split, and wraps the target part in a
    ``TrafficEvaluator`` created with ``precision=5``.
    """

    def __init__(self, **kwargs):
        """Load and split the data; ``kwargs`` are forwarded to ``TrafficEvaluator``."""
        dataset = TrafficDataset(TrafficMeta.dataset_path)
        self.training, self.target = dataset.standard_split()
        self.evaluator = TrafficEvaluator(self.target, precision=5, **kwargs)

    def evaluate(self, predictions):
        """Return the evaluator's result for ``predictions``.

        NaN entries are removed from every prediction; the remaining values go
        through ``self.training.future_values`` and then to the evaluator.
        """
        without_nans = []
        for forecast in predictions:
            nan_mask = np.isnan(forecast)
            without_nans.append(forecast[~nan_mask])
        future = self.training.future_values(without_nans)
        return self.evaluator.evaluate(future)
    
def collect_statistics(filter_path, evaluator,
                       ensemble_keys=('repeats', 'lookback_period', 'loss_name'),
                       bootstrap_key='repeats',
                       bootstrap_size=5,
                       number_of_samples=1):
    """Build an ``EnsembleStatistics`` for ``filter_path`` and return its bootstrap result.

    The bootstrap settings used to be hard-coded; they are now keyword
    parameters whose defaults reproduce the previous behaviour exactly.

    Args:
        filter_path: Passed to ``EnsembleStatistics`` as ``filter_path``.
        evaluator: Passed to ``EnsembleStatistics`` as ``evaluator``.
        ensemble_keys: Iterable of key names forwarded (as a list) to
            ``EnsembleStatistics.bootstrap``.
        bootstrap_key: Forwarded to ``bootstrap`` as ``bootstrap_key``.
        bootstrap_size: Forwarded to ``bootstrap`` as ``bootstrap_size``.
        number_of_samples: Forwarded to ``bootstrap`` as ``number_of_samples``.

    Returns:
        Whatever ``EnsembleStatistics.bootstrap`` returns (the notebook treats
        it as a DataFrame with ``evaluation_key`` and ``metric`` columns).
    """
    statistics = EnsembleStatistics(filter_path=filter_path, evaluator=evaluator)
    return statistics.bootstrap(
        # A fresh list per call: the default is an immutable tuple, and the
        # callee keeps receiving a list as before.
        ensemble_keys=list(ensemble_keys),
        bootstrap_key=bootstrap_key,
        bootstrap_size=bootstrap_size,
        number_of_samples=number_of_samples,
    )

#######################################

def assemble_results(experiment_path):
    """Print and display the mean metric table for each electricity/traffic evaluation.

    For every ``(label, csv name, statistics class)`` entry listed below, the
    statistics class is instantiated, ``collect_statistics`` is called on
    ``f'{experiment_path}/<csv name>'``, and the returned frame is reduced to
    the mean of its ``metric`` column per ``evaluation_key``.  All tables are
    computed first; afterwards each label is printed, followed by its table
    (evaluation keys as columns).

    Args:
        experiment_path: Prefix placed in front of each CSV file name.  The
            notebook passes patterns containing ``*``; presumably these are
            expanded as globs further down -- confirm.

    Returns:
        None.  The tables are only printed/displayed.
    """
    evaluations = (
        # Disabled benchmarks, kept for reference as
        # (label, csv name, statistics class, column order used for display):
        #   ('fred', 'fred.csv', FredStatistics, ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Average'])
        #   ('m4', 'M4.csv', M4Statistics, ['Yearly', 'Quarterly', 'Monthly', 'Others', 'Average'])
        #   ('m3', 'M3.csv', M3Statistics, ['M3Year', 'M3Quart', 'M3Month', 'M3Other', 'Average'])
        #   ('tourism', 'tourism.csv', TourismStatistics, ['Yearly', 'Quarterly', 'Monthly', 'Average'])
        ('electricity_deepar', 'electricity_deepar.csv', ElectricityStatisticsDeepAR),
        ('electricity_deepfact', 'electricity_deepfactors.csv', ElectricityStatisticsDeepFact),
        ('electricity_lw', 'electricity_last_window.csv', ElectricityStatisticsLastWindow),
        ('traffic_deepar', 'traffic_deepar.csv', TrafficStatisticsDeepAR),
        ('traffic_deepfact', 'traffic_deepfactors.csv', TrafficStatisticsDeepFact),
        ('traffic_lw', 'traffic_last_window.csv', TrafficStatisticsLastWindow),
    )

    # Run every evaluation before showing anything, as the original did.
    bootstraps = [
        (label, collect_statistics(f'{experiment_path}/{csv_name}', statistics_class()))
        for label, csv_name, statistics_class in evaluations
    ]

    # Select 'metric' *before* aggregating so unrelated columns of the
    # bootstrap frame are never averaged; on pandas >= 2.0 calling mean() on
    # a group-by that still contains non-numeric columns raises TypeError.
    result = {
        label: bootstrap.groupby('evaluation_key')[['metric']].mean().transpose()
        for label, bootstrap in bootstraps
    }

    for label, table in result.items():
        print(label)
        display(table)

In [3]:
# Summarise the runs under the `shared` experiment directory whose names end in
# `source_dataset=M4` (the `*` is presumably expanded as a glob downstream -- confirm).
# NOTE(review): the output saved below this cell lists fred/m4/m3/tourism/electricity/traffic
# tables, which the assemble_results defined in cell In [22] does not produce; it appears to
# come from an earlier version of the function and will not be reproduced by Restart & Run All.
assemble_results('/project/experiments/nbeats_meta/shared/*source_dataset=M4')

100%|██████████| 180/180 [03:01<00:00,  1.01s/it]
100%|██████████| 1/1 [01:36<00:00, 96.10s/it]
100%|██████████| 180/180 [01:23<00:00,  2.15it/s]
100%|██████████| 1/1 [01:02<00:00, 62.93s/it]
100%|██████████| 180/180 [00:02<00:00, 69.04it/s]
100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
100%|██████████| 180/180 [00:01<00:00, 98.22it/s] 
100%|██████████| 1/1 [00:00<00:00,  6.93it/s]
100%|██████████| 180/180 [00:05<00:00, 32.40it/s]
100%|██████████| 1/1 [00:00<00:00,  4.88it/s]
100%|██████████| 180/180 [00:09<00:00, 18.50it/s]
100%|██████████| 1/1 [00:00<00:00,  1.86it/s]

fred





evaluation_key,Yearly,Quarterly,Monthly,Weekly,Daily,Average
metric,15.011,13.362,6.088,5.669,8.646,11.601


m4


evaluation_key,Yearly,Quarterly,Monthly,Others,Average
metric,13.112,9.584,12.535,3.665,11.516


m3


evaluation_key,M3Year,M3Quart,M3Month,Average
metric,15.24,9.09,13.25,12.44


tourism


evaluation_key,Yearly,Quarterly,Monthly,Average
metric,23.57,14.66,19.33,18.82


electricity


evaluation_key,metric
metric,0.09244


traffic


evaluation_key,metric
metric,0.14764


In [4]:
# Summarise the runs under the `not_shared` experiment directory whose names end in
# `source_dataset=M4` (the `*` is presumably expanded as a glob downstream -- confirm).
# NOTE(review): the output saved below this cell lists fred/m4/m3/tourism/electricity/traffic
# tables, which the assemble_results defined in cell In [22] does not produce; it appears to
# come from an earlier version of the function and will not be reproduced by Restart & Run All.
assemble_results('/project/experiments/nbeats_meta/not_shared/*source_dataset=M4')

100%|██████████| 180/180 [03:39<00:00,  1.22s/it]
100%|██████████| 1/1 [01:48<00:00, 108.90s/it]
100%|██████████| 180/180 [01:24<00:00,  2.12it/s]
100%|██████████| 1/1 [00:47<00:00, 47.51s/it]
100%|██████████| 180/180 [00:03<00:00, 46.43it/s]
100%|██████████| 1/1 [00:00<00:00,  2.05it/s]
100%|██████████| 180/180 [00:02<00:00, 72.35it/s]
100%|██████████| 1/1 [00:00<00:00,  7.32it/s]
100%|██████████| 180/180 [00:06<00:00, 27.39it/s]
100%|██████████| 1/1 [00:00<00:00,  5.32it/s]
100%|██████████| 180/180 [00:10<00:00, 16.54it/s]
100%|██████████| 1/1 [00:00<00:00,  1.91it/s]

fred





evaluation_key,Yearly,Quarterly,Monthly,Weekly,Daily,Average
metric,15.061,13.481,6.245,5.709,9.21,11.7


m4


evaluation_key,Yearly,Quarterly,Monthly,Others,Average
metric,13.146,9.55,12.441,3.523,11.464


m3


evaluation_key,M3Year,M3Quart,M3Month,Average
metric,15.09,9.09,13.19,12.39


tourism


evaluation_key,Yearly,Quarterly,Monthly,Average
metric,24.05,14.78,19.32,18.92


electricity


evaluation_key,metric
metric,0.10097


traffic


evaluation_key,metric
metric,0.15158


In [5]:
# Summarise the runs under the `shared` experiment directory whose names end in
# `source_dataset=FRED` (the `*` is presumably expanded as a glob downstream -- confirm).
# NOTE(review): the output saved below this cell lists fred/m4/m3/tourism/electricity/traffic
# tables, which the assemble_results defined in cell In [22] does not produce; it appears to
# come from an earlier version of the function and will not be reproduced by Restart & Run All.
assemble_results('/project/experiments/nbeats_meta/shared/*source_dataset=FRED')

100%|██████████| 180/180 [03:18<00:00,  1.10s/it]
100%|██████████| 1/1 [02:45<00:00, 165.14s/it]
100%|██████████| 180/180 [01:26<00:00,  2.07it/s]
100%|██████████| 1/1 [01:14<00:00, 74.66s/it]
100%|██████████| 180/180 [00:03<00:00, 51.20it/s]
100%|██████████| 1/1 [00:00<00:00,  2.25it/s]
100%|██████████| 180/180 [00:02<00:00, 74.72it/s]
100%|██████████| 1/1 [00:00<00:00,  7.13it/s]
100%|██████████| 180/180 [00:06<00:00, 28.43it/s]
100%|██████████| 1/1 [00:00<00:00,  5.61it/s]
100%|██████████| 180/180 [00:10<00:00, 16.39it/s]
100%|██████████| 1/1 [00:00<00:00,  2.05it/s]

fred





evaluation_key,Yearly,Quarterly,Monthly,Weekly,Daily,Average
metric,15.344,13.334,6.024,5.187,8.634,11.724


m4


evaluation_key,Yearly,Quarterly,Monthly,Others,Average
metric,13.267,9.634,12.694,4.892,11.701


m3


evaluation_key,M3Year,M3Quart,M3Month,Average
metric,16.44,9.04,13.41,12.68


tourism


evaluation_key,Yearly,Quarterly,Monthly,Average
metric,23.53,14.47,21.23,19.94


electricity


evaluation_key,metric
metric,0.09209


traffic


evaluation_key,metric
metric,0.25842


In [6]:
# Summarise the runs under the `not_shared` experiment directory whose names end in
# `source_dataset=FRED` (the `*` is presumably expanded as a glob downstream -- confirm).
# NOTE(review): the output saved below this cell lists fred/m4/m3/tourism/electricity/traffic
# tables, which the assemble_results defined in cell In [22] does not produce; it appears to
# come from an earlier version of the function and will not be reproduced by Restart & Run All.
assemble_results('/project/experiments/nbeats_meta/not_shared/*source_dataset=FRED')

100%|██████████| 180/180 [03:19<00:00,  1.11s/it]
100%|██████████| 1/1 [03:54<00:00, 234.50s/it]
100%|██████████| 180/180 [01:27<00:00,  2.06it/s]
100%|██████████| 1/1 [01:09<00:00, 69.78s/it]
100%|██████████| 180/180 [00:03<00:00, 50.07it/s]
100%|██████████| 1/1 [00:00<00:00,  2.39it/s]
100%|██████████| 180/180 [00:02<00:00, 72.82it/s]
100%|██████████| 1/1 [00:00<00:00,  6.63it/s]
100%|██████████| 180/180 [00:06<00:00, 28.50it/s]
100%|██████████| 1/1 [00:00<00:00,  5.32it/s]
100%|██████████| 180/180 [00:10<00:00, 17.40it/s]
100%|██████████| 1/1 [00:00<00:00,  2.18it/s]

fred





evaluation_key,Yearly,Quarterly,Monthly,Weekly,Daily,Average
metric,15.417,13.341,6.031,5.177,8.603,11.761


m4


evaluation_key,Yearly,Quarterly,Monthly,Others,Average
metric,13.272,9.596,12.676,4.696,11.675


m3


evaluation_key,M3Year,M3Quart,M3Month,Average
metric,16.47,9.08,13.31,12.62


tourism


evaluation_key,Yearly,Quarterly,Monthly,Average
metric,23.43,14.45,20.47,19.46


electricity


evaluation_key,metric
metric,0.08615


traffic


evaluation_key,metric
metric,0.25986


In [23]:
# `shared` runs with `source_dataset=M4`: same path pattern as cell In [3]; the saved output
# below shows the six electricity/traffic tables built by the current assemble_results.
assemble_results('/project/experiments/nbeats_meta/shared/*source_dataset=M4')

100%|██████████| 180/180 [00:05<00:00, 33.03it/s]
100%|██████████| 1/1 [00:00<00:00,  5.13it/s]
100%|██████████| 180/180 [00:05<00:00, 32.75it/s]
100%|██████████| 1/1 [00:00<00:00,  5.85it/s]
100%|██████████| 180/180 [00:05<00:00, 32.54it/s]
100%|██████████| 1/1 [00:00<00:00,  4.79it/s]
100%|██████████| 180/180 [00:09<00:00, 19.11it/s]
100%|██████████| 1/1 [00:00<00:00,  2.11it/s]
100%|██████████| 180/180 [00:09<00:00, 18.83it/s]
100%|██████████| 1/1 [00:00<00:00,  2.16it/s]
100%|██████████| 180/180 [00:09<00:00, 19.00it/s]
100%|██████████| 1/1 [00:00<00:00,  2.12it/s]

electricity_deepar





evaluation_key,metric
metric,0.09408


electricity_deepfact


evaluation_key,metric
metric,0.09221


electricity_lw


evaluation_key,metric
metric,0.17792


traffic_deepar


evaluation_key,metric
metric,0.14721


traffic_deepfact


evaluation_key,metric
metric,0.245


traffic_lw


evaluation_key,metric
metric,0.15569


In [24]:
# `not_shared` runs with `source_dataset=M4`: same path pattern as cell In [4]; the saved output
# below shows the six electricity/traffic tables built by the current assemble_results.
assemble_results('/project/experiments/nbeats_meta/not_shared/*source_dataset=M4')

100%|██████████| 180/180 [00:06<00:00, 28.06it/s]
100%|██████████| 1/1 [00:00<00:00,  4.93it/s]
100%|██████████| 180/180 [00:06<00:00, 27.57it/s]
100%|██████████| 1/1 [00:00<00:00,  5.61it/s]
100%|██████████| 180/180 [00:07<00:00, 23.09it/s]
100%|██████████| 1/1 [00:00<00:00,  5.22it/s]
100%|██████████| 180/180 [00:11<00:00, 16.12it/s]
100%|██████████| 1/1 [00:00<00:00,  1.92it/s]
100%|██████████| 180/180 [00:11<00:00, 16.03it/s]
100%|██████████| 1/1 [00:00<00:00,  2.28it/s]
100%|██████████| 180/180 [00:11<00:00, 15.93it/s]
100%|██████████| 1/1 [00:00<00:00,  2.13it/s]

electricity_deepar





evaluation_key,metric
metric,0.10206


electricity_deepfact


evaluation_key,metric
metric,0.09535


electricity_lw


evaluation_key,metric
metric,0.18042


traffic_deepar


evaluation_key,metric
metric,0.15175


traffic_deepfact


evaluation_key,metric
metric,0.25021


traffic_lw


evaluation_key,metric
metric,0.15967


In [25]:
# `shared` runs with `source_dataset=FRED`: same path pattern as cell In [5]; the saved output
# below shows the six electricity/traffic tables built by the current assemble_results.
assemble_results('/project/experiments/nbeats_meta/shared/*source_dataset=FRED')

100%|██████████| 180/180 [00:06<00:00, 27.69it/s]
100%|██████████| 1/1 [00:00<00:00,  4.87it/s]
100%|██████████| 180/180 [00:06<00:00, 26.18it/s]
100%|██████████| 1/1 [00:00<00:00,  5.48it/s]
100%|██████████| 180/180 [00:06<00:00, 26.15it/s]
100%|██████████| 1/1 [00:00<00:00,  4.70it/s]
100%|██████████| 180/180 [00:10<00:00, 16.47it/s]
100%|██████████| 1/1 [00:00<00:00,  2.12it/s]
100%|██████████| 180/180 [00:11<00:00, 15.90it/s]
100%|██████████| 1/1 [00:00<00:00,  2.27it/s]
100%|██████████| 180/180 [00:11<00:00, 15.79it/s]
100%|██████████| 1/1 [00:00<00:00,  1.92it/s]

electricity_deepar





evaluation_key,metric
metric,0.09136


electricity_deepfact


evaluation_key,metric
metric,0.08379


electricity_lw


evaluation_key,metric
metric,0.20536


traffic_deepar


evaluation_key,metric
metric,0.26001


traffic_deepfact


evaluation_key,metric
metric,0.35461


traffic_lw


evaluation_key,metric
metric,0.2651


In [26]:
# `not_shared` runs with `source_dataset=FRED`: same path pattern as cell In [6]; the saved output
# below shows the six electricity/traffic tables built by the current assemble_results.
assemble_results('/project/experiments/nbeats_meta/not_shared/*source_dataset=FRED')

100%|██████████| 180/180 [00:05<00:00, 31.50it/s]
100%|██████████| 1/1 [00:00<00:00,  4.67it/s]
100%|██████████| 180/180 [00:06<00:00, 27.29it/s]
100%|██████████| 1/1 [00:00<00:00,  5.35it/s]
100%|██████████| 180/180 [00:06<00:00, 26.93it/s]
100%|██████████| 1/1 [00:00<00:00,  4.38it/s]
100%|██████████| 180/180 [00:10<00:00, 17.48it/s]
100%|██████████| 1/1 [00:00<00:00,  2.07it/s]
100%|██████████| 180/180 [00:11<00:00, 16.13it/s]
100%|██████████| 1/1 [00:00<00:00,  2.10it/s]
100%|██████████| 180/180 [00:11<00:00, 15.15it/s]
100%|██████████| 1/1 [00:00<00:00,  2.15it/s]

electricity_deepar





evaluation_key,metric
metric,0.08583


electricity_deepfact


evaluation_key,metric
metric,0.08003


electricity_lw


evaluation_key,metric
metric,0.20676


traffic_deepar


evaluation_key,metric
metric,0.25867


traffic_deepfact


evaluation_key,metric
metric,0.34753


traffic_lw


evaluation_key,metric
metric,0.2652
