# M4 Dataset Benchmark Code
> Generic code to experiment and produce the final benchmark.py codes \


The minimum numbers of observations in the training
- 13 for yearly
- 16 for quarterly
- 42 for monthly
- 80 for weekly
- 93 for daily
- 700 for hourly series.



Cada batch é composto de sequencias de diferentes séries, com no máximo block_size steps. \
Caso a serie possuir menos de block_size instantes, realiza-se o padding

In [1]:
# python
import numpy as np
# ml
from sklearn.preprocessing import MinMaxScaler
import mlflow
# local
from models.benchmark import NaivePredictor
from models.cnn import SimpleCNN
from models.transformer import VanillaTransformer, DecoderOnlyTransformer
from utils.plot import plot_predictions
from utils.ml import EarlyStopperPercent
from experiment import Experiment
from utils.m4 import smape, mase
from utils.m4 import load_m4_data
import pandas as pd, numpy as np
import torch
from sklearn.preprocessing import MinMaxScaler

## Experiment Definition

In [2]:
TRACK = False
PAD = -20
model_name = ['cnn','naive', 'vanilla_transformer','decoder_transformer'][3]
run_sp = 'Weekly'

In [3]:
from models.transformer import DecoderOnlyTransformer
from utils.ml import DecoderDataset

In [4]:
#
# Inicializations
#
block_size = 512
n_series = 500
#
# Model Hiperparams
#
model = DecoderOnlyTransformer({
    'd_model': 32, 
    'num_heads': 4, 
    'num_layers': 4,
    'dim_feedforward':128,
    'block_size':block_size,
    'device':'cuda',
    'pad_token':PAD
}).to('cuda')
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Num of weights:',pytorch_total_params)

Num of weights: 67297


In [6]:
model = torch.load('decoder_only_weekly_38.model').to('cuda')

## Test

In [7]:
from utils.m4 import M4DatasetGenerator
m4_data = M4DatasetGenerator(['Weekly'])
scaler = MinMaxScaler((-1,1))

for train_serie, test_serie, serie_id, fh, eq, serie_sp in m4_data.generate(n_series=40, random=True):
    print(serie_id)
    scaler.fit(train_serie.reshape(-1, 1))    
    x = scaler.transform(train_serie.reshape(-1, 1)).reshape(1, -1, 1)
    x = torch.tensor(x, dtype=torch.float32).to('cuda')
    pred_y = model.predict(x, len(test_serie)).cpu().numpy()
    pred_y = scaler.inverse_transform(pred_y.reshape(-1,1)).reshape(-1)
    plot_predictions(train_serie, test_serie, pred_y)

Loading M4 Data...
Loaded:
    => Weekly has 359 series
W176


W197


W26


W68


W212


W104


W349


W186


W24


W73


W346


W43


W219


W168


W69


W1


W76


W312


W251


W7


W276


W45


W192


W326


W313


W153


W184


W113


W252


W190


W291


W313


W202


W39


W261


W138


W344


W196


W73


W340


In [7]:
from utils.m4 import M4DatasetGenerator
m4_data = M4DatasetGenerator(['Weekly'])
scaler = MinMaxScaler((-1,1))

for train_serie, test_serie, serie_id, fh, eq, serie_sp in m4_data.generate(n_series=40, random=True):
    print(serie_id)
    scaler.fit(train_serie.reshape(-1, 1))    
    x = scaler.transform(train_serie.reshape(-1, 1)).reshape(1, -1, 1)
    x = torch.tensor(x, dtype=torch.float32).to('cuda')
    pred_y = model.predict(x, len(test_serie)).cpu().numpy()
    pred_y = scaler.inverse_transform(pred_y.reshape(-1,1)).reshape(-1)
    plot_predictions(train_serie, test_serie, pred_y)

Loading M4 Data...
Loaded:
    => Weekly has 359 series
W176


W197


W26


W68


W212


W104


W349


W186


W24


W73


W346


W43


W219


W168


W69


W1


W76


W312


W251


W7


W276


W45


W192


W326


W313


W153


W184


W113


W252


W190


W291


W313


W202


W39


W261


W138


W344


W196


W73


W340


In [7]:
block_size = 512
m = MultiSerieGenerator(['Weekly'], device='cpu', verbose=True)
X, M = m.get_batches(block_size=block_size, n_series=2)
print(X.shape)

NameError: name 'MultiSerieGenerator' is not defined

In [None]:
# test
last_train_values = train_serie[-block_size:]
pred_y = exp.predict(last_train_values, fh)

# check if negative or extreme (M4)
pred_y[pred_y < 0] = 0
pred_y[pred_y > (1000 * np.max(train_serie))] = np.max(train_serie)

# Metrics
metrics_table['serie_id'].append(serie_id)
metrics_table['smape'].append(smape(test_serie, pred_y)*100)
metrics_table['mase'].append(mase(train_serie, test_serie, pred_y, freq))
print(f'Serie {serie_id}-{serie_sp} Finished -> smape: {smape(test_serie, pred_y)*100} | mase:{mase(train_serie, test_serie, pred_y, freq)}')
# plot_predictions(train_serie, test_serie, pred_y)

#
metrics_dict = {
'smape_mean': np.round(np.mean(metrics_table['smape'], dtype=float), 3), 
'mase_mean':  np.round(np.mean(metrics_table['mase'], dtype=float), 3),
#
'smape_std':  np.round(np.std(metrics_table['smape'], dtype=float), 3),
'mase_std':   np.round(np.std(metrics_table['mase'], dtype=float), 3),
}
if TRACK:
mlflow.log_metrics(metrics_dict)
mlflow.log_table(metrics_table, artifact_file='metrics_table')

print(f'Full Pass {1+full_pass_i:5}:', end='')
for k, v in metrics_dict.items(): print(f'      {k}: {v}', end='')
print()

IndentationError: expected an indented block after 'if' statement on line 24 (186095145.py, line 25)