# M4 Dataset Benchmark Code
> Generic code to experiment and produce the final benchmark.py codes

In [1]:
# python
import numpy as np
import pandas as pd
# ml
from sklearn.preprocessing import MinMaxScaler
# local
from models.benchmark import NaivePredictor
from models.cnn import SimpleCNN
from utils.plot import plot_predictions
from experiment import Experiment
from utils.m4 import smape, mase, M4DatasetGenerator
from utils.ml import print_num_weights
#
from neuralforecast.core import NeuralForecast
from neuralforecast.models import Informer, Autoformer, FEDformer, PatchTST
from neuralforecast.models import NBEATS, NHITS

In [None]:
model_name = 'Informer'
run_sp = 'Weekly'

assert run_sp in ['Hourly','Daily','Weekly','Monthly','Quarterly','Yearly']
prediction_frequency = {'Hourly':'h','Weekly':'W',}[run_sp]

np.random.seed(123)

def get_model(model_name, model_conf):
    if model_name == 'cnn':
        return SimpleCNN(model_conf['input_size'], 64)
    elif model_name == 'naive':
        return NaivePredictor()
    elif model_name == 'NBEATS':
        return NBEATS(input_size=2 * model_conf['input_size'], h=model_conf['forecasting_horizon'], max_steps=50)
    elif model_name == 'NHITS':
        return NHITS(input_size=2 * model_conf['input_size'], h=model_conf['forecasting_horizon'], max_steps=50)
    elif model_name == 'Informer':
        return Informer(
                hidden_size=32, n_head=4, conv_hidden_size=8, encoder_layers= 2, decoder_layers=1,
                input_size=model_conf['input_size'], # Input size
                h=model_conf['forecasting_horizon'], # Forecasting horizon
                max_steps=100, # Number of training iterations
                batch_size=1024, learning_rate=0.01, )


metrics_table = {'serie_id':[],'smape':[],'mase':[],}
smape_list, mase_list = [], []
m4_data = M4DatasetGenerator([run_sp])
num_of_series = m4_data.data_dict[run_sp]['num']
block_size = m4_data.data_dict[run_sp]['fh']
fh = m4_data.data_dict[run_sp]['fh']


#
for train_serie, test_serie, serie_id, fh, freq, serie_sp in m4_data.generate(random=True):
    assert fh == block_size
    # print(f'Serie {serie_id} ({run_sp}) Train serie: {len(train_serie)} steps - Test serie: {len(test_serie)} steps | fh={fh} | freq={freq}')
    # synthetic days
    train_daterange = pd.date_range(start='1980', periods=len(train_serie), freq=prediction_frequency)
    test_daterange = pd.date_range(start=train_daterange[-1], periods=len(test_serie)+1, freq=prediction_frequency)[1:] # len + 1 because the first day is on train dates
    #
    model_conf = {}
    model_conf['input_size'] = min(fh*4, len(train_serie)//10)
    model_conf['forecasting_horizon'] = fh

    model = get_model(model_name, model_conf)
    print_num_weights(model)

    if model_name in ['Informer','autoformer','fedformer','patchtst','NHITS']:
        nf = NeuralForecast(models=[model], freq=prediction_frequency, local_scaler_type='standard')
        train_df = pd.DataFrame({
            'unique_id':serie_id,
            'y':train_serie, 
            'ds':train_daterange
            })
        val_size = 0#int(.2 * len(train_serie)) # 20% for validation
        # model train
        nf.fit(df=train_df, val_size=val_size, verbose=False)
        pred_y = nf.predict()
        #
        assert all(pred_y.ds == test_daterange) # check 
        pred_y = pred_y[model_name].values
    else:
        exp_conf = { 'model': model, 'model_n_parameters': sum(p.numel() for p in model.parameters() if p.requires_grad), 
                    'input_len':block_size, 'forecast_horizon':fh, 'feature_dim':1,
                    'frequency':serie_sp.lower(),
                    'scaler':MinMaxScaler((-1,1)),
                    'decompose': False, #detrend and de-sazonalize
                    'freq':freq, 'device':'cuda', 'verbose':False,}
        train_conf = {
            'epochs':100,
            'lr': 1e-3, 
            'batch_size':1024,
            'validate_freq':10,
            'verbose':False, # stop training if loss dont decrease 0.5% 5 consecutive steps
            # 'early_stop':EarlyStopperPercent(patience=5, min_percent=0.005, verbose=False),
        }
        exp = Experiment(exp_conf)
        exp.set_dataset(linear_serie=train_serie, train=True)
        exp.train(train_conf)
        # test
        last_train_values = train_serie[-block_size:]
        pred_y = exp.predict(last_train_values, fh)
    #
    #
    # check if negative or extreme (M4)
    pred_y[pred_y < 0] = 0
    pred_y[pred_y > (1000 * np.max(train_serie))] = np.max(train_serie)

    # Metrics
    metrics_table['serie_id'].append(serie_id)
    metrics_table['smape'].append(smape(test_serie, pred_y)*100)
    metrics_table['mase'].append(mase(train_serie, test_serie, pred_y, freq))
    print(f'Serie {serie_id}-{serie_sp} Finished')
    print(test_serie[:10])
    print(pred_y[:10])
    plot_predictions(train_serie, test_serie, pred_y)
    
#
metrics_dict = {
    'smape_mean': np.round(np.mean(metrics_table['smape'], dtype=float), 3), 
    'mase_mean':  np.round(np.mean(metrics_table['mase'], dtype=float), 3),
    #
    'smape_std':  np.round(np.std(metrics_table['smape'], dtype=float), 3),
    'mase_std':   np.round(np.std(metrics_table['mase'], dtype=float), 3),
}
print(f'''
    Experiment Finished
''')
for k, v in metrics_dict.items(): print(f'      {k}: {v}')

NameError: name 'np' is not defined

In [7]:
model_name = 'naive'
run_sp = 'Weekly'
m4_data = M4DatasetGenerator([run_sp])

def get_model(model_name, model_conf):
    if model_name == 'naive':
        return NaivePredictor()
    
np.random.seed(123)
#
# Inicializations
#
metrics_table = {'serie_id':[],'smape':[],'mase':[],}
smape_list, mase_list = [], []
num_of_series = m4_data.data_dict[run_sp]['num']
block_size = m4_data.data_dict[run_sp]['fh']
fh = m4_data.data_dict[run_sp]['fh']
#
# Model Hiperparams
#
if model_name == 'naive':
    scaler = None
    decompose = False
    epochs = 1
    batch_size = 0
else:
    scaler = None #MinMaxScaler((-1,1))
    # d_model = 64
    # batch_size = 512 #512
    # epochs = 128

MAX_SERIES = 10**8
for train_serie, test_serie, serie_id, fh, freq, serie_sp in m4_data.generate(n_series=MAX_SERIES, random=False):
    assert fh == block_size
    model_conf = {
        'input_size':fh,
        'h':fh
    }
    
    model = get_model(model_name, model_conf) #
    
    exp_conf = {
            # Model
            'model': model,
            'model_n_parameters': sum(p.numel() for p in model.parameters() if p.requires_grad), 
            'input_len':block_size,
            'forecast_horizon':fh,
            'feature_dim':1,
            # Data
            'frequency':serie_sp.lower(),
            'scaler':scaler,
            'decompose': decompose, #detrend and de-sazonalize
            'freq':freq,
            # Others
            'device':'cuda',
            'verbose':False,
    }
    train_conf = {
        'epochs':epochs,
        'lr':lr, 
        'batch_size':batch_size,
        'validate_freq':10,
        'verbose':False, # stop training if loss dont decrease 0.5% 5 consecutive steps
        'early_stop':EarlyStopperPercent(patience=5, min_percent=0.005, verbose=False),
    }
    if model_name == 'decoder_transformer':
        print(serie_id)
        train_x = torch.tensor(scaler.fit_transform(train_serie.reshape(-1, 1)).reshape(-1), dtype=torch.float32)

        x, y, m = get_x_y(train_x, block_size=512)
        train_conf['train_dataset'] = DecoderDataset(x.unsqueeze(-1), y.unsqueeze(-1), m)
        model.fit(train_conf)

        train_x = train_x.to('cuda').view(1, -1, 1)
        pred_y = model.predict(train_x, len(test_serie)).cpu().numpy()
        pred_y = scaler.inverse_transform(pred_y.reshape(-1,1)).reshape(-1)



    else:
        exp = Experiment(exp_conf)
        exp.set_dataset(linear_serie=train_serie, train=True)
        # exp.set_dataset(linear_serie=test_serie)
        exp.train(train_conf)
        # test
        last_train_values = train_serie[-block_size:]
        pred_y = exp.predict(last_train_values, fh)
    
    # check if negative or extreme (M4)
    pred_y[pred_y < 0] = 0
    pred_y[pred_y > (1000 * np.max(train_serie))] = np.max(train_serie)

    # Metrics
    metrics_table['serie_id'].append(serie_id)
    metrics_table['smape'].append(smape(test_serie, pred_y)*100)
    metrics_table['mase'].append(mase(train_serie, test_serie, pred_y, freq))
    print(f'Serie {serie_id}-{serie_sp} Finished')
    plot_predictions(train_serie, test_serie, pred_y)
    
#
metrics_dict = {
    'smape_mean': np.round(np.mean(metrics_table['smape'], dtype=float), 3), 
    'mase_mean':  np.round(np.mean(metrics_table['mase'], dtype=float), 3),
    #
    'smape_std':  np.round(np.std(metrics_table['smape'], dtype=float), 3),
    'mase_std':   np.round(np.std(metrics_table['mase'], dtype=float), 3),
}
print(f'''
    Experiment Finished
''')
for k, v in metrics_dict.items(): print(f'      {k}: {v}')

SyntaxError: invalid syntax (2146854825.py, line 27)

Seed set to 1


Loading M4 Data...
Loaded:
    => Weekly has 359 series


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 5.80 GiB of which 26.75 MiB is free. Process 16155 has 5.09 GiB memory in use. Including non-PyTorch memory, this process has 82.00 MiB memory in use. Of the allocated memory 1.50 KiB is allocated by PyTorch, and 2.00 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [5]:
model_name = 'vanilla_transformer' # ['vanilla_transformer','cnn','decoder_transformer'][2]
run_sp = 'Weekly'

assert(model_name in ['cnn','naive', 'vanilla_transformer','decoder_transformer'])
assert run_sp in ['Hourly','Daily','Weekly','Monthly','Quarterly','Yearly']
m4_data = M4DatasetGenerator([run_sp])

def get_model(model_name, model_conf):
    if model_name == 'cnn':
        return SimpleCNN(model_conf['block_size'], model_conf['d_model'])
    elif model_name == 'naive':
        return NaivePredictor()
    elif model_name == 'vanilla_transformer':
        return  VanillaTransformer(model_conf)
if TRACK:
    mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
    mlflow.set_experiment(f"M4Benchmark {model_name}")
    mlflow.set_experiment_tag('model', model_name)

np.random.seed(123)
#
# Inicializations
#
metrics_table = {'serie_id':[],'smape':[],'mase':[],}
smape_list, mase_list = [], []
num_of_series = m4_data.data_dict[run_sp]['num']
block_size = m4_data.data_dict[run_sp]['fh']
fh = m4_data.data_dict[run_sp]['fh']
#
# Model Hiperparams
#
d_model = 64
batch_size = 512 #512
epochs = 128
scaler = MinMaxScaler((-1,1))
decompose = False
MAX_SERIES = 10
#
# model_conf = {'block_size':block_size, 'd_model':d_model}
if model_name == 'vanilla_transformer':
    model_conf = {'block_size':block_size, 'd_model': 16, 'num_heads': 2, 'num_layers': 2,'dim_feedforward':128,'device':'cuda'}
    lr = 1e-4
elif model_name == 'decoder_transformer':
    model_conf = {
    'd_model': 32, 
    'num_heads': 4, 
    'num_layers': 4,
    'dim_feedforward':128,
    'block_size':512,
    'device':'cuda',
    'pad_token':-20
}
    lr = 1e-4
else:
    lr = 1e-3

#
if TRACK:
    mlflow.start_run(run_name=f'{run_sp}')
    mlflow.log_param('model_name', model_name)
    mlflow.log_param('d_model', d_model)
    mlflow.log_param('block_size', block_size)
    mlflow.log_param('forecast_horizon', fh)
    mlflow.log_param('decompose', decompose)

    mlflow.log_param('series', num_of_series)
    mlflow.log_param('scaler', scaler)
    mlflow.log_param('batch_size', batch_size)
    mlflow.log_param('epochs', epochs)
    mlflow.log_param('lr', lr)
    
for train_serie, test_serie, serie_id, fh, freq, serie_sp in m4_data.generate(n_series=MAX_SERIES, random=True):
    assert fh == block_size
    model = get_model(model_name, model_conf) #
    
    exp_conf = {
            # Model
            'model': model,
            'model_n_parameters': sum(p.numel() for p in model.parameters() if p.requires_grad), 
            'input_len':block_size,
            'forecast_horizon':fh,
            'feature_dim':1,
            # Data
            'frequency':serie_sp.lower(),
            'scaler':scaler,
            'decompose': decompose, #detrend and de-sazonalize
            'freq':freq,
            # Others
            'device':'cuda',
            'verbose':False,
    }
    train_conf = {
        'epochs':epochs,
        'lr':lr, 
        'batch_size':batch_size,
        'validate_freq':10,
        'verbose':False, # stop training if loss dont decrease 0.5% 5 consecutive steps
        'early_stop':EarlyStopperPercent(patience=5, min_percent=0.005, verbose=False),
    }
    if model_name == 'decoder_transformer':
        print(serie_id)
        train_x = torch.tensor(scaler.fit_transform(train_serie.reshape(-1, 1)).reshape(-1), dtype=torch.float32)

        x, y, m = get_x_y(train_x, block_size=512)
        train_conf['train_dataset'] = DecoderDataset(x.unsqueeze(-1), y.unsqueeze(-1), m)
        model.fit(train_conf)

        train_x = train_x.to('cuda').view(1, -1, 1)
        pred_y = model.predict(train_x, len(test_serie)).cpu().numpy()
        pred_y = scaler.inverse_transform(pred_y.reshape(-1,1)).reshape(-1)



    else:
        exp = Experiment(exp_conf)
        exp.set_dataset(linear_serie=train_serie, train=True)
        # exp.set_dataset(linear_serie=test_serie)
        exp.train(train_conf)
        # test
        last_train_values = train_serie[-block_size:]
        pred_y = exp.predict(last_train_values, fh)
    
    # check if negative or extreme (M4)
    pred_y[pred_y < 0] = 0
    pred_y[pred_y > (1000 * np.max(train_serie))] = np.max(train_serie)

    # Metrics
    metrics_table['serie_id'].append(serie_id)
    metrics_table['smape'].append(smape(test_serie, pred_y)*100)
    metrics_table['mase'].append(mase(train_serie, test_serie, pred_y, freq))
    print(f'Serie {serie_id}-{serie_sp} Finished')
    plot_predictions(train_serie, test_serie, pred_y)
    
#
metrics_dict = {
    'smape_mean': np.round(np.mean(metrics_table['smape'], dtype=float), 3), 
    'mase_mean':  np.round(np.mean(metrics_table['mase'], dtype=float), 3),
    #
    'smape_std':  np.round(np.std(metrics_table['smape'], dtype=float), 3),
    'mase_std':   np.round(np.std(metrics_table['mase'], dtype=float), 3),
}
if TRACK:
    mlflow.log_metrics(metrics_dict)
    mlflow.log_table(metrics_table, artifact_file='metrics_table')

print(f'''
    Experiment Finished
''')
for k, v in metrics_dict.items(): print(f'      {k}: {v}')

Loading M4 Data...
Loaded:
    => Weekly has 359 series


TypeError: VanillaTransformer.__init__() missing 1 required positional argument: 'input_size'

---

In [20]:
import pandas as pd
import numpy as np
from IPython.display import display
import json
metrics_table = json.load(open('results/Weekly_Informer_metrics_table.json'))
display(pd.DataFrame(metrics_table).head(10))

metrics_dict = {
    'smape_mean': np.round(np.mean(metrics_table['smape'], dtype=float), 3), 
    'mase_mean':  np.round(np.mean(metrics_table['mase'], dtype=float), 3),
    #
    'smape_std':  np.round(np.std(metrics_table['smape'], dtype=float), 3),
    'mase_std':   np.round(np.std(metrics_table['mase'], dtype=float), 3),
}
print(f'''
    Experiment Finished
''')
for k, v in metrics_dict.items(): print(f'      {k}: {v}')

Unnamed: 0,serie_id,smape,mase
0,W1,14.393604,74.858755
1,W2,11.724994,32.572269



    Experiment Finished

      smape_mean: 13.059
      mase_mean: 53.716
      smape_std: 1.334
      mase_std: 21.143


In [None]:
print(f'''
    Experiment Finished
''')
for k, v in metrics_dict.items(): print(f'      {k}: {v}')