# M4 Dataset Transfer Learning Training
> Code that train the Vanilla Transformer on all train series from M4 Dataset

In [2]:
from utils.m4 import MultiSerieGenerator
import numpy as np
import torch

np.random.seed(777)
generator =  MultiSerieGenerator(['Weekly'], input_len=1000, forecast_horizon=10,device='cuda')
dataset = generator.get_batches(n_series=2, random=True)
dataset = generator.get_batches(n_series=2, random=True)
dataset = generator.get_batches(n_series=2, random=True)

    => Weekly has 359 series


RuntimeError: Tensors must have same number of dimensions: got 2 and 3

In [1]:
from utils.m4 import load_m4_data
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from utils.ml import make_batches, TransformerDataset
import torch

class MultiSerieGenerator():
    def __init__(self, freq, input_len, forecast_horizon):
        self.input_len = input_len
        self.forecast_horizon = forecast_horizon
        self.device = 'cuda'
        print('Loading M4 Data...')
        self.data_dict, self.df_info = load_m4_data(freq)
        print('Loaded:')
        for SP in freq:print(f"    => {SP} has {self.data_dict[SP]['num']} series")
    
    def get_batches(self, n_series=None, random=False, seed=None, verbose=False):
        if seed is not None:
            np.random.seed(seed)
        df_info, data_dict = self.df_info, self.data_dict
        if n_series is None:
            n_series = len(df_info)
        if random:
            idx = np.random.randint(low=0, high=len(df_info), size=n_series)
        else:
            idx = range(n_series)
        if verbose: print(f'Generating {len(idx)} series..')
        # faz o scalind individual das series completas
        scaler = MinMaxScaler((-1, 1))
        all_enc_x, all_dec_x, all_tgt_y  = [], [], []
        for serie_index in idx:
            serie_info = df_info.iloc[serie_index]
            serie_id = serie_info.M4id
            print(serie_id, end=', ')
            serie_sp = serie_info.SP
            fh = data_dict[serie_sp]['fh']
            freq = data_dict[serie_sp]['freq']
            train_df = data_dict[serie_sp]['train']
            test_df = data_dict[serie_sp]['test']
            # the V1 column is the name of the serie
            train_serie = train_df[train_df.V1 == serie_id].dropna(axis=1).values.reshape(-1)[1:]
            test_serie = test_df[test_df.V1 == serie_id].dropna(axis=1).values.reshape(-1)[1:]
            test_serie = test_serie[:fh] # forecast only fh steps
            train_serie = np.asarray(train_serie, dtype=np.float32)
            test_serie = np.asarray(test_serie, dtype=np.float32)
            #
            train_serie = scaler.fit_transform(train_serie.reshape(-1, 1)).reshape(-1)
            test_serie = scaler.transform(test_serie.reshape(-1, 1)).reshape(-1)
            #
            enc_x, dec_x, tgt_y = make_batches(train_serie, self.input_len, self.forecast_horizon)
            # enc_x, dec_x, tgt_y = enc_x.to(self.device), dec_x.to(self.device), tgt_y.to(self.device)
            all_enc_x.append(enc_x)
            all_dec_x.append(dec_x)
            all_tgt_y.append(tgt_y)
        # stack
        all_enc_x = torch.vstack(all_enc_x)
        all_dec_x = torch.vstack(all_dec_x)
        all_tgt_y = torch.vstack(all_tgt_y)
        # shuffle
        all_enc_x = all_enc_x.to(self.device)
        all_dec_x = all_dec_x.to(self.device)
        all_tgt_y = all_tgt_y.to(self.device)
        print(f'Generated {len(all_enc_x)} batches from {len(idx)} series-> shape {all_enc_x.shape}')

        return  TransformerDataset(all_enc_x, all_dec_x, all_tgt_y)

np.random.seed(777)
generator =  MultiSerieGenerator(['Weekly'], input_len=10, forecast_horizon=10)
dataset = generator.get_batches(n_series=5, random=True, verbose=False)
dataset = generator.get_batches(n_series=5, random=True, verbose=False)
dataset = generator.get_batches(n_series=5, random=True, verbose=False)


Loading M4 Data...
Loaded:
    => Weekly has 359 series
W104, W304, W60, W88, W72, Generated 5418 batches from 5 series-> shape torch.Size([5418, 10, 1])
W158, W128, W117, W303, W281, Generated 5585 batches from 5 series-> shape torch.Size([5585, 10, 1])
W296, W143, W33, W348, W322, Generated 2792 batches from 5 series-> shape torch.Size([2792, 10, 1])


In [3]:
# %%writefile experiment.py
import numpy as np
import time
import torch
from torch import nn
from utils.timeserie import split_sequence
from torch.utils.data import DataLoader
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error
import mlflow
#
from utils.ml import SimpleDataset, TransformerDataset
from utils.plot import plot_train_history, plot_predictions
from utils.m4 import deseasonalize, detrend, smape
# Transformers
from utils.ml import make_batches
from utils.plot import generate_square_subsequent_mask
#
class Experiment():

    def __init__(self, config: dict):
        # Set experiment config
        expected_vars = ['model','input_len','feature_dim','frequency',
                         'device','scaler','verbose','freq','decompose',
                         'forecast_horizon',]
        for v in expected_vars:
            assert v in config.keys(), f'Key "{v}" is missing on params dict'
        for k, v in config.items():
            vars(self)[k] = v
        self.config = config
        #
        # Pre-configuration (to produce same result in inference/predict)
        #
        np.random.seed(7); torch.manual_seed(7)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(7)
        #
        #
        #
        self.model = self.model.to(self.device)
        self.validation_dataset = None
        self.train_dataset = None

    def split_chunks(self, linear_serie, expand_dim=True):
        x, y = split_sequence(linear_serie, self.input_len)
        x, y = torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)
        if expand_dim:
            x, y = x.unsqueeze(-1), y.unsqueeze(-1)
        return x, y
    
    def ts_transform(self, ts, train=False):
        # de-sazonality
        if train: # calculate
            if self.verbose: print('Decomposition FIT')
            self.seasonality_in = deseasonalize(ts, self.freq)
        for i in range(0, len(ts)):
            ts[i] = ts[i] * 100 / self.seasonality_in[i % self.freq]

        # de-trending
        if train: # calculate
            self.a, self.b = detrend(ts)   
        for i in range(0, len(ts)):
            ts[i] = ts[i] - ((self.a * i) + self.b)
        
        return ts

    def ts_inverse_transform(self, ts):
        # add trend
        for i in range(0, len(ts)):
            ts[i] = ts[i] + ((self.a * i) + self.b)
        # add seasonality
        for i in range(0, len(ts)):
            ts[i] = ts[i] * self.seasonality_in[i % self.freq] / 100
        
        return ts
    
    def set_dataset(self, linear_serie, train=False, validation=False):
        linear_serie = linear_serie.copy()
        if self.decompose:
            linear_serie = self.ts_transform(linear_serie, train)

        if self.scaler is not None:
            if train: # FIT Scaler
                linear_serie = self.scaler.fit_transform(linear_serie.reshape(-1,1)).reshape(-1)
                if self.verbose: print('Scaler FIT')
                
            if validation:
                linear_serie = self.scaler.transform(linear_serie.reshape(-1,1)).reshape(-1)
        
        # transformer specific input shape
        if self.model.is_transformer():
            enc_x, dec_x, tgt_y = make_batches(linear_serie, 
                                               self.input_len, self.forecast_horizon)
            enc_x, dec_x, tgt_y = enc_x.to(self.device), dec_x.to(self.device), tgt_y.to(self.device)
            data = TransformerDataset(enc_x, dec_x, tgt_y)
        else: # naive, cnn, mlp, lstm, 
            x, y = self.split_chunks(linear_serie)
            x, y = x.to(self.device), y.to(self.device)
            data = SimpleDataset(x, y)
        # Save
        if train:
            self.train_dataset = data
        if validation:
            self.validation_dataset = data
        
        return data

    def train(self, train_conf):
        train_conf['train_dataset'] = self.train_dataset
        train_conf['validation_dataset'] = self.validation_dataset
        self.model.fit(train_conf)
    
    def train_history(self, offset=0):
        plot_train_history(self.model.train_loss_history, self.model.validation_loss_history, offset)
    
    def preprocess(self, ts): # from numpy to torch
        ts = np.asarray(ts, dtype=np.float32).copy()
        if self.decompose:
            ts = self.ts_transform(ts, train=False)
        if self.scaler is not None:
            ts = self.scaler.transform(ts.reshape(-1,1)).reshape(-1)
        ts = torch.tensor(ts, dtype=torch.float32)\
                                .view(1,-1, 1).to(self.device)
        return ts

    def posprocess(self, ts): # from torch to numpy
        ts = ts.to('cpu').detach().numpy()
        if self.scaler is not None:
            ts = self.scaler.inverse_transform(ts.reshape(-1,1)).reshape(-1)
        if self.decompose:
            ts = self.ts_inverse_transform(ts)
        # ts = ts.to(self.device)
        return ts.flatten()

    def predict(self, ts, forecast_horizon):
        # assert(len(ts) == self.input_len) 
        ts = self.preprocess(ts) # TODO add ts copy 
        output = self.model.predict(ts, forecast_horizon)
        # rescale and add trend, etc..
        output = self.posprocess(output)
        return output
    
    
    def print_metrics(self, real_y, pred_y):
        mape = mean_absolute_percentage_error(real_y, pred_y)
        err_smape = smape(real_y, pred_y)
        mse = mean_squared_error(real_y, pred_y)
        print(f'MSE: {mse:.4f} | MAPE: {mape:.4f} | sMAPE: {err_smape:.4f}')


In [4]:
# python
import argparse
import numpy as np
# ml
from sklearn.preprocessing import MinMaxScaler
import mlflow
# local
from models.benchmark import NaivePredictor
from models.cnn import SimpleCNN
from models.transformer import VanillaTransformer
from utils.plot import plot_predictions
from utils.ml import EarlyStopperPercent
from experiment import Experiment
from utils.m4 import smape, mase, M4DatasetGenerator

## Experiment Definition

In [5]:
TRACK = False

In [6]:
model_name = ['vanilla_transformer','cnn'][0]
run_sp = 'Weekly'

assert(model_name in ['cnn','naive', 'vanilla_transformer'])
assert run_sp in ['Hourly','Daily','Weekly','Monthly','Quarterly','Yearly']
m4_data = M4DatasetGenerator([run_sp])

def get_model(model_name, model_conf):
    if model_name == 'cnn':
        return SimpleCNN(model_conf['block_size'], model_conf['d_model'])
    elif model_name == 'naive':
        return NaivePredictor()
    elif model_name == 'vanilla_transformer':
        return  VanillaTransformer(model_conf)
    
if TRACK:
    mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
    mlflow.set_experiment(f"M4Benchmark {model_name}")
    mlflow.set_experiment_tag('model', model_name)

np.random.seed(123)

Loading M4 Data...
Loaded:
    => Weekly has 359 series


In [7]:
#
# Inicializations
#
num_of_series = m4_data.data_dict[run_sp]['num']
block_size = m4_data.data_dict[run_sp]['fh']
fh = m4_data.data_dict[run_sp]['fh']
#
# Model Hiperparams
#
d_model = 64
batch_size = 512 #512
epochs = 512
scaler = MinMaxScaler((-1,1))
decompose = False
MAX_SERIES = 2
FULL_PASSES = 50
#
# model_conf = {'block_size':block_size, 'd_model':d_model}
if model_name == 'vanilla_transformer':
    model_conf = {'block_size':block_size, 'd_model': d_model, 'num_heads': 2, 'num_layers': 2,'dim_feedforward':128,'device':'cuda'}
    lr = 1e-4
else:
    lr = 1e-3
model = get_model(model_name, model_conf)
model_n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Params: {model_n_parameters}')
# model

for full_pass_i in range(FULL_PASSES):
    metrics_table = {'serie_id':[],'smape':[],'mase':[],}
    smape_list, mase_list = [], []
    for train_serie, test_serie, serie_id, fh, freq, serie_sp in m4_data.generate(n_series=MAX_SERIES, random=True):
        assert fh == block_size
        exp_conf = {
                # Model
                'model': model,
                'model_n_parameters': model_n_parameters, 
                'input_len':block_size,
                'forecast_horizon':fh,
                'feature_dim':1,
                # Data
                'frequency':serie_sp.lower(),
                'scaler':scaler,
                'decompose': decompose, #detrend and de-sazonalize
                'freq':freq,
                # Others
                'device':'cuda',
                'verbose':False,
        }
        train_conf = {
            'epochs':epochs,
            'lr':lr, 
            'batch_size':batch_size,
            'verbose':False, # stop training if loss dont decrease 0.5% 5 consecutive steps
            'early_stop':EarlyStopperPercent(patience=5, min_percent=0.005, verbose=False),
        }
        exp = Experiment(exp_conf)
        exp.set_dataset(linear_serie=train_serie, train=True)
        # exp.set_dataset(linear_serie=test_serie)
        exp.train(train_conf)
        # test
        last_train_values = train_serie[-block_size:]
        pred_y = exp.predict(last_train_values, fh)
        
        # check if negative or extreme (M4)
        pred_y[pred_y < 0] = 0
        pred_y[pred_y > (1000 * np.max(train_serie))] = np.max(train_serie)

        # Metrics
        metrics_table['serie_id'].append(serie_id)
        metrics_table['smape'].append(smape(test_serie, pred_y)*100)
        metrics_table['mase'].append(mase(train_serie, test_serie, pred_y, freq))
        # print(f'Serie {serie_id}-{serie_sp} Finished -> smape: {smape(test_serie, pred_y)*100} | mase:{mase(train_serie, test_serie, pred_y, freq)}')
        # plot_predictions(train_serie, test_serie, pred_y)
        
    #
    metrics_dict = {
        'smape_mean': np.round(np.mean(metrics_table['smape'], dtype=float), 3), 
        'mase_mean':  np.round(np.mean(metrics_table['mase'], dtype=float), 3),
        #
        'smape_std':  np.round(np.std(metrics_table['smape'], dtype=float), 3),
        'mase_std':   np.round(np.std(metrics_table['mase'], dtype=float), 3),
    }
    if TRACK:
        mlflow.log_metrics(metrics_dict)
        mlflow.log_table(metrics_table, artifact_file='metrics_table')

    print(f'Full Pass {1+full_pass_i:5}:', end='')
    for k, v in metrics_dict.items(): print(f'      {k}: {v}', end='')
    print()

Params: 168833
Full Pass     1:      smape_mean: 2.193      mase_mean: 2.033      smape_std: 0.388      mase_std: 1.214
Full Pass     2:      smape_mean: 3.131      mase_mean: 2.657      smape_std: 0.044      mase_std: 1.238
