In [260]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

In [261]:
# Columns skipped when reading the raw slot CSVs: the unnamed index column plus
# the identifier fields repeated for every feature group. All groups except
# `unit` also list their `cate` column here (`unit.cate` is not dropped).
drop_columns = (
    ['Unnamed: 0']
    + [f'unit.{field}' for field in ('IRI_KEY', 'SY', 'GE', 'VEND', 'ITEM')]
    + [
        f'{group}.{field}'
        for group in ('price', 'F', 'D', 'holiday')
        for field in ('IRI_KEY', 'SY', 'GE', 'VEND', 'ITEM', 'cate')
    ]
)

def read_csv(i):
    """Load slot ``i`` from ``dataset/iri{i}.csv`` as a stacked feature array.

    Columns named in ``drop_columns`` are skipped while reading. The remaining
    columns are grouped by substring match on their names into five feature
    blocks, stacked in this order: unit, price, holiday, F, D.

    Returns:
        A tuple ``(features, categories)``: ``features`` has shape
        (num_timeseries, length, 5) and ``categories`` holds the values of the
        ``unit.cate`` column with shape (num_timeseries, 1).
    """
    frame = pd.read_csv(f"dataset/iri{i}.csv", usecols=lambda name: name not in drop_columns)
    columns = frame.columns.to_list()

    def pick(pattern):
        # Columns whose name contains `pattern`, in file order.
        return [name for name in columns if pattern in name]

    # Some price entries are +inf: turn them into NaN and fill by interpolation.
    # NOTE(review): DataFrame.interpolate() works down each column by default,
    # i.e. across rows (different series) rather than along a row (time) —
    # confirm this is the intended direction.
    price = frame[pick('price.1')].replace(np.inf, np.nan).interpolate()

    blocks = [
        frame[pick('unit.1')].values,
        price.values,
        frame[pick('holiday.1')].values,
        frame[pick('F.')].values,
        frame[pick('D.')].values,
    ]
    # (5, num_timeseries, length) -> (num_timeseries, length, 5)
    features = np.transpose(np.array(blocks), (1, 2, 0))
    return features, frame[['unit.cate']].values

def normalize(d):
    """Instance-normalize the unit and price features of ``d`` along time.

    Args:
        d: floating-point tensor laid out as (num_timeseries, length, features),
            with unit and price as the first two features.

    Returns:
        The same tensor ``d``; its first two features are overwritten in place
        with their normalized values, all other features are left untouched.
    """
    norm = torch.nn.InstanceNorm1d(2)
    # InstanceNorm1d expects (N, C, L) and normalizes over L for each channel.
    # The slice is (N, L, 2), so the two features must be moved to the channel
    # axis first; otherwise every time step would be normalized across the
    # unit/price pair instead of each feature being normalized across time.
    channels_first = d[:, :, :2].permute(0, 2, 1)
    d[:, :, :2] = norm(channels_first).permute(0, 2, 1)
    return d

def concat_slots(fist_slot, last_slot):
    """Concatenate slots ``fist_slot .. last_slot - 1`` into one dataset.

    Each slot is loaded with ``read_csv`` and the per-slot arrays are joined
    along the first (time-series) axis.

    Args:
        fist_slot: index of the first slot to load (inclusive).
        last_slot: index one past the last slot to load (exclusive).

    Returns:
        A tuple ``(dataset, catg)`` of NumPy arrays: the stacked features and
        the matching category values.

    Raises:
        ValueError: if the slot range is empty.
    """
    slots = [read_csv(i) for i in range(fist_slot, last_slot)]
    if not slots:
        raise ValueError(
            f"empty slot range: fist_slot={fist_slot}, last_slot={last_slot}"
        )

    # Both outputs are plain NumPy arrays; tensors are created lazily per item
    # by the Dataset, so no full-size torch copy is built here.
    dataset = np.concatenate([tens for tens, _ in slots], axis=0)
    catg = np.concatenate([cat for _, cat in slots], axis=0)
    return dataset, catg

def mode_indx(mode):
    """Return the ``(start, end)`` slot range for a dataset split.

    Args:
        mode: one of ``"train"``, ``"valid"`` or ``"test"``.

    Returns:
        A ``(start, end)`` tuple of slot indices, used as ``range(start, end)``.

    Raises:
        ValueError: if ``mode`` is not one of the known splits.
    """
    slot_ranges = {
        "train": (1, 8),    # (60993, 55, 5)
        "valid": (8, 11),   # (22951, 55, 5)
        "test": (11, 16),   # (36194, 55, 5)
    }
    if mode not in slot_ranges:
        raise ValueError(
            f"unknown mode {mode!r}; expected one of {sorted(slot_ranges)}"
        )
    return slot_ranges[mode]
   

In [262]:
from torch.utils.data import Dataset

class IRIDataset(Dataset):
    """Map-style dataset pairing each time series with its base forecasts.

    The series come from the slot CSVs of the requested split; the base
    forecasts are loaded from a ``.npy`` file selected by horizon and split.
    """

    def __init__(self, mode, horizon):
        self.mode, self.horizon = mode, horizon
        first_slot, last_slot = mode_indx(mode)
        # concat_slots also returns category values; they are not used here.
        self.data = concat_slots(first_slot, last_slot)[0]
        self.base = self.get_base_forecasts()

    def get_base_forecasts(self):
        """Load the stored base forecasts for this split and horizon."""
        path = f'base_forecasters/{self.horizon}/_all_npy/base_{self.mode}.npy'
        return np.load(path)

    def __len__(self):
        """Number of entries along the first axis of the series array."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return item ``idx`` as a dict of float32 tensors.

        ``'series'`` is ``data[idx]`` with its two axes swapped;
        ``'base'`` is ``base[idx]`` unchanged in layout.
        """
        series = torch.from_numpy(self.data[idx]).transpose(0, 1).float()
        forecasts = torch.from_numpy(self.base[idx]).float()
        return {'series': series, 'base': forecasts}
        

In [263]:
# Run configuration: forecast horizon (selects the base-forecast files and the
# number of trailing steps used as targets) and the DataLoader batch size.
horizon, batch_size = 7, 31

In [264]:
from lightning.pytorch import seed_everything

# Seed every random number generator used in the notebook with the same value.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
seed_everything(SEED, workers=True)

Global seed set to 0


0

In [265]:
from torch.utils.data import DataLoader

def _build_split(mode):
    """Create the dataset for ``mode`` and an in-order (unshuffled) loader over it."""
    dataset = IRIDataset(mode, horizon)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=8)
    return dataset, loader


train_dataset, train_dataloader = _build_split("train")
valid_dataset, valid_dataloader = _build_split("valid")
test_dataset, test_dataloader = _build_split("test")

## Implementation of M0 Model in PyTorch 

In [266]:
from torchvision.ops import SqueezeExcitation


class SEBlock(nn.Module):
    """Squeeze-and-excitation gate for 1-D feature maps.

    The input is averaged over its length axis, passed through a two-layer
    bottleneck ending in a sigmoid, and the resulting per-channel gate is
    multiplied back onto the input.

    Args:
        in_channels: number of channels C of the (N, C, L) input.
        reduction: bottleneck factor; the hidden width is
            ``in_channels // reduction``.
    """

    def __init__(self, in_channels, reduction=16):
        super().__init__()

        self.squeeze = nn.AdaptiveAvgPool1d(1)
        self.excitation = nn.Sequential(
            nn.Linear(in_channels, in_channels // reduction),
            nn.ReLU(),
            nn.Linear(in_channels // reduction, in_channels),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Scale ``x`` of shape (N, C, L) by a gate in (0, 1) per channel."""
        # (N, C, L) -> (N, C, 1) -> (N, C). Only the pooled length axis is
        # dropped, so the channel axis survives even when C == 1.
        pooled = self.squeeze(x).squeeze(-1)
        # (N, C) -> (N, C, 1) so the gate broadcasts over the length axis.
        gate = self.excitation(pooled).unsqueeze(-1)
        return gate * x
        


class MetaLearnerConvBlock(nn.Module):
    """Three-stage 1-D convolutional feature extractor with global pooling.

    Stages one and two are Conv1d + ReLU followed by a squeeze-and-excitation
    block; stage three is Conv1d + ReLU. The result is averaged over the
    length axis into one 64-dimensional vector per sample.

    Args:
        num_channels: number of input channels.
    """

    def __init__(self, num_channels=1):
        super().__init__()

        self.gpool = nn.AdaptiveAvgPool1d(1)

        self.b1 = nn.Sequential(
            nn.Conv1d(num_channels, 64, kernel_size=2, padding='same'),
            nn.ReLU(),
            SEBlock(64)
        )

        self.b2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=4, padding='same'),
            nn.ReLU(),
            SEBlock(128)
        )

        self.b3 = nn.Sequential(
            nn.Conv1d(128, 64, kernel_size=8, padding='same'),
            nn.ReLU(),
        )

    def forward(self, x):
        """Map ``x`` of shape (N, num_channels, L) to features of shape (N, 64)."""
        h = self.b1(x)
        h = self.b2(h)
        h = self.b3(h)
        # Drop only the pooled length axis. A bare squeeze() would also drop
        # the batch axis for a batch of one and break later concatenation
        # along dim=1.
        h = self.gpool(h).squeeze(-1)
        return h

In [267]:
class MetaLearnerM0(nn.Module):
    """Meta-learner producing combination weights over base forecasters.

    The five input channels are instance-normalized. Channel 0 (sales),
    restricted to the steps before the forecast horizon, and channels 1-4
    (over the full length) go through separate convolutional blocks; the two
    64-dimensional feature vectors are concatenated and mapped to softmax
    weights.

    Args:
        num_forecasters: number of base forecasters, i.e. the size of the
            weight vector returned per sample.
        horizon: number of trailing steps hidden from the sales branch. When
            left as ``None`` the module-level ``horizon`` is read at call time.
    """

    def __init__(self, num_forecasters, horizon=None):
        super().__init__()

        self.horizon = horizon

        self.normalizer = nn.InstanceNorm1d(5)
        self.sales_block = MetaLearnerConvBlock(num_channels=1)
        self.inf_block = MetaLearnerConvBlock(num_channels=4)

        self.weights = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128, num_forecasters),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        """Return weights of shape (N, num_forecasters) for ``x`` of shape (N, 5, L)."""
        steps_ahead = horizon if self.horizon is None else self.horizon

        z = self.normalizer(x)
        # The sales branch only sees the history preceding the forecast window;
        # the cut-off follows the actual input length instead of a fixed 55.
        history_len = z.shape[-1] - steps_ahead
        h_sales = self.sales_block(z[:, :1, :history_len])
        h_inf = self.inf_block(z[:, 1:, :])

        h = torch.cat((h_sales, h_inf), dim=1)

        weights = self.weights(h)

        return weights

# Build a meta-learner over 8 base forecasters and report its parameter count.
M0 = MetaLearnerM0(8)
num_params = sum(p.numel() for p in M0.parameters())
print('Total M0 parameters : ', num_params)

Total M0 parameters :  204320


In [268]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks.early_stopping import EarlyStopping

class LiteM0(pl.LightningModule):
    """Lightning wrapper that trains a meta-learner on combined-forecast RMSE.

    For every batch the wrapped model produces weights over the base
    forecasters; the weighted sum of ``batch['base']`` is compared with the
    last ``horizon`` values of channel 0 of ``batch['series']``.

    Args:
        m0: the meta-learner module mapping a series batch to weights.
    """

    def __init__(self, m0):
        super().__init__()
        self.m0 = m0

    def _rmse(self, batch):
        """Root-mean-squared error of the weighted base forecasts for one batch."""
        weights = self.m0(batch['series'])
        # (N, 1, F) @ (N, F, H) -> (N, 1, H); drop only the singleton axis.
        predictions = torch.bmm(weights.unsqueeze(1), batch['base']).squeeze(1)
        true_values = batch['series'][:, 0, -horizon:]
        return torch.sqrt(nn.functional.mse_loss(predictions, true_values))

    def training_step(self, batch, batch_idx):
        """Compute and log the training loss (epoch-level aggregation)."""
        pred_loss = self._rmse(batch)
        loss = pred_loss

        self.log('train_loss_pred', pred_loss, on_epoch=True, on_step=False)
        self.log('train_loss_total', loss, on_epoch=True, on_step=False)

        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log ``valid_loss`` (epoch-level aggregation)."""
        valid_loss = self._rmse(batch)
        self.log('valid_loss', valid_loss, on_epoch=True, on_step=False)

        return valid_loss

    def test_step(self, batch, batch_idx):
        """Compute and log ``test_loss`` (epoch-level aggregation)."""
        test_loss = self._rmse(batch)
        self.log('test_loss', test_loss, on_epoch=True, on_step=False)

        # Return the loss computed in this step; `valid_loss` does not exist here.
        return test_loss

    def configure_optimizers(self):
        """Use Adam over all parameters with learning rate 0.02."""
        optimizer = torch.optim.Adam(self.parameters(), lr=0.02)
        return optimizer

In [269]:
# Fresh meta-learner over 8 base forecasters, wrapped for Lightning training.
lite_m0 = LiteM0(m0=MetaLearnerM0(num_forecasters=8))

In [270]:
# Stop training once the logged `valid_loss` stops decreasing.
early_stopping = EarlyStopping(monitor="valid_loss", mode="min")

trainer = pl.Trainer(
    max_epochs=50,
    check_val_every_n_epoch=1,
    callbacks=[early_stopping],
    enable_progress_bar=False,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [271]:
# Train on the train split, validating on the valid split after every epoch.
trainer.fit(
    lite_m0,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type          | Params
---------------------------------------
0 | m0   | MetaLearnerM0 | 204 K 
---------------------------------------
204 K     Trainable params
0         Non-trainable params
204 K     Total params
0.817     Total estimated model params size (MB)


In [272]:
def collect_predictions(dataloader):
    """Run the trained meta-learner over ``dataloader`` and collect forecasts.

    The wrapped model of ``lite_m0`` is switched to eval mode and every batch
    is combined as the weighted sum of its base forecasts.

    Args:
        dataloader: iterable of batches with ``'series'`` and ``'base'`` tensors.

    Returns:
        A list with one CPU tensor of combined forecasts per batch, in the
        order the batches were produced.
    """
    model = lite_m0.m0
    model.eval()
    # Run on whichever device the model currently lives on.
    device = next(model.parameters()).device

    predictions = []
    with torch.inference_mode():
        for batch in dataloader:
            series = batch['series'].to(device)
            base = batch['base'].to(device)
            weights = model(series)
            # (N, 1, F) @ (N, F, H) -> (N, 1, H). Drop only the singleton axis
            # so a batch of one stays 2-D and can be concatenated later.
            pred = torch.bmm(weights.unsqueeze(1), base).squeeze(1)
            predictions.append(pred.cpu())
    return predictions

In [273]:
# Per-batch predictions for each split, evaluated in train -> valid -> test order.
train_pred, valid_pred, test_pred = (
    collect_predictions(loader)
    for loader in (train_dataloader, valid_dataloader, test_dataloader)
)

In [274]:
# Join the per-batch predictions of all three splits (train, valid, test order)
# into a single tensor along the first axis.
all_batches = [*train_pred, *valid_pred, *test_pred]
combined_pred = torch.cat(all_batches, dim=0)

In [280]:
# Write the combined predictions to disk, one row per series, without the index.
output_path = f'M0-h{horizon}'
combined_df = pd.DataFrame(combined_pred.numpy())
combined_df.to_csv(output_path, index=False)

In [282]:
# Last `horizon` steps of feature 0 for every series, stacked across the
# train, valid and test splits along the first axis.
combined_dataset = np.concatenate(
    [
        split.data[:, -horizon:, 0:1]
        for split in (train_dataset, valid_dataset, test_dataset)
    ],
    axis=0,
)