In [None]:
# Mount Google Drive inside the Colab filesystem; the training CSV is read from it below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# A simplified version of the DeepAR model
# https://arxiv.org/pdf/1704.04110.pdf

# references
# LSTM: https://pytorch.org/docs/stable/generated/torch.nn.LSTM.html
# deepAR: https://github.com/zhykoties/TimeSeries/blob/master/model/net.py

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import StandardScaler

In [None]:
# load data
# The path is relative, so it resolves against the current working directory.
TRAIN_CSV_PATH = 'drive/MyDrive/kaggle/train.csv'
df = pd.read_csv(TRAIN_CSV_PATH)

In [None]:
# data preprocessing

# Remove every row that has at least one missing value (mutates `df` in place).
df.dropna(inplace=True)

# "target" is the label; "row_id" and "time_id" are left out of the model inputs.
NON_FEATURE_COLUMNS = ["target", "row_id", "time_id"]
target_df = df['target']
features_df = df.drop(columns=NON_FEATURE_COLUMNS)

# Standardise each feature column.
# NOTE(review): the scaler is fitted on all rows, i.e. before the train/test
# split performed later, so held-out rows influence the scaling statistics.
scaler = StandardScaler()
features_scaled = scaler.fit(features_df).transform(features_df)

In [None]:
# train test split, dataloader
batch_size = 1024

# Convert the label column and the scaled feature matrix to float32 tensors.
target_tensor = torch.tensor(target_df.values, dtype=torch.float32)
features_tensor = torch.tensor(features_scaled, dtype=torch.float32)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features_tensor,
    target_tensor,
    test_size=0.2,
    random_state=42,
)

class TimeSeriesDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    ``features`` and ``targets`` are stored as given. The dataset length is
    ``len(features)`` and item ``idx`` is ``(features[idx], targets[idx])``.
    """

    def __init__(self, features, targets):
        self.features, self.targets = features, targets

    def __len__(self):
        # One sample per feature row.
        n_samples = len(self.features)
        return n_samples

    def __getitem__(self, idx):
        # The same index selects the input and its label.
        sample = self.features[idx]
        label = self.targets[idx]
        return sample, label

# Wrap each split in a dataset object.
train_dataset, test_dataset = (
    TimeSeriesDataset(X_train, y_train),
    TimeSeriesDataset(X_test, y_test),
)

# Only the training batches are shuffled; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
# simplified DeepAR model

class DeepAR(nn.Module):
    """Simplified DeepAR-style network: a stacked LSTM followed by two linear heads.

    The final hidden state of every LSTM layer is concatenated and mapped to
    the mean ``mu`` and, through a softplus, to the standard deviation
    ``sigma`` of a Gaussian over the target.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each LSTM layer.
        lstm_layers: number of stacked LSTM layers.
        device: stored on ``self.device`` for backward compatibility. Initial
            states are created on the device (and with the dtype) of the LSTM
            weights, so the model keeps working after ``.to(...)`` or
            ``.double()``.
    """

    def __init__(self, input_size, hidden_size, lstm_layers=2, device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm_layers = lstm_layers
        self.device = device

        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=self.lstm_layers, batch_first=True)

        # The heads consume the concatenated final hidden state of all layers.
        self.distribution_mu = nn.Linear(hidden_size * self.lstm_layers, 1)
        self.distribution_presigma = nn.Linear(hidden_size * self.lstm_layers, 1)
        self.distribution_sigma = nn.Softplus()     # make sure sigma is positive

    def forward(self, x, hidden_state=None, cell_state=None):
        """Predict the Gaussian parameters for a batch.

        Args:
            x: ``(batch_size, input_size)`` for a single time step, or
                ``(batch_size, seq_len, input_size)`` for a full sequence.
            hidden_state: ``(num_layers, batch_size, hidden_size)``; zeros if omitted.
            cell_state: ``(num_layers, batch_size, hidden_size)``; zeros if omitted.

        Returns:
            ``(mu, sigma, hidden, cell)`` where ``mu`` and ``sigma`` have shape
            ``(batch_size, 1)`` and ``hidden``/``cell`` are the final LSTM states.

        Raises:
            ValueError: if ``x`` is neither 2-D nor 3-D.
        """
        if x.dim() == 2:
            x = x.unsqueeze(1)  # single time step: add seq_len = 1
        elif x.dim() != 3:
            raise ValueError(
                f"expected x of shape (batch, input_size) or (batch, seq_len, input_size), got {tuple(x.shape)}"
            )

        batch = x.shape[0]
        if hidden_state is None:
            hidden_state = self.init_hidden(batch)
        if cell_state is None:
            cell_state = self.init_cell(batch)

        lstm_out, (hidden, cell) = self.lstm(x, (hidden_state, cell_state))
        # (num_layers, batch, hidden) -> (batch, num_layers * hidden)
        hidden_permute = hidden.permute(1, 0, 2).contiguous().view(hidden.shape[1], -1)

        # Predicting mu and sigma
        mu = self.distribution_mu(hidden_permute)
        pre_sigma = self.distribution_presigma(hidden_permute)
        sigma = self.distribution_sigma(pre_sigma)

        return mu, sigma, hidden, cell

    def _zero_state(self, batch_size):
        """Return a zero ``(num_layers, batch_size, hidden_size)`` state matching the LSTM weights."""
        reference = self.lstm.weight_ih_l0
        return torch.zeros(
            self.lstm_layers,
            batch_size,
            self.hidden_size,
            device=reference.device,
            dtype=reference.dtype,
        )

    def init_hidden(self, batch_size):
        """Zero initial hidden state for ``batch_size`` sequences."""
        return self._zero_state(batch_size)

    def init_cell(self, batch_size):
        """Zero initial cell state for ``batch_size`` sequences."""
        return self._zero_state(batch_size)

# gaussian log likelihood loss
def gaussian_likelihood_loss(mu, sigma, y):
    """Mean Gaussian negative log-likelihood of ``y`` under ``N(mu, sigma**2)``.

    The constant ``0.5 * log(2 * pi)`` is omitted because it does not affect
    the gradients.

    Args:
        mu: predicted means.
        sigma: predicted standard deviations (must be positive).
        y: observed targets. If it holds as many elements as ``mu`` but in a
            different shape (e.g. ``(batch,)`` vs ``(batch, 1)``) it is
            reshaped to ``mu``'s shape, so the subtraction is element-wise
            instead of silently broadcasting to ``(batch, batch)``.

    Returns:
        Scalar tensor with the mean loss.
    """
    if y.shape != mu.shape and y.numel() == mu.numel():
        y = y.reshape(mu.shape)
    # log(sigma) == 0.5 * log(sigma**2), but avoids squaring a small sigma to 0.
    return torch.mean(torch.log(sigma) + 0.5 * ((y - mu) / sigma) ** 2)

# mae
def mae(predictions, targets):
    """Mean absolute error between ``predictions`` and ``targets``.

    If ``targets`` holds as many elements as ``predictions`` but in a different
    shape (e.g. ``(batch,)`` vs ``(batch, 1)``) it is reshaped to match, so the
    difference is element-wise instead of broadcasting to ``(batch, batch)``.
    Inputs with different element counts still use normal broadcasting.
    """
    if targets.shape != predictions.shape and targets.numel() == predictions.numel():
        targets = targets.reshape(predictions.shape)
    return torch.mean(torch.abs(predictions - targets))

# mse
def mse(predictions, targets):
    """Mean squared error between ``predictions`` and ``targets``.

    If ``targets`` holds as many elements as ``predictions`` but in a different
    shape (e.g. ``(batch,)`` vs ``(batch, 1)``) it is reshaped to match, so the
    difference is element-wise instead of broadcasting to ``(batch, batch)``.
    Inputs with different element counts still use normal broadcasting.
    """
    if targets.shape != predictions.shape and targets.numel() == predictions.numel():
        targets = targets.reshape(predictions.shape)
    return torch.mean((predictions - targets) ** 2)

# rmse
def rmse(predictions, targets):
    """Root mean squared error between ``predictions`` and ``targets``.

    If ``targets`` holds as many elements as ``predictions`` but in a different
    shape (e.g. ``(batch,)`` vs ``(batch, 1)``) it is reshaped to match before
    delegating to ``mse``, so the error is element-wise instead of
    broadcasting to ``(batch, batch)``.
    """
    if targets.shape != predictions.shape and targets.numel() == predictions.numel():
        targets = targets.reshape(predictions.shape)
    return torch.sqrt(mse(predictions, targets))

In [None]:
# train and evaluate

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Size the input layer from the data instead of a hard-coded feature count, and
# pass the device explicitly so the model's initial states match it.
model = DeepAR(input_size=X_train.shape[1], hidden_size=30, lstm_layers=2, device=device).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

epochs = 30
loss_list, mae_list, mse_list, rmse_list = [], [], [], []


def run_epoch(model, loader, device, optimizer=None, epoch=None, log_every=100):
    """Run one pass over ``loader`` and return sample-weighted average metrics.

    Args:
        model: network returning ``(mu, sigma, hidden, cell)`` for a batch.
        loader: iterable of ``(features, targets)`` batches.
        device: device the batches are moved to.
        optimizer: if given, the model is put in train mode and updated on
            every batch; if ``None`` the pass is evaluation-only (eval mode,
            gradients disabled).
        epoch: zero-based epoch index, used only in progress messages.
        log_every: print the batch loss every ``log_every`` training steps.

    Returns:
        ``(avg_loss, avg_mae, avg_mse, avg_rmse)`` as Python floats. Averages
        are weighted by batch size so a short final batch is not over-counted,
        and RMSE is the square root of the epoch-level MSE.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    total_loss, total_abs_err, total_sq_err, n_samples = 0.0, 0.0, 0.0, 0
    with torch.set_grad_enabled(training):
        for step, (x, y) in enumerate(loader):
            x, y = x.to(device), y.to(device)

            # Each row is an independent one-step sequence, so the model
            # starts from its own zero states; the returned states are unused.
            mu, sigma, _, _ = model(x)

            # The model emits (batch, 1) while targets are (batch,). Flatten
            # everything so the loss and metrics are element-wise rather than
            # broadcasting to (batch, batch).
            mu, sigma, y = mu.reshape(-1), sigma.reshape(-1), y.reshape(-1)

            loss = gaussian_likelihood_loss(mu, sigma, y)
            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            n = y.shape[0]
            mu_eval = mu.detach()
            total_loss += loss.item() * n
            total_abs_err += mae(mu_eval, y).item() * n
            total_sq_err += mse(mu_eval, y).item() * n
            n_samples += n

            if training and epoch is not None and step % log_every == 0:
                print(f'Epoch {epoch+1}, Step {step+1}, Loss: {loss.item():.4f}')

    if n_samples == 0:
        raise ValueError('loader produced no samples')

    epoch_mse = total_sq_err / n_samples
    return total_loss / n_samples, total_abs_err / n_samples, epoch_mse, epoch_mse ** 0.5


# training
for epoch in range(epochs):
    avg_loss, avg_mae, avg_mse, avg_rmse = run_epoch(
        model, train_loader, device, optimizer=optimizer, epoch=epoch
    )

    loss_list.append(avg_loss)
    mae_list.append(avg_mae)
    mse_list.append(avg_mse)
    rmse_list.append(avg_rmse)

    print(f'Epoch {epoch+1}, Loss: {avg_loss:.4f}, MAE: {avg_mae:.4f}, MSE: {avg_mse:.4f}, RMSE: {avg_rmse:.4f}')

# evaluation (no optimizer -> eval mode, gradients disabled)
avg_loss, avg_mae, avg_mse, avg_rmse = run_epoch(model, test_loader, device)
print(f'TestingLoss: {avg_loss:.4f}, MAE: {avg_mae:.4f}, MSE: {avg_mse:.4f}, RMSE: {avg_rmse:.4f}')

# save model
torch.save(model.state_dict(), 'simple_deep_ar.pth')

Epoch 1, Step 1, Loss: 75.4194
Epoch 1, Step 101, Loss: 71.5931
Epoch 1, Step 201, Loss: 67.1535
Epoch 1, Step 301, Loss: 73.2590
Epoch 1, Step 401, Loss: 65.5580
Epoch 1, Step 501, Loss: 70.3176
Epoch 1, Step 601, Loss: 66.9521
Epoch 1, Step 701, Loss: 70.4690
Epoch 1, Step 801, Loss: 70.6991
Epoch 1, Step 901, Loss: 65.9483
Epoch 1, Step 1001, Loss: 74.3493
Epoch 1, Step 1101, Loss: 70.3176
Epoch 1, Step 1201, Loss: 75.2050
Epoch 1, Step 1301, Loss: 52.6095
Epoch 1, Step 1401, Loss: 81.4270
Epoch 1, Step 1501, Loss: 66.6275
Epoch 1, Step 1601, Loss: 64.2615
Epoch 1, Step 1701, Loss: 56.7750
Epoch 1, Step 1801, Loss: 68.9637
Epoch 1, Loss: 70.5462, MAE: 5.7093, MSE: 70.2121, RMSE: 8.3527
Epoch 2, Step 1, Loss: 76.0803
Epoch 2, Step 101, Loss: 63.1941
Epoch 2, Step 201, Loss: 62.3890
Epoch 2, Step 301, Loss: 66.7512
Epoch 2, Step 401, Loss: 59.4048
Epoch 2, Step 501, Loss: 56.2814
Epoch 2, Step 601, Loss: 55.6256
Epoch 2, Step 701, Loss: 56.9672
Epoch 2, Step 801, Loss: 63.1124
Epoch 2