In [25]:
import sys
import warnings
warnings.simplefilter("ignore")

import numpy as np
import pandas as pd
from matplotlib import pylab as plt

import sklearn
sklearn.set_config(enable_metadata_routing=True)

from mapie.metrics import regression_coverage_score, regression_mean_width_score
from mapie.subsample import BlockBootstrap
from mapie.regression import MapieTimeSeriesRegressor, MapieRegressor

import torch
import json
import pickle

from sklearn.metrics import mean_squared_error

In [2]:
# Directory of the transformer project: it provides the `config`, `train` and
# `transformer_dataset` modules imported below and is reused later in the
# notebook to locate `final_single_step_param.json`.
# NOTE(review): machine-specific absolute path; adjust it when running elsewhere.
path = r'C:/Users/obhlivoj/DP/System-Imbalance-Forecasting/models/transformer_future_lags'

# Make the project modules importable. The guard keeps the cell idempotent, so
# re-running it does not keep appending the same entry to sys.path.
if path not in sys.path:
    sys.path.append(path)

from config import get_config
from train import get_ds, get_model, greedy_decode
from transformer_dataset import TSDataset, causal_mask

In [3]:
from sklearn.base import BaseEstimator, RegressorMixin
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

In [4]:
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Base configuration with the overrides used for this run.
cfg = get_config()
cfg['tgt_seq_len'] = cfg['val_seq_len'] = 1
cfg['num_epochs'] = 80

# The datasets are built at this point, i.e. BEFORE the stored parameters
# further down are merged into cfg.
train_scl, val_scl, test_scl, label_scaler = get_ds(cfg, return_raw=True)
train_ds, val_ds, test_ds = [
    TSDataset(split, cfg['src_seq_len'], cfg['tgt_seq_len'])
    for split in (train_scl, val_scl, test_scl)
]

# Read the stored parameter file and overlay its first 'best_params' entry on cfg.
with open(f'{path}/final_single_step_param.json', 'r') as file:
    best_params = json.load(file)

stored_params = best_params['best_params'][0]
for param, value in stored_params.items():
    cfg[param] = value

# The model is created from the merged configuration.
model = get_model(cfg)

In [5]:
# Value later passed as `alpha` to the MAPIE predict call.
alpha = 0.05

# Block-bootstrap resampling scheme handed to MAPIE as its `cv` argument:
# 4 resamplings over 10 non-overlapping blocks, with a fixed random state.
cv_mapiets = BlockBootstrap(
    n_resamplings=4,
    n_blocks=10,
    overlapping=False,
    random_state=69,
)

In [6]:
# Walk the concatenated train + validation datasets once and keep the samples.
trainval_samples = list(train_ds+val_ds)

# One flat feature row per sample: encoder input followed by decoder input.
x_list = [
    torch.cat((sample['encoder_input'], sample['decoder_input'])).reshape(-1)
    for sample in trainval_samples
]
# Matching labels with singleton dimensions removed.
y_list = [sample["label"].squeeze() for sample in trainval_samples]

X = torch.stack(x_list).numpy()
y = torch.stack(y_list).numpy()

In [7]:
class PyTorchRegressor(BaseEstimator, RegressorMixin):
    """scikit-learn style regressor wrapping the notebook's encoder/decoder model.

    Every row of ``X`` is a flattened window. ``fit`` and ``predict`` reshape it
    to ``(batch, src_seq_len + tgt_seq_len, n_features)`` and split it along the
    sequence axis into the encoder input (first ``src_seq_len`` steps) and the
    decoder input (remaining steps).

    Parameters
    ----------
    model
        Object providing ``to``, ``train``, ``eval``, ``parameters``,
        ``encode``, ``decode`` and ``project``; it is moved to ``device``.
    cfg
        Mapping from which the keys ``'exo_vars'``, ``'target'``,
        ``'src_seq_len'``, ``'tgt_seq_len'``, ``'batch_size'``, ``'lr'`` and
        ``'num_epochs'`` are read.
    device
        Torch device used for the model, the data and the masks.
    scaler
        Forwarded unchanged to ``greedy_decode`` in ``predict``.
    """

    def __init__(self, model, cfg, device, scaler):
        # NOTE(review): scikit-learn convention is to only store constructor
        # arguments here; the extra work is kept so construction and cloning
        # behave exactly as before.
        self.model = model.to(device)
        self.cfg = cfg
        self.device = device
        # Size of the last axis used when un-flattening rows of X.
        self.n_features = len(cfg['exo_vars']+cfg['target'])

        # All-True encoder mask; the decoder mask is whatever causal_mask returns.
        self.enc_mask = torch.ones(1, cfg['src_seq_len'], cfg['src_seq_len']).bool()
        self.dec_mask = causal_mask(cfg['tgt_seq_len'])
        self.scaler = scaler

    def _split_windows(self, flat_batch):
        """Un-flatten a batch of rows and return ``(encoder_input, decoder_input)``."""
        src_len = self.cfg['src_seq_len']
        windows = flat_batch.reshape(-1, src_len + self.cfg['tgt_seq_len'], self.n_features)
        encoder_input = windows[:, :src_len].to(self.device)
        decoder_input = windows[:, src_len:].to(self.device)
        return encoder_input, decoder_input

    def fit(self, X, y):
        """Train the wrapped model on ``(X, y)`` with Adam and an MSE loss.

        Parameters
        ----------
        X : array-like
            Flattened windows, one per row.
        y : array-like
            Targets matching the rows of ``X``.

        Returns
        -------
        self
        """
        # Convert X and y to PyTorch tensors and move them to the specified device
        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        y_tensor = torch.tensor(y, dtype=torch.float32).to(self.device)

        # Prepare dataset
        dataset = TensorDataset(X_tensor, y_tensor)
        dataloader = DataLoader(dataset, batch_size=self.cfg['batch_size'], shuffle=True)

        # Initialize optimizer and loss function. The learning rate is read from
        # the estimator's own configuration rather than a notebook-level global,
        # so cloned estimators stay self-contained.
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.cfg['lr'], eps=1e-9)
        # Learning-rate factor goes linearly from 1.0 to 0.1 over the first 30
        # scheduler steps (one step per epoch, see the end of the epoch loop).
        scheduler = torch.optim.lr_scheduler.LinearLR(
            optimizer, start_factor=1.0, end_factor=0.1, total_iters=30)
        loss_fn = nn.MSELoss().to(self.device)

        # The masks do not change between batches, so move them to the device once.
        encoder_mask = self.enc_mask.to(self.device)
        decoder_mask = self.dec_mask.to(self.device)

        # Training loop
        self.model.train()
        for _ in range(self.cfg['num_epochs']):
            for batch, label in dataloader:
                optimizer.zero_grad()

                encoder_input, decoder_input = self._split_windows(batch)

                encoder_output = self.model.encode(encoder_input, encoder_mask)
                decoder_output = self.model.decode(
                    encoder_output, encoder_mask, decoder_input, decoder_mask)
                proj_output = self.model.project(decoder_output)

                label = label.to(self.device)

                # Compute the loss using MSE, backpropagate the loss, update the weights
                loss = loss_fn(proj_output.view(-1), label.view(-1))
                loss.backward()

                optimizer.step()
                # Drop the gradients right away so they are not kept after the step.
                optimizer.zero_grad(set_to_none=True)

            scheduler.step()

        return self

    def predict(self, X):
        """Predict for the flattened windows in ``X`` via ``greedy_decode``.

        Returns
        -------
        numpy.ndarray
            Concatenated ``greedy_decode`` outputs of all batches with
            singleton dimensions squeezed out.
        """
        self.model.eval()

        X_tensor = torch.tensor(X, dtype=torch.float32).to(self.device)
        dataset = TensorDataset(X_tensor)
        # shuffle=False keeps the predictions in the order of the rows of X.
        val_dataloader = DataLoader(dataset, batch_size=self.cfg['batch_size'], shuffle=False)

        encoder_mask = self.enc_mask.to(self.device)

        predicted = []
        with torch.no_grad():
            # TensorDataset yields 1-tuples, hence the unpacking of each batch.
            for (batch,) in val_dataloader:
                encoder_input, decoder_input = self._split_windows(batch)

                model_out = greedy_decode(
                    self.model, self.cfg, encoder_input, encoder_mask, decoder_input, self.scaler, self.device)
                predicted.append(model_out.detach().cpu())

            pred_torch = torch.cat(predicted)

        # Predictions are on the CPU for compatibility with scikit-learn.
        # NOTE(review): squeeze() removes every size-1 axis, so the result
        # shape for a single-row X depends on greedy_decode's output shape.
        return pred_torch.numpy().squeeze()

    def score(self, X, y):
        """Return the mean squared error between ``predict(X)`` and ``y``.

        This overrides ``RegressorMixin.score``; unlike the scikit-learn
        default, lower values are better here.
        """
        predictions = self.predict(X)
        # MSE is the default output of mean_squared_error; the deprecated
        # `squared=` keyword is not needed and is rejected by newer
        # scikit-learn releases.
        return mean_squared_error(y, predictions)

In [None]:
# def greedy_decode(self, encoder_input, encoder_mask, decoder_in):    
#     encoder_output = self.model.encode(encoder_input, encoder_mask)
#     decoder_input = decoder_in[:, 0:1, :].type_as(encoder_input).to(device)

#     for i in range(cfg['val_seq_len']):
#         # build a mask for the target (decoder input)
#         decoder_mask = causal_mask(decoder_input.size(
#             1)).type_as(encoder_mask).to(device)
#         # calculate the output of the decoder
#         out = model.decode(encoder_output, encoder_mask,
#                             decoder_input, decoder_mask)
#         # get the next token
#         pred = model.project(out)
#         if i == cfg['val_seq_len']-1:
#             break
#         pred_new = pred[:, -1, -1]
#         scaled_pred = torch.tensor(self.scaler.transform(pred_new.view(-1, 1).cpu()))

#         decoder_next = torch.clone(decoder_in[:, i+1, :].unsqueeze(1))
#         decoder_next[:, :, 0] = scaled_pred
#         decoder_input = torch.cat(
#             [decoder_input, decoder_next.type_as(encoder_input).to(device)], dim=1)


In [8]:
# Wrap the model, configuration, device and label scaler in the sklearn-style estimator.
mlp_mapie = PyTorchRegressor(
    model=model,
    cfg=cfg,
    device=device,
    scaler=label_scaler,
)

In [9]:
# MAPIE time-series regressor around the wrapped estimator: "enbpi" method,
# block-bootstrap resampling, mean aggregation.
mapie_ts = MapieTimeSeriesRegressor(
    estimator=mlp_mapie,
    method="enbpi",
    cv=cv_mapiets,
    agg_function="mean",
)
# Fit on the train + validation arrays built earlier in the notebook.
mapie_ts.fit(X, y)

In [10]:
# Walk the test dataset once and keep the samples.
test_samples = list(test_ds)

# One flat feature row per sample: encoder input followed by decoder input.
x_list = [
    torch.cat((sample['encoder_input'], sample['decoder_input'])).reshape(-1)
    for sample in test_samples
]
# Matching labels with singleton dimensions removed.
y_list = [sample["label"].squeeze() for sample in test_samples]

X_test = torch.stack(x_list).numpy()
y_test = torch.stack(y_list).numpy()

In [16]:
# Point predictions and prediction intervals for the test set at level `alpha`.
y_pred, y_pis = mapie_ts.predict(X_test, alpha=alpha)

# Index 0 / 1 on the second axis are used as the lower / upper interval bounds.
lower_bound = y_pis[:, 0, 0]
upper_bound = y_pis[:, 1, 0]

coverage = regression_coverage_score(y_test, lower_bound, upper_bound)
width = regression_mean_width_score(lower_bound, upper_bound)

In [12]:
# Root mean squared error of the point predictions on the test set.
# The `squared=` keyword of mean_squared_error is deprecated and rejected by
# newer scikit-learn releases, so the square root is taken explicitly.
np.sqrt(mean_squared_error(y_test, mapie_ts.predict(X_test)))

112.69202

In [22]:
# Display the value returned by regression_mean_width_score for the test intervals.
width

495.07245572408027

In [28]:
from pathlib import Path

# Bundle the predictions, intervals, metrics and ground truth for later use.
trans_res = {
    "y_pred": y_pred,
    "y_pis": y_pis,
    "coverage": coverage,
    "width": width,
    "y_true": y_test
}

results_path = Path('./results/trans_dict.pkl')
# Create the output directory if it is missing, so that open() does not fail
# when the notebook is run from a location without a ./results folder.
results_path.parent.mkdir(parents=True, exist_ok=True)

with open(results_path, 'wb') as f:
    pickle.dump(trans_res, f)