In [None]:
!rm -rf qber-forecasting
!rm -rf deep_qber

In [1]:
!git clone https://github.com/rmnigm/qber-forecasting.git
!pip install wandb
!pip install pytorch_lightning torchmetrics

Cloning into 'qber-forecasting'...
remote: Enumerating objects: 76, done.[K
remote: Counting objects: 100% (76/76), done.[K
remote: Compressing objects: 100% (62/62), done.[K
remote: Total 76 (delta 27), reused 52 (delta 10), pack-reused 0[K
Unpacking objects: 100% (76/76), 26.14 MiB | 3.51 MiB/s, done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.15.1-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m52.5 MB/s[0m eta [36m0:00:00[0m
Collecting GitPython!=3.1.29,>=1.0.0
  Downloading GitPython-3.1.31-py3-none-any.whl (184 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle
  Downloading setproctitle-1.3.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting docker-p

In [2]:
# Copy the package out of the clone so that `deep_qber` can be imported from the working directory.
!cp -r qber-forecasting/deep_qber deep_qber

In [3]:
import os
import random
import sys

import wandb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from tqdm import tqdm

import sklearn
from sklearn.preprocessing import MinMaxScaler, StandardScaler, QuantileTransformer

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torchmetrics.functional import mean_squared_error, mean_absolute_percentage_error 

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

In [4]:
from deep_qber import seed_everything, setup_dataset
from deep_qber import TorchTSDataset, ModelInterfaceTS, ModuleTS

In [5]:
# Optional: authenticate with Weights & Biases up front.
# The experiment cell further down logs through wandb.init / WandbLogger.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [6]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

In [75]:
class TorchTSDataset(Dataset):
    """Sliding-window dataset for one-step-ahead forecasting of one column.

    For every sample ``i``:

    * ``X[i]``         -- rows ``i .. i + look_back - 1`` of ``dataset`` (all columns),
    * ``X_current[i]`` -- row ``i + look_back`` with the target column removed,
      kept with a singleton time axis, i.e. shape ``(1, width - 1)``,
    * ``y[i]``         -- the target column of row ``i + look_back``, shape ``(1,)``.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_columns)``.
        target_index: column to predict; negative values count from the end.
        look_back: number of past rows in every window.
        device: device the resulting float tensors are moved to.

    Raises:
        ValueError: if ``dataset`` is not 2-D, ``look_back`` is negative,
            ``target_index`` is out of range, or there are fewer rows than
            ``look_back``.
    """

    def __init__(self,
                 dataset,
                 target_index=0,
                 look_back=1,
                 device='cpu'):
        data = np.asarray(dataset)
        if data.ndim != 2:
            raise ValueError(
                "dataset must be 2-D (rows, columns), got {} dimension(s)".format(data.ndim))
        n_rows, width = data.shape
        if look_back < 0:
            raise ValueError("look_back must be non-negative, got {}".format(look_back))
        if not -width <= target_index < width:
            raise ValueError(
                "target_index {} is out of range for {} column(s)".format(target_index, width))
        # Normalise negative indices so the feature mask below excludes the right column.
        target_index %= width

        # Every row from `look_back` onwards has a full window in front of it, so
        # there are n_rows - look_back samples (subtracting one more would silently
        # drop the last valid window).
        length = n_rows - look_back
        if length < 0:
            raise ValueError(
                "dataset has {} row(s), fewer than look_back={}".format(n_rows, look_back))

        feature_mask = np.arange(width) != target_index
        # window_rows[i, j] == i + j  ->  data[window_rows] has shape (length, look_back, width)
        window_rows = np.arange(length)[:, None] + np.arange(look_back)[None, :]
        x = data[window_rows]
        current_rows = data[look_back:]
        x_current = current_rows[:, feature_mask][:, None, :]
        y = current_rows[:, target_index][:, None]

        self.X = torch.tensor(x).float().to(device)
        self.y = torch.tensor(y).float().to(device)
        self.X_current = torch.tensor(x_current).float().to(device)

    def __len__(self):
        """Number of (window, current features, target) samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``((window, current_features), target)`` for sample ``idx``."""
        return (self.X[idx], self.X_current[idx]), self.y[idx]


def setup_dataset(dataset,
                  look_back: int = 5,
                  train_size: float = 0.8,
                  scaler=None,
                  batch_size: int = 64,
                  shuffle: bool = False,
                  device: str = 'cpu',
                  target_index: int = 0):
    """Split a time series chronologically and wrap both parts in data loaders.

    The first ``train_size`` fraction of the rows is used for training and the
    remainder for testing. When a scaler is given it is fitted on the training
    rows only and then applied to both parts, so no test statistics leak into
    training.

    Args:
        dataset: 2-D array of shape ``(n_rows, n_columns)``.
        look_back: window length passed to ``TorchTSDataset``.
        train_size: fraction of rows (from the start) used for training.
        scaler: optional object with ``fit`` / ``transform`` methods.
        batch_size: batch size of both loaders.
        shuffle: whether to shuffle the *training* loader. The test loader is
            never shuffled: sample order does not matter for evaluation.
        device: device the dataset tensors are placed on (a string or a
            ``torch.device``).
        target_index: column to forecast (defaults to the first column).

    Returns:
        ``(train_loader, test_loader)``.
    """
    n_train = int(len(dataset) * train_size)
    n_test = len(dataset) - n_train
    data_train, data_test = dataset[:n_train, :], dataset[n_train:, :]
    print("Training set size = {}, testing set size = {}".format(n_train, n_test))

    if scaler is not None:
        scaler.fit(data_train)
        data_train = scaler.transform(data_train)
        data_test = scaler.transform(data_test)

    train_set = TorchTSDataset(data_train,
                               target_index=target_index,
                               look_back=look_back,
                               device=device)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=shuffle)
    test_set = TorchTSDataset(data_test,
                              target_index=target_index,
                              look_back=look_back,
                              device=device)
    test_loader = DataLoader(test_set,
                             batch_size=batch_size,
                             shuffle=False)
    return train_loader, test_loader

In [76]:
class ModuleTS(pl.LightningModule):
    """Lightning module that trains and validates a forecasting model.

    Args:
        model: wrapped network. It is called as ``model(x)`` and must provide
            ``get_metrics(predictions, target)`` returning a mapping with the
            keys ``"MSE"`` and ``"MAPE"``.
        loss: callable ``loss(predictions, target)`` returning a scalar tensor.
        lr: learning rate of the Adam optimizer.
        loss_multiplier: constant factor applied to the loss before it is
            logged and back-propagated (default ``1e4``).

    Only ``lr`` and ``loss_multiplier`` are stored as hyper-parameters. ``model``
    and ``loss`` are excluded, so both must be passed again when restoring a
    checkpoint written by this class with ``load_from_checkpoint``.
    """

    def __init__(self, model, loss, lr=1e-5, loss_multiplier=1e4):
        super().__init__()
        self.model = model
        self.loss = loss
        self.lr = lr
        self.loss_multiplier = loss_multiplier
        # nn.Module arguments are excluded from the saved hyper-parameters.
        self.save_hyperparameters(ignore=['model', 'loss'])

    def forward(self, x):
        """Delegate to the wrapped model."""
        return self.model(x)

    def configure_optimizers(self):
        """Adam over all parameters of this module with the configured learning rate."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer

    def _shared_step(self, batch, stage):
        """Run one batch, log ``"<stage> Loss" / "MSE" / "MAPE"`` and return the scaled loss."""
        data, target = batch
        predictions = self.forward(data)
        loss = self.loss_multiplier * self.loss(predictions, target)
        metrics = self.model.get_metrics(predictions, target)
        self.log(f"{stage} Loss", loss, prog_bar=True)
        self.log(f"{stage} MSE", metrics["MSE"], prog_bar=True)
        self.log(f"{stage} MAPE", metrics["MAPE"], prog_bar=True)
        return loss

    def training_step(self, train_batch, batch_idx):
        """Compute, log and return the scaled training loss for one batch."""
        return self._shared_step(train_batch, "Train")

    def validation_step(self, val_batch, batch_idx):
        """Compute and log the validation loss and metrics for one batch."""
        self._shared_step(val_batch, "Validation")

In [77]:
# Resolve the CSV relative to the working directory, i.e. inside the repository
# cloned by the setup cell, instead of an absolute Colab-only path.
pulses_stats_file_path = os.path.join("qber-forecasting", "datasets", "fr_gains.csv")
# Upper bound on the number of rows (from the start of the file) used in the experiments.
MAX_ROWS = 100000

dataframe = pd.read_csv(pulses_stats_file_path,
                        usecols=[0, 1, 2, 3, 4, 5, 6],
                        engine='python'
                        )
# Single assignment keeps the cell idempotent on re-run.
dataset = dataframe.values.astype('float32')[:MAX_ROWS]

In [152]:
# Scalers that can be swapped in instead of None:
#   QuantileTransformer(n_quantiles=20, output_distribution="normal")
#   StandardScaler()
scaler = None
loss = nn.MSELoss()
train_size = 0.8

# Run configuration; the same dict is sent to Weights & Biases with the run.
config = dict(
    learning_rate=1e-7,
    look_back=20,
    input_size=7,
    output_size=1,
    hidden_size=256,
    batch_size=256,
    epochs=50,
    loss="MSE",
    scaler=None,
    model="lower look back and low lr",
)

In [156]:
# Fix all random seeds before any loader or model is created.
seed_everything(123456)

# Chronological train / test split wrapped in data loaders.
train_loader, test_loader = setup_dataset(
    dataset,
    look_back=config["look_back"],
    train_size=train_size,
    scaler=config["scaler"],
    batch_size=config["batch_size"],
    device=device,
)

# Convenience aliases for the sizes stored in the run configuration.
input_size, look_back, hidden_size, output_size = (
    config[key] for key in ("input_size", "look_back", "hidden_size", "output_size")
)

Training set size = 80000, testing set size = 20000


In [89]:
class ExtractorLSTM(nn.Module):
    """Two-branch regressor.

    One branch runs an LSTM over the past window and keeps its output at the
    last time step; the other passes the current features through a small MLP.
    Both feature vectors are concatenated and mapped to the prediction by a
    linear layer.
    """

    def __init__(self, input_size, output_size, hid_size=128):
        """
        Args:
            input_size: number of features per time step of the past window;
                the current-features branch expects ``input_size - 1`` features.
            output_size: size of the prediction vector.
            hid_size: width of the LSTM state and of the MLP layers.
        """
        super().__init__()
        self.input_size, self.output_size, self.hid_size = input_size, output_size, hid_size

        # Sub-modules are created in the order lstm -> dense -> regressor.
        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hid_size,
                            batch_first=True)
        current_branch = [
            nn.Linear(input_size - 1, hid_size),
            nn.LeakyReLU(),
            nn.Linear(hid_size, hid_size),
        ]
        self.dense = nn.Sequential(*current_branch)
        self.regressor = nn.Linear(hid_size * 2, output_size)

    def forward(self, data):
        """Map a ``(past_window, current_features)`` pair to a prediction.

        Both inputs are batch-first 3-D tensors; the last step along axis 1 is
        taken from each branch before concatenation.
        """
        window, current = data
        lstm_out, _state = self.lstm(window)
        last_step = lstm_out[:, -1, :]
        current_encoded = self.dense(current)
        merged = torch.cat([last_step, current_encoded[:, -1, :]], dim=1)
        return self.regressor(merged)

In [157]:
# Build the network from the sizes stored in the run configuration.
model = ExtractorLSTM(config["input_size"],
                      config["output_size"],
                      hid_size=config["hidden_size"])

In [98]:
# Metric functions keyed by the names used when reporting results.
metrics = dict(
    MSE=mean_squared_error,
    MAPE=mean_absolute_percentage_error,
)

In [137]:
# Baseline: score exponential smoothing on the training windows with the same
# MSE / MAPE metric functions used elsewhere in the notebook.
# NOTE(review): ExpSmoothing lives in deep_qber and is not shown here. The meaning
# of its constructor arguments (1, 5) and of update()/get() is assumed from the
# names only -- confirm against the package.
from deep_qber import ExpSmoothing


target_index = 0
exp_model = ExpSmoothing(1, 5)
mape_vals = []
mse_vals = []

for batch, target in train_loader:
    predictions = []
    targets = []
    # A batch is a pair; only its first element (the past window) is used here.
    x, _ = batch
    x = x.cpu().numpy()
    # Last 6 time steps of the target column for every window in the batch.
    for trajectory, label in zip(x[:, -6:, target_index], target):
        # exp_model is created once above and never re-created, so whatever
        # state update() accumulates carries over between trajectories and batches.
        # NOTE(review): confirm that this carry-over is intended.
        for val in trajectory:
            exp_model.update(val)
        predictions.append(exp_model.get())
        targets.append(label.cpu().numpy()[-1])
    targets = torch.tensor(targets)
    predictions = torch.tensor(predictions)
    # One MAPE and one MSE value is recorded per batch.
    mape_vals.append(float(metrics["MAPE"](predictions, targets)))
    mse_vals.append(float(metrics["MSE"](predictions, targets)))

In [145]:
def run_experiment(train_loader, test_loader, model, loss, config, name,
                   project="qber-forecasting", entity="rmnigm", accelerator="auto"):
    """Train ``model`` with PyTorch Lightning and log the run to Weights & Biases.

    Args:
        train_loader: loader with the training batches.
        test_loader: loader used as the validation set during fitting.
        model: network to train; it is wrapped in ``ModelInterfaceTS``.
        loss: loss callable handed to ``ModuleTS``.
        config: run configuration; ``config["epochs"]`` and
            ``config["learning_rate"]`` are read here and the whole dict is
            attached to the W&B run.
        name: display name of the W&B run.
        project: W&B project the run is logged to.
        entity: W&B user or team that owns the project.
        accelerator: Lightning accelerator. ``"auto"`` picks a GPU when one is
            available and falls back to the CPU otherwise, so the function no
            longer fails on machines without a GPU.
    """
    # The context manager finishes the run on exit, also when training raises,
    # so no explicit run.finish() is needed inside the block.
    with wandb.init(project=project,
                    entity=entity,
                    settings=wandb.Settings(start_method="thread"),
                    config=config,
                    name=name,
                    ):
        wandb_logger = WandbLogger(log_model='all')
        # Keep the checkpoint with the lowest validation MAPE.
        checkpoint_callback = ModelCheckpoint(monitor="Validation MAPE", mode="min")

        model_interface = ModelInterfaceTS(model)
        module = ModuleTS(model_interface, loss, lr=config["learning_rate"])

        trainer = pl.Trainer(logger=wandb_logger,
                             callbacks=[checkpoint_callback],
                             accelerator=accelerator,
                             max_epochs=config["epochs"],
                             )

        trainer.fit(module, train_loader, test_loader)

In [158]:
# The free-text "model" entry of the config doubles as the W&B run name.
name = config["model"]

run_experiment(train_loader=train_loader,
               test_loader=test_loader,
               model=model,
               loss=loss,
               config=config,
               name=name)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type             | Params
-------------------------------------------
0 | model | ModelInterfaceTS | 339 K 
1 | loss  | MSELoss          | 0     
-------------------------------------------
339 K     Trainable params
0         Non-trainable params
339 K     Total params
1.358     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=50` reached.


0,1
Train Loss,█▇▆▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train MAPE,█▇▇▇▆▅▅▅▄▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train MSE,█▇▆▅▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation Loss,█▇▆▆▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation MAPE,██▇▇▆▅▅▅▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Validation MSE,█▇▆▆▄▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
trainer/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
Train Loss,0.0366
Train MAPE,0.16922
Train MSE,0.0
Validation Loss,0.09198
Validation MAPE,0.16959
Validation MSE,1e-05
epoch,49.0
trainer/global_step,15649.0
