# Sparsity loss

pytorch-forecasting 패키지가 제공하는 quantile loss를 수정하여, confusion matrix의 비율(TP / FP / FN / TN)에 따라 손실이 다르게 적용되도록 한다.

> QuantileLoss : https://pytorch-forecasting.readthedocs.io/en/stable/_modules/pytorch_forecasting/metrics/quantile.html

In [1]:
# Original quantile loss (copied from pytorch-forecasting, see the link above)
"""Quantile metrics for forecasting multiple quantiles per time step."""
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import torch

from pytorch_forecasting.metrics.base_metrics import MultiHorizonMetric


class QuantileLoss(MultiHorizonMetric):
    """
    Quantile (pinball) loss evaluated for several quantiles per time step.

    For a quantile ``q`` the per-element pinball loss is
    ``max(q * (y - y_pred), (1 - q) * (y_pred - y))``. :meth:`loss` returns
    twice that value, so for ``q=0.5`` the returned value equals the absolute
    error ``|y - y_pred|``.
    """

    def __init__(
        self,
        quantiles: Optional[List[float]] = None,
        **kwargs,
    ):
        """
        Quantile loss

        Args:
            quantiles: quantiles for metric. ``None`` selects
                ``[0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]``.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        # Build the default inside the call so that no list object is shared
        # between instances through the function's default argument.
        if quantiles is None:
            quantiles = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
        super().__init__(quantiles=quantiles, **kwargs)

    def loss(self, y_pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute the un-reduced quantile loss.

        Args:
            y_pred: network output whose last axis enumerates the quantiles
                in the order of ``self.quantiles``.
            target: actual values, shaped like ``y_pred[..., i]``.

        Returns:
            torch.Tensor: ``2 * pinball loss`` with one entry per element of
            ``target`` and per quantile (quantiles on the last axis).
        """
        losses = []
        for i, q in enumerate(self.quantiles):
            errors = target - y_pred[..., i]
            # max((q - 1) * e, q * e) is the pinball loss of error e
            losses.append(torch.max((q - 1) * errors, q * errors))
        # Stack on a new trailing axis; unlike a fixed ``dim=2`` this does not
        # depend on the rank of ``target``.
        return 2 * torch.stack(losses, dim=-1)

    def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Convert network prediction into a point prediction.

        Args:
            y_pred: prediction output of network

        Returns:
            torch.Tensor: for a 3-dimensional ``y_pred`` the slice belonging to
            the 0.5 quantile; any other input is returned unchanged.

        Raises:
            ValueError: if ``y_pred`` is 3-dimensional and 0.5 is not one of
                the configured quantiles (raised by the ``index`` lookup).
        """
        if y_pred.ndim == 3:
            idx = self.quantiles.index(0.5)
            y_pred = y_pred[..., idx]
        return y_pred

    def to_quantiles(self, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Convert network prediction into a quantile prediction.

        Args:
            y_pred: prediction output of network

        Returns:
            torch.Tensor: prediction quantiles (``y_pred`` itself, unchanged)
        """
        return y_pred




In [2]:
class QuantileLoss(MultiHorizonMetric):
    """
    Quantile (pinball) loss over a list of quantiles.

    The value returned by :meth:`loss` for quantile ``q`` is
    ``2 * max(q * (y - y_pred), (1 - q) * (y_pred - y))``.
    """

    def __init__(
        self,
        quantiles: Optional[List[float]] = None,
        **kwargs,
    ):
        """
        Args:
            quantiles: quantiles to evaluate. ``None`` selects
                ``[0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]``.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        # A fresh list per call avoids sharing one mutable default object.
        if quantiles is None:
            quantiles = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
        super().__init__(quantiles=quantiles, **kwargs)

    def loss(self, y_pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute the un-reduced loss for every quantile.

        Args:
            y_pred: network output; the last axis is indexed by quantile.
            target: actual values, shaped like ``y_pred[..., i]``.

        Returns:
            torch.Tensor: losses with the quantiles on a new last axis.
        """
        losses = []
        for i, q in enumerate(self.quantiles):
            errors = target - y_pred[..., i]
            losses.append(torch.max((q - 1) * errors, q * errors))
        # dim=-1 keeps this independent of the rank of ``target``
        return 2 * torch.stack(losses, dim=-1)

    def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Return the 0.5-quantile slice of a 3-dimensional ``y_pred``.

        Inputs with any other number of dimensions are returned unchanged.

        Raises:
            ValueError: if 0.5 is not among the configured quantiles.
        """
        if y_pred.ndim == 3:
            idx = self.quantiles.index(0.5)
            y_pred = y_pred[..., idx]
        return y_pred

    def to_quantiles(self, y_pred: torch.Tensor) -> torch.Tensor:
        """Return ``y_pred`` unchanged as the quantile prediction."""
        return y_pred


In [3]:
import torch

# Toy inputs for a smoke check: all-zero predictions with 7 values on the last
# axis, and all-zero targets except for a single entry set to 1.
pred_t = torch.zeros(4, 24, 7)
real_t = torch.zeros(4, 24)

real_t[0, 0] = 1

In [4]:
# Smoke check: evaluate the loss on the toy tensors and show the shape and
# dtype of the un-reduced result.
losses = QuantileLoss()
loss_values = losses.loss(pred_t, real_t)
loss_values.shape, loss_values.dtype

(torch.Size([4, 24, 7]), torch.float32)

In [7]:
class CustomLoss(MultiHorizonMetric):
    """
    Quantile (pinball) loss used as the starting point for a custom loss.

    For quantile ``q`` :meth:`loss` returns
    ``2 * max(q * (y - y_pred), (1 - q) * (y_pred - y))``.
    """

    def __init__(
        self,
        quantiles: Optional[List[float]] = None,
        **kwargs,
    ):
        """
        Args:
            quantiles: quantiles to evaluate. ``None`` selects
                ``[0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]``.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        # Create the default list per instance instead of sharing one
        # mutable default argument across all calls.
        if quantiles is None:
            quantiles = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
        super().__init__(quantiles=quantiles, **kwargs)

    def loss(self, y_pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute the un-reduced loss for every quantile.

        Args:
            y_pred: network output; the last axis is indexed by quantile.
            target: actual values, shaped like ``y_pred[..., i]``.

        Returns:
            torch.Tensor: losses with the quantiles on a new last axis.
        """
        losses = []
        for i, q in enumerate(self.quantiles):
            errors = target - y_pred[..., i]
            losses.append(torch.max((q - 1) * errors, q * errors))
        # Stack on the trailing axis so the result does not rely on ``target``
        # being exactly 2-dimensional.
        return 2 * torch.stack(losses, dim=-1)

    def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Return the 0.5-quantile slice of a 3-dimensional ``y_pred``.

        Inputs with any other number of dimensions are returned unchanged.

        Raises:
            ValueError: if 0.5 is not among the configured quantiles.
        """
        if y_pred.ndim == 3:
            idx = self.quantiles.index(0.5)
            y_pred = y_pred[..., idx]
        return y_pred

    def to_quantiles(self, y_pred: torch.Tensor) -> torch.Tensor:
        """Return ``y_pred`` unchanged as the quantile prediction."""
        return y_pred


In [8]:
import pandas as pd

def quantile_loss(tau: float, real: float, pred: float, pred_scale: float = 1.5) -> float:
    """
    Scalar pinball loss of a scaled prediction.

    The error is ``real - pred_scale * pred``; positive errors are weighted by
    ``tau`` and negative errors by ``1 - tau``.

    Args:
        tau: quantile level.
        real: actual value.
        pred: predicted value.
        pred_scale: factor applied to ``pred`` before the error is taken.
            The default of 1.5 reproduces the previously hard-coded behaviour.

    Returns:
        ``tau * max(error, 0) + (1 - tau) * max(-error, 0)``.
    """
    error = real - pred_scale * pred
    return tau * max(error, 0) + (1 - tau) * max(-error, 0)

# One row per (real, pred) combination. "error" holds |real - 1.5 * pred| and
# "confusion_matrix" labels the four corner cases (empty for pred == 0.5).
data_points = [
    {"real": real, "pred": pred, "confusion_matrix": label, "error": error}
    for real, pred, label, error in (
        (0, 0, "TN", 0),
        (0, 0.5, "", 0.75),
        (0, 1, "FP", 1.5),
        (1, 0, "FN", 1),
        (1, 0.5, "", 0.25),
        (1, 1, "TP", 0.5),
    )
]

taus = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]

# Add one quantile_loss_<tau> column per quantile level to every row.
for data_point in data_points:
    real, pred = data_point['real'], data_point['pred']
    for tau in taus:
        data_point[f'quantile_loss_{tau}'] = quantile_loss(tau, real, pred)

df = pd.DataFrame(data_points)
df


Unnamed: 0,real,pred,confusion_matrix,error,quantile_loss_0.02,quantile_loss_0.1,quantile_loss_0.25,quantile_loss_0.5,quantile_loss_0.75,quantile_loss_0.9,quantile_loss_0.98
0,0,0.0,TN,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0.5,,0.75,0.735,0.675,0.5625,0.375,0.1875,0.075,0.015
2,0,1.0,FP,1.5,1.47,1.35,1.125,0.75,0.375,0.15,0.03
3,1,0.0,FN,1.0,0.02,0.1,0.25,0.5,0.75,0.9,0.98
4,1,0.5,,0.25,0.005,0.025,0.0625,0.125,0.1875,0.225,0.245
5,1,1.0,TP,0.5,0.49,0.45,0.375,0.25,0.125,0.05,0.01


In [11]:
class CustomLoss(MultiHorizonMetric):
    """
    Quantile (pinball) loss computed on a scaled prediction.

    The error for quantile index ``i`` is ``target - pred_scale * y_pred[..., i]``
    and :meth:`loss` returns ``2 * max((q - 1) * error, q * error)``. With the
    default scale of 1.5, a prediction of 1 against a target of 0 gives an
    error of magnitude 1.5, a prediction of 0 against a target of 1 gives 1,
    and a prediction of 1 against a target of 1 still gives 0.5.
    """

    def __init__(
        self,
        quantiles: Optional[List[float]] = None,
        pred_scale: float = 1.5,
        **kwargs,
    ):
        """
        Args:
            quantiles: quantiles to evaluate. ``None`` selects
                ``[0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]``.
            pred_scale: factor applied to the prediction inside :meth:`loss`.
                The default of 1.5 matches the previously hard-coded value.
            **kwargs: forwarded unchanged to the base class constructor.
        """
        # Create the default list per instance instead of sharing one
        # mutable default argument across all calls.
        if quantiles is None:
            quantiles = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
        super().__init__(quantiles=quantiles, **kwargs)
        self.pred_scale = pred_scale

    def loss(self, y_pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """
        Compute the un-reduced loss for every quantile.

        Args:
            y_pred: network output; the last axis is indexed by quantile.
            target: actual values, shaped like ``y_pred[..., i]``.

        Returns:
            torch.Tensor: losses with the quantiles on a new last axis.
        """
        losses = []
        for i, q in enumerate(self.quantiles):
            # the prediction is scaled before the error is taken
            errors = target - self.pred_scale * y_pred[..., i]
            losses.append(torch.max((q - 1) * errors, q * errors))
        # Stack on the trailing axis so the result does not rely on ``target``
        # being exactly 2-dimensional.
        return 2 * torch.stack(losses, dim=-1)

    def to_prediction(self, y_pred: torch.Tensor) -> torch.Tensor:
        """
        Return the 0.5-quantile slice of a 3-dimensional ``y_pred``.

        The slice is returned as-is; ``pred_scale`` is not applied here.
        Inputs with any other number of dimensions are returned unchanged.

        Raises:
            ValueError: if 0.5 is not among the configured quantiles.
        """
        if y_pred.ndim == 3:
            idx = self.quantiles.index(0.5)
            y_pred = y_pred[..., idx]
        return y_pred

    def to_quantiles(self, y_pred: torch.Tensor) -> torch.Tensor:
        """Return ``y_pred`` unchanged as the quantile prediction."""
        return y_pred


In [12]:
import os
import copy
import time
import json
import torch
from pathlib import Path
import warnings

import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
from lightning.pytorch.loggers import TensorBoardLogger
import numpy as np
import pandas as pd
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import MAE, SMAPE, PoissonLoss, QuantileLoss

from IPython.core.display import HTML
from plotly.subplots import make_subplots

warnings.filterwarnings("ignore")  # avoid printing out absolute paths
import plotly.graph_objects as go


# Experiment settings.
quantiles = [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
hidden, atten_head = 64, 4
dropout = 0.1
alpha_start = 0.7
# lr = 0.001  (left disabled)


# Convolution kernel used by triangle_conv.
filter = np.array([2, 1, 0])


def triangle_conv(time_series):
    """
    Convolve ``time_series`` with the module-level ``filter`` and shift it.

    The 'same'-mode convolution result is rolled forward by half the kernel
    length (``len(filter) // 2``).

    NOTE(review): ``np.roll`` is circular, so the last convolution value wraps
    around to index 0 -- confirm that this is intended.

    Args:
        time_series: 1-D sequence accepted by ``np.convolve``.

    Returns:
        The shifted convolution result as a NumPy array.
    """
    half_width = len(filter) // 2
    smoothed = np.convolve(time_series, filter, mode='same')
    return np.roll(smoothed, half_width)

def inv_conv(time_series):
    """
    Return ``time_series`` unchanged.

    Registered as the 'reverse' entry of ``transformation_dict``; it is an
    identity function and does not undo ``triangle_conv``.
    """
    return time_series

# Forward / reverse callables handed to the target normalizer as its
# ``transformation``.
transformation_dict = dict(forward=triangle_conv, reverse=inv_conv)



In [13]:
# Prediction and encoder window lengths handed to TimeSeriesDataSet.
max_prediction_length = 24
max_encoder_length = 7 * 24

data = pd.read_csv('../../../DataProcessing/train_data/Long Beach_5_split.csv')

# Add uniform noise drawn from [0, 1) to every value of the target column.
noise = np.random.uniform(low=0, high=1.0, size=len(data))
data['Accient'] += noise

# data = data[lambda x : x.time_idx <= 400]  (left disabled)

# Rows up to this time index are used for training; the final
# ``max_prediction_length`` steps are held out.
training_cutoff = data["time_idx"].max() - max_prediction_length

# Training rows: everything up to and including the cutoff time index.
train_frame = data[data["time_idx"] <= training_cutoff]
# Columns from position 4 up to (not including) the last two are used as
# time-varying unknown reals.
feature_columns = data.columns[4:-2].tolist()

training = TimeSeriesDataSet(
    train_frame,
    time_idx="time_idx",
    target="Accient",
    group_ids=["GeoHash"],
    # encoder windows may be as short as half of the maximum length
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=[],
    static_reals=[],
    time_varying_known_categoricals=[],
    time_varying_known_reals=[],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=feature_columns,
    # normalize per GeoHash group, using the custom forward/reverse
    # transformation from ``transformation_dict``
    target_normalizer=GroupNormalizer(
        groups=["GeoHash"],
        transformation=transformation_dict,
    ),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)


# Validation set built from the full frame with the training set's settings.
validation = TimeSeriesDataSet.from_dataset(
    training,
    data,
    predict=True,
    stop_randomization=True,
)

# Dataloaders for the model; validation uses a 10x larger batch.
batch_size = 128  # set this between 32 to 128
val_batch_size = 10 * batch_size
train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=val_batch_size,
    num_workers=0,
)




In [14]:
# Stop when "val_loss" has not improved by at least 1e-6 for 10 checks.
# NOTE(review): patience (10) equals max_epochs (10) below, so early stopping
# probably cannot trigger before training ends -- confirm.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=1e-6,
    patience=10,
    verbose=False,
)

# TensorBoard event files are written under lightning_logs/test.
logger = TensorBoardLogger("lightning_logs/test")

trainer = pl.Trainer(
    accelerator="gpu",
    devices=[0],
    max_epochs=10,
    gradient_clip_val=0.1,
    enable_model_summary=True,
    # fast_dev_run=True,  # enable for a quick check of network and dataset
    callbacks=[early_stop_callback],
    logger=logger,
)

# Build the loss first so that the network's output width can be derived from
# its quantile list instead of repeating the count as a literal.
tft_loss = CustomLoss()

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=1e-4,
    hidden_size=16,
    attention_head_size=2,
    dropout=0.1,
    hidden_continuous_size=16,
    loss=tft_loss,
    log_interval=10,  # log every 10 batches
    optimizer="Ranger",
    output_size=len(tft_loss.quantiles),  # one output per quantile (7 with the defaults)
    reduce_on_plateau_patience=4,
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

# Train the model with the training / validation dataloaders.
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Number of parameters in network: 42.5k


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | CustomLoss                      | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 0     
3  | prescalers                         | ModuleDict                      | 608   
4  | static_variable_selection          | VariableSelectionNetwork        | 3.7 K 
5  | encoder_variable_selection         | VariableSelectionNetwork        | 23.7 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 1.2 K 
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K 
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K 
9  | static_context_initial_cell_lst

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]