In [1]:
import os
import sys

# Make the project root (the parent of the notebook directory) importable and
# use it as the working directory.
#
# The notebook directory is captured only on the first execution: re-running
# this cell in the same kernel would otherwise see the already-changed working
# directory and climb one more level up each time.
if "dir2" not in globals():
    dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if dir1 not in sys.path:
    sys.path.append(dir1)

# Equivalent to the first `os.chdir('..')`, but a no-op on subsequent runs.
os.chdir(dir1)

In [2]:
from pathlib import Path

import pandas as pd

from hydra import initialize, compose
from hydra.utils import instantiate

from torch.utils.data import Dataset


from ptls.preprocessing import PandasDataPreprocessor
from ptls.frames import PtlsDataModule

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.model_selection import train_test_split

from src.preprocessing import preprocess
from src.utils.create_trainer import create_trainer


In [3]:
# Compose the Hydra "master" configuration from the ../config directory.
with initialize(config_path="../config", version_base=None):
    cfg = compose(config_name="master")

# Disabled lookups kept from an earlier revision:
# cfg_preprop = cfg["dataset"]
# cfg_model = cfg["model"]

# Sub-configs used by the cells below.
backbone_cfg, logger_cfg = cfg["backbone"], cfg["logger"]

In [4]:
# Run the project preprocessing step on the "preprocessing" sub-config.
# The result is unpacked into (train, val, test) in the following cell.
data = preprocess(cfg["preprocessing"])

In [5]:
train, val, test = data

# All three splits are built from the same dataset config; only the training
# split is instantiated with deterministic=False.
split_dataset_cfg = backbone_cfg["dataset"]

train_data: Dataset = instantiate(split_dataset_cfg, data=train, deterministic=False)
val_data: Dataset = instantiate(split_dataset_cfg, data=val, deterministic=True)
test_data: Dataset = instantiate(split_dataset_cfg, data=test, deterministic=True)

In [6]:
# Loader settings for the PtlsDataModule (same batch size and worker count
# for the train and validation loaders).
datamodule_cfg = {
    "_target_": "ptls.frames.PtlsDataModule",
    "train_batch_size": 512,
    "train_num_workers": 4,
    "valid_batch_size": 512,
    "valid_num_workers": 4,
}

datamodule: PtlsDataModule = instantiate(
    datamodule_cfg,
    train_data=train_data,
    valid_data=val_data,
    test_data=test_data,
)

In [7]:
from copy import deepcopy
from pathlib import Path
from typing import Any, Literal, Optional, Union
from omegaconf import DictConfig
from pytorch_lightning.utilities.types import STEP_OUTPUT
from pytorch_lightning import LightningModule
from sklearn import multiclass

import torch
from torch import nn, Tensor

from hydra.utils import instantiate
from torchmetrics import (
    AUROC,
    Accuracy,
    AveragePrecision,
    F1Score,
    Metric,
    MetricCollection,
    MultitaskWrapper,
    R2Score,
)
from torchmetrics.functional import auroc, f1_score, r2_score, average_precision

from ptls.data_load import PaddedBatch
from ptls.nn.seq_encoder.containers import SeqEncoderContainer

from pytorch_lightning.utilities.types import LRSchedulerTypeUnion
from src.nn.decoders.base import AbsDecoder
from src.utils.logging_utils import get_logger


class GPTModule(LightningModule):
    """Autoregressive next-transaction prediction module.

    The sequence encoder yields one embedding per position. Two heads turn the
    embedding at position ``t`` into a prediction for the transaction at
    position ``t + 1``: mcc-code logits and the log-transformed amount.

    Logs per-epoch train/val/test losses:
     - ``{stage}_loss_mcc``: a CrossEntropyLoss on mcc codes (class 0 is ignored)
     - ``{stage}_loss_amt``: an MSELoss on ``log(amount + 1)``
     - ``{stage}_loss``: the weighted sum of the two losses above
    and per-epoch train/val/test metrics:
     - a weighted multiclass auroc score and average precision on mcc codes
     - a micro-averaged multiclass f1-score and accuracy on mcc codes
     - an r2-score on log-amounts

     Attributes:
        seq_encoder (SeqEncoderContainer):
            The sequence encoder, instantiated from the ``seq_encoder`` config.
        amount_head (nn.Module):
            Head predicting the (log-)amount of the next transaction.
        mcc_head (nn.Module):
            Head predicting the mcc-code logits of the next transaction.
        amount_loss_weight (float):
            Normalized loss weight for the transaction amount MSE loss.
        mcc_loss_weight (float):
            Normalized loss weight for the transaction mcc code CE loss.
        train_metrics, val_metrics, test_metrics (nn.ModuleDict):
            Per-stage metric collections under the keys "mcc" and "amount".

    Notes:
        amount_loss_weight, mcc_loss_weight are normalized so that amount_loss_weight + mcc_loss_weight = 1.
        This is done to remove one hyperparameter. Loss gradient size can be managed separately through lr.

    """

    def __init__(
        self,
        loss_weights: dict[Literal["amount", "mcc"], float],
        seq_encoder: DictConfig,
        mcc_head: DictConfig,
        amount_head: DictConfig,
        optimizer: DictConfig, 
        scheduler: Optional[DictConfig] = None,
        scheduler_config: Optional[dict] = None,
    ) -> None:
        """Initialize GPTModule internal state.

        Args:
            loss_weights (dict):
                A dictionary with keys "amount" and "mcc", mapping them to the corresponding loss weights
            seq_encoder (DictConfig):
                Config of the sequence encoder; the instantiated object must expose ``embedding_size``.
            mcc_head (DictConfig):
                DictConfig for mcc head, instantiated with in_channels keyword argument.
            amount_head (DictConfig):
                Partial dictconfig for amount head, instantiated with in_channels keyword argument.
            optimizer (DictConfig):
                Optimizer dictconfig, instantiated with params kwarg.
            scheduler (Optional[DictConfig]):
                Optionally, an lr scheduler dictconfig, instantiated with optimizer kwarg
            scheduler_config (Optional[dict]):
                An lr_scheduler config for specifying scheduler-specific params, such as which metric to monitor
                See LightningModule.configure_optimizers docstring for more details.

        Raises:
            ValueError: if the loss weights sum to zero (they could not be normalized).
        """
        super().__init__()
        self.save_hyperparameters()

        self.seq_encoder: SeqEncoderContainer = instantiate(seq_encoder)

        # Both heads consume the encoder's per-position embedding.
        embedding_size = self.seq_encoder.embedding_size
        self.amount_head = instantiate(amount_head, in_channels=embedding_size)
        self.mcc_head = instantiate(mcc_head, in_channels=embedding_size)

        self.optimizer_dictconfig = optimizer
        self.scheduler_dictconfig = scheduler
        self.scheduler_config = scheduler_config or {}

        # Normalize the weights once; fail with a clear message instead of a
        # bare ZeroDivisionError when they cancel out.
        weight_total = sum(loss_weights.values())
        if weight_total == 0:
            raise ValueError("loss_weights must not sum to zero")
        self.amount_loss_weight = loss_weights["amount"] / weight_total
        self.mcc_loss_weight = loss_weights["mcc"] / weight_total

        self.mcc_criterion = nn.CrossEntropyLoss(ignore_index=0)
        self.amount_criterion = nn.MSELoss()

        # Assumes the mcc head is indexable and that its second-to-last layer
        # is the final Linear (e.g. an MLP ending in Linear -> Dropout).
        multiclass_args: dict[str, Any] = dict(
            task="multiclass",
            num_classes=self.mcc_head[-2].out_features,
            ignore_index=0,
        )

        def make_metrics(stage: str) -> nn.ModuleDict:
            """Build an independent set of mcc/amount metrics for one stage."""
            # NOTE(review): the prefix is passed without a separator, while the
            # losses are logged as f"{stage}_{name}" - confirm whether the
            # metric keys are meant to carry an underscore as well.
            return nn.ModuleDict({
                "mcc": MetricCollection(
                    AUROC(**multiclass_args, average="weighted"),
                    F1Score(**multiclass_args, average="micro"),
                    AveragePrecision(**multiclass_args, average="weighted"),
                    Accuracy(**multiclass_args, average="micro"),
                    prefix=stage
                ),
                "amount": MetricCollection(R2Score(), prefix=stage),
            })

        self.train_metrics: nn.ModuleDict = make_metrics("train")
        self.val_metrics: nn.ModuleDict = make_metrics("val")
        self.test_metrics: nn.ModuleDict = make_metrics("test")

    def forward(self, x):
        """Run the sequence encoder on ``x`` and return its output unchanged."""
        return self.seq_encoder(x)

    @property
    def metric_name(self):
        """Name of the logged value to monitor (the validation total loss)."""
        return "val_loss"

    def _calculate_losses(
        self,
        mcc_pred: Tensor,
        amount_pred: Tensor,
        mcc_target: Tensor,
        amount_target: Tensor,
        mask: Tensor,
    ) -> dict[str, Tensor]:
        """Calculate the losses, weigh them with respective weights

        Args:
            mcc_pred (Tensor): Predicted mcc logits, (B, L, mcc_vocab_size).
            amount_pred (Tensor): Predicted amounts, (B, L).
            mcc_target (Tensor): target mcc codes.
            amount_target (Tensor): target amounts.
            mask (Tensor): mask of non-padding elements

        Returns:
            Dictionary of losses, with keys loss, loss_mcc, loss_amt.
        """
        # Boolean indexing drops the padded positions before the criteria see them.
        mcc_loss = self.mcc_criterion(mcc_pred[mask], mcc_target[mask])
        amount_loss = self.amount_criterion(amount_pred[mask], amount_target[mask])

        total_loss = (
            self.mcc_loss_weight * mcc_loss + self.amount_loss_weight * amount_loss
        )

        return {"loss": total_loss, "loss_mcc": mcc_loss, "loss_amt": amount_loss}

    def shared_step(
        self,
        stage: Literal["train", "val", "test"],
        batch: PaddedBatch,
        *args,
        **kwargs,
    ) -> STEP_OUTPUT:
        """Generalized function to do a train/val/test step.

        Args:
            stage (str): train, val, or test, depending on the stage.
            batch (PaddedBatch): Input.
            *args, **kwargs: ignored (e.g. batch_idx).

        Returns:
            STEP_OUTPUT: the total weighted loss, for every stage.
        """

        embeddings = self(batch).payload

        # The prediction made at position t is scored against the transaction
        # at position t + 1: drop the last prediction and the first target.
        mcc_pred = self.mcc_head(embeddings)[:, :-1, :]
        amount_pred = self.amount_head(embeddings)[:, :-1].squeeze(-1)
        
        mcc_target = batch.payload["mcc_code"][:, 1:]
        amount_target = torch.log(batch.payload["amount"][:, 1:] + 1)  # Logarithmize targets

        nonpad_mask = batch.seq_len_mask[:, 1:].bool()

        loss_dict = self._calculate_losses(
            mcc_pred, amount_pred, mcc_target, amount_target, nonpad_mask
        )

        metrics = {
            "train": self.train_metrics,
            "val": self.val_metrics,
            "test": self.test_metrics,
        }[stage]

        metrics["mcc"].update(mcc_pred[nonpad_mask], mcc_target[nonpad_mask])
        metrics["amount"].update(amount_pred[nonpad_mask], amount_target[nonpad_mask])

        self.log_dict(
            {f"{stage}_{k}": v for k, v in loss_dict.items()},
            on_step=False,
            on_epoch=True,
            batch_size=batch.seq_feature_shape[0],
        )
        
        # The metric collections themselves are handed to log_dict, with
        # epoch-level logging only.
        for metric in metrics.values():
            self.log_dict(
                metric, # type: ignore
                on_step=False,
                on_epoch=True,
                batch_size=batch.seq_feature_shape[0],
            )

        return loss_dict["loss"]

    def training_step(self, *args, **kwargs) -> STEP_OUTPUT:
        """Run ``shared_step`` for the "train" stage."""
        return self.shared_step("train", *args, **kwargs)

    def validation_step(self, *args, **kwargs) -> Union[STEP_OUTPUT, None]:
        """Run ``shared_step`` for the "val" stage."""
        return self.shared_step("val", *args, **kwargs)

    def test_step(self, *args, **kwargs) -> Union[STEP_OUTPUT, None]:
        """Run ``shared_step`` for the "test" stage."""
        return self.shared_step("test", *args, **kwargs)

    def configure_optimizers(self):
        """Instantiate the optimizer and, if configured, the lr scheduler.

        Returns:
            The optimizer alone when no scheduler config was given; otherwise
            ``([optimizer], [scheduler_config])`` where the scheduler config is
            ``{"scheduler": scheduler, **self.scheduler_config}``.
        """
        optimizer = instantiate(self.optimizer_dictconfig, params=self.parameters())

        if self.scheduler_dictconfig:
            scheduler = instantiate(self.scheduler_dictconfig, optimizer=optimizer)
            scheduler_config = {"scheduler": scheduler, **self.scheduler_config}

            return [optimizer], [scheduler_config]

        return optimizer

    # Overriding lr_scheduler_step to fool the exception (which doesn't appear in later versions of pytorch_lightning):
    # pytorch_lightning.utilities.exceptions.MisconfigurationException:
    #   The provided lr scheduler `...` doesn't follow PyTorch's LRScheduler API.
    #   You should override the `LightningModule.lr_scheduler_step` hook with your own logic if you are using a custom LR scheduler.
    def lr_scheduler_step(
        self, scheduler: LRSchedulerTypeUnion, optimizer_idx: int, metric
    ) -> None:
        """Delegate to the default implementation (see the comment above)."""
        return super().lr_scheduler_step(scheduler, optimizer_idx, metric)

In [8]:
from ptls.frames.bert.losses.query_soft_max import QuerySoftmaxLoss

class GPTContrastiveModule(LightningModule):
    """Contrastive next-transaction prediction module.

    The sequence encoder yields one embedding per position and an MLP head
    projects it to the size of the transaction encoder output. The projection
    at position ``t`` is trained with ``QuerySoftmaxLoss`` against the
    transaction-encoder output at position ``t + 1``, using negatives sampled
    from the head outputs at other non-padded positions of the same batch.

    Logs a single per-epoch value, ``{stage}_loss``, for train/val/test.

     Attributes:
        encoder (SeqEncoderContainer):
            The sequence encoder; its ``trx_encoder`` also provides the targets.
        head (nn.Module):
            MLP mapping ``encoder.embedding_size`` to ``encoder.trx_encoder.output_size``.
        loss_fn (QuerySoftmaxLoss):
            The contrastive loss, built with the given temperature.

    """

    def __init__(
        self,
        encoder: DictConfig,
        optimizer: DictConfig, 
        scheduler: Optional[DictConfig] = None,
        scheduler_config: Optional[dict] = None,
        neg_count: int = 5,
        temperature: float = 20,
    ) -> None:
        """Initialize GPTContrastiveModule internal state.

        Args:
            encoder (DictConfig):
                Config of the sequence encoder; the instantiated object must expose
                ``embedding_size`` and a ``trx_encoder`` with ``output_size``.
            optimizer (DictConfig):
                Optimizer dictconfig, instantiated with params kwarg.
            scheduler (Optional[DictConfig]):
                Optionally, an lr scheduler dictconfig, instantiated with optimizer kwarg
            scheduler_config (Optional[dict]):
                An lr_scheduler config for specifying scheduler-specific params, such as which metric to monitor
                See LightningModule.configure_optimizers docstring for more details.
            neg_count (int):
                Number of negatives sampled for every non-padded position.
            temperature (float):
                Temperature passed to ``QuerySoftmaxLoss``.
        """
        super().__init__()
        self.save_hyperparameters()

        self.encoder: SeqEncoderContainer = instantiate(encoder)
        # A single hidden layer whose width is the transaction-encoder output
        # size, so head outputs and targets have the same dimensionality.
        self.head = instantiate(
            {"_target_": "torchvision.ops.MLP"},
            in_channels=self.encoder.embedding_size,
            hidden_channels=[self.encoder.trx_encoder.output_size],
        )

        self.optimizer_dictconfig = optimizer
        self.scheduler_dictconfig = scheduler
        self.scheduler_config = scheduler_config or {}

        self.loss_fn = QuerySoftmaxLoss(temperature, reduce=True)

    def forward(self, x):
        """Run the sequence encoder on ``x`` and return its output unchanged."""
        return self.encoder(x)

    def get_neg_ix(self, mask):
        """Sample negative positions for every position where `mask == True`.

        For each of the N selected positions, `neg_count` distinct other
        selected positions are drawn uniformly (the position itself is excluded
        by the zeroed diagonal).

        Returns:
            (b_ix, t_ix): batch and time indices of the negatives, each of
            shape (N, neg_count).
        """
        mask_num = mask.int().sum()
        # (N, N) sampling weights: 1 everywhere except 0 on the diagonal.
        # NOTE(review): this matrix is quadratic in N, and sampling without
        # replacement presumably needs at least neg_count + 1 selected
        # positions in the batch - confirm for small batches.
        mn = 1 - torch.eye(mask_num, device=mask.device)
        neg_ix = torch.multinomial(mn, self.hparams.neg_count)

        # Map the flat indices among selected positions back to (batch, time).
        b_ix = torch.arange(mask.size(0), device=mask.device).view(-1, 1).expand_as(mask)[mask][neg_ix]
        t_ix = torch.arange(mask.size(1), device=mask.device).view(1, -1).expand_as(mask)[mask][neg_ix]
        return b_ix, t_ix

    @property
    def metric_name(self):
        """Name of the logged value to monitor (the validation loss)."""
        return "val_loss"

    def shared_step(
        self,
        stage,
        batch: PaddedBatch,
        *args,
        **kwargs,
    ) -> STEP_OUTPUT:
        """Generalized function to do a train/val/test step.

        Args:
            stage (str): train, val, or test, depending on the stage.
            batch (PaddedBatch): Input.
            *args, **kwargs: ignored (e.g. batch_idx).

        Returns:
            STEP_OUTPUT: the contrastive loss, for every stage.
        """
        # Targets are the transaction-encoder outputs shifted by one step;
        # predictions come from the sequence embeddings without the last step.
        mask = batch.seq_len_mask[:, 1:].bool()
        x_trx = self.encoder.trx_encoder(batch).payload[:, 1:]

        embeddings = self.encoder(batch).payload[:, :-1, :]
        out = self.head(embeddings)

        target = x_trx[mask].unsqueeze(1)  # N, 1, H
        predict = out[mask].unsqueeze(1)  # N, 1, H

        neg_ix = self.get_neg_ix(mask)
        negative = out[neg_ix[0], neg_ix[1]]  # N, nneg, H

        loss = self.loss_fn(target, predict, negative)

        # Log the detached tensor rather than loss.item(): the same value is
        # recorded without forcing a device-to-host sync on every step.
        self.log_dict(
            {f"{stage}_loss": loss.detach()},
            on_step=False,
            on_epoch=True,
            batch_size=batch.seq_feature_shape[0],
        )

        return loss

    def training_step(self, *args, **kwargs) -> STEP_OUTPUT:
        """Run ``shared_step`` for the "train" stage."""
        return self.shared_step("train", *args, **kwargs)

    def validation_step(self, *args, **kwargs) -> Union[STEP_OUTPUT, None]:
        """Run ``shared_step`` for the "val" stage."""
        return self.shared_step("val", *args, **kwargs)

    def test_step(self, *args, **kwargs) -> Union[STEP_OUTPUT, None]:
        """Run ``shared_step`` for the "test" stage."""
        return self.shared_step("test", *args, **kwargs)

    def configure_optimizers(self):
        """Instantiate the optimizer and, if configured, the lr scheduler.

        Returns:
            The optimizer alone when no scheduler config was given; otherwise
            ``([optimizer], [scheduler_config])`` where the scheduler config is
            ``{"scheduler": scheduler, **self.scheduler_config}``.
        """
        optimizer = instantiate(self.optimizer_dictconfig, params=self.parameters())

        if self.scheduler_dictconfig:
            scheduler = instantiate(self.scheduler_dictconfig, optimizer=optimizer)
            scheduler_config = {"scheduler": scheduler, **self.scheduler_config}

            return [optimizer], [scheduler_config]

        return optimizer

    # Overriding lr_scheduler_step to fool the exception (which doesn't appear in later versions of pytorch_lightning):
    # pytorch_lightning.utilities.exceptions.MisconfigurationException:
    #   The provided lr scheduler `...` doesn't follow PyTorch's LRScheduler API.
    #   You should override the `LightningModule.lr_scheduler_step` hook with your own logic if you are using a custom LR scheduler.
    def lr_scheduler_step(
        self, scheduler: LRSchedulerTypeUnion, optimizer_idx: int, metric
    ) -> None:
        """Delegate to the default implementation (see the comment above)."""
        return super().lr_scheduler_step(scheduler, optimizer_idx, metric)

In [9]:
# Sub-configs forwarded to the module constructor; "decoder" is optional.
module_args = {"encoder": backbone_cfg["encoder"]}
if "decoder" in backbone_cfg:
    module_args["decoder"] = backbone_cfg["decoder"]

# Build the LightningModule with _recursive_=False so that nested configs
# reach the constructor as DictConfigs; this keeps all hyperparameters
# saveable and loadable from a lightning checkpoint.
module = instantiate(backbone_cfg["module"], _recursive_=False, **module_args)

In [10]:
# Transaction encoder: a 24-dimensional mcc_code embedding (344 codes) plus
# the "log"-scaled amount.
trx_encoder_cfg = {
    "_target_": "ptls.nn.TrxEncoder",
    "use_batch_norm_with_lens": True,
    "norm_embeddings": False,
    "embeddings_noise": 0.0003,
    "embeddings": {"mcc_code": {"in": 344, "out": 24}},
    "numeric_values": {"amount": "log"},
}

# Unidirectional LSTM sequence encoder with sequence reduction switched off.
rnn_encoder_cfg = {
    "_target_": "ptls.nn.RnnSeqEncoder",
    "trx_encoder": trx_encoder_cfg,
    "hidden_size": 1024,
    "bidir": False,
    "trainable_starter": "static",
    "type": "lstm",
    "is_reduce_sequence": False,
}

# This replaces the config-driven `module` built in the previous cell.
module = GPTContrastiveModule(
    encoder=rnn_encoder_cfg,
    optimizer={"_target_": "torch.optim.AdamW", "lr": 0.0005},
    scheduler={
        "_target_": "torch.optim.lr_scheduler.StepLR",
        "step_size": 800,
        "gamma": 0.5,
    },
)

In [11]:
# Build the trainer from the backbone's trainer config; the module's
# `metric_name` ("val_loss" for the modules defined above) is passed on as the
# metric to track.
trainer = create_trainer(
    metric_name=module.metric_name,
    **backbone_cfg["trainer"],
)

# Training the model
trainer.fit(module, datamodule)

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]

  | Name    | Type             | Params
---------------------------------------------
0 | encoder | RnnSeqEncoder    | 4.3 M 
1 | head    | MLP              | 25.6 K
2 | loss_fn | QuerySoftmaxLoss | 0     
---------------------------------------------
4.3 M     Trainable params
0         Non-trainable params
4.3 M     Total params
8.680     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [16]:
# GPTModule keeps its backbone under `seq_encoder`, GPTContrastiveModule under
# `encoder`; pick whichever attribute the current `module` actually has so the
# cell does not fail with AttributeError for the contrastive module.
backbone = module.seq_encoder if hasattr(module, "seq_encoder") else module.encoder
torch.save(backbone.state_dict(), "gpt.pth")

In [17]:
from src.global_validation.global_validation_pipeline import global_target_validation

{'embed_data': {'batch_size': 64, 'device': 'cuda'}, 'n_runs': '${oc.decode:${oc.env:FAST_DEV_RUN,10}}', 'model': {'_target_': 'lightgbm.LGBMClassifier', 'n_estimators': 500, 'boosting_type': 'gbdt', 'subsample': 0.5, 'subsample_freq': 1, 'learning_rate': 0.02, 'feature_fraction': 0.75, 'max_depth': 6, 'lambda_l1': 1, 'lambda_l2': 1, 'min_data_in_leaf': 50, 'random_state': 42, 'n_jobs': 8, 'verbose': -1}}

In [28]:
# Same encoder architecture as used for training, except that sequence
# reduction is switched on for validation.
validation_trx_encoder_cfg = {
    "_target_": "ptls.nn.TrxEncoder",
    "use_batch_norm_with_lens": True,
    "norm_embeddings": False,
    "embeddings_noise": 0.0003,
    "embeddings": {"mcc_code": {"in": 344, "out": 24}},
    "numeric_values": {"amount": "log"},
}
validation_encoder_cfg = {
    "_target_": "ptls.nn.RnnSeqEncoder",
    "trx_encoder": validation_trx_encoder_cfg,
    "hidden_size": 1024,
    "bidir": False,
    "trainable_starter": "static",
    "type": "lstm",
    "is_reduce_sequence": True,
}

res = global_target_validation(
    data,
    validation_encoder_cfg,
    cfg["validation"]["global_target"],
    "gpt",
)

In [27]:
# Mean and standard deviation of every metric column in `res`
# (rows presumably correspond to the individual validation runs).
res.agg(["mean", "std"])

Unnamed: 0,AUROC,PR-AUC,Accuracy,F1Score
mean,0.721238,0.748357,0.6672,0.721139
std,0.011553,0.010213,0.01255,0.012974


In [46]:
# Debug call: run a single training step by hand and display the returned loss.
# NOTE(review): `batch` is not assigned in any visible cell - it presumably came
# from an ad-hoc cell that has since been removed; confirm before re-running.
module.shared_step("train", batch)

  rank_zero_warn(


tensor(12.3183, grad_fn=<AddBackward0>)

In [30]:
# Display the module's repr (its layer tree).
module

VanillaAE(
  (encoder): RnnSeqEncoder(
    (trx_encoder): TrxEncoder(
      (embeddings): ModuleDict(
        (mcc_code): NoisyEmbedding(
          344, 24, padding_idx=0
          (dropout): Dropout(p=0, inplace=False)
        )
      )
      (numeric_values): ModuleDict(
        (amount): LogScaler()
      )
      (numerical_batch_norm): RBatchNormWithLens(
        (bn): BatchNorm1d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (seq_encoder): RnnEncoder(
      (rnn): LSTM(25, 1024, batch_first=True)
      (reducer): LastStepEncoder()
    )
  )
  (decoder): LSTMCellDecoder(
    (cell): LSTMCell(1024, 2048)
    (projector): Sequential(
      (0): Linear(in_features=2048, out_features=1024, bias=True)
      (1): ReLU()
    )
    (lstm): Identity()
  )
  (amount_head): MLP(
    (0): Linear(in_features=2048, out_features=1, bias=True)
    (1): Dropout(p=0.0, inplace=False)
  )
  (mcc_head): MLP(
    (0): Linear(in_features=2048, out_features=100, bia

In [4]:
# Location of the training parquet file, taken from the "dataset" config.
train_parquet_path = Path(cfg["dataset"]["dir_path"]) / cfg["dataset"]["train_file_name"]

df = pd.read_parquet(train_parquet_path)
df.head(10)

Unnamed: 0,user_id,timestamp,mcc_code,amount,global_target
0,33172,6,4,71.463,0
1,33172,6,35,45.017,0
2,33172,8,11,13.887,0
3,33172,9,11,15.983,0
4,33172,10,11,21.341,0
5,33172,11,11,17.941,0
6,33172,12,11,17.726,0
7,33172,13,18,47.397,0
8,33172,13,1,220.009,0
9,33172,13,11,9.067,0


In [5]:
# Preprocessor configuration: rows are keyed by user_id and ordered by the
# timestamp column (used as-is, transformation "none"); mcc_code is treated as
# categorical and amount as numerical.
preprocessor = PandasDataPreprocessor(
    col_id="user_id",
    col_event_time="timestamp",
    event_time_transformation="none",
    cols_category=["mcc_code"],
    cols_numerical=["amount"],
    return_records=True
)

In [6]:
# Fit the preprocessor on the raw frame and transform it in one pass.
dataset = preprocessor.fit_transform(df)

In [7]:
# 80/20 train/validation split. The seed is fixed so that a kernel restart
# reproduces the same split (and therefore comparable results).
train, val = train_test_split(dataset, test_size=.2, random_state=42)

In [8]:
# NOTE(review): neither `cfg_model` nor `CustomColesDataset` is defined or
# imported in the visible cells, so this cell looks like a leftover from an
# earlier revision of the notebook - confirm, then restore the definitions or
# remove the cell.
train_data: CustomColesDataset = instantiate(cfg_model["dataset"], data=train)
val_data: CustomColesDataset = instantiate(cfg_model["dataset"], data=val)

In [9]:
# Datamodule built from the "datamodule" section of `cfg_model`.
# NOTE(review): `cfg_model` is not defined in the visible cells - confirm.
datamodule: PtlsDataModule = instantiate(
    cfg_model["datamodule"],
    train_data=train_data,
    valid_data=val_data
)

In [10]:
# NOTE(review): `CustomCoLES` and `cfg_model` are not defined or imported in the
# visible cells - confirm where they are expected to come from.
model: CustomCoLES = instantiate(cfg_model["model"])

In [15]:
# Checkpoint callback from the trainer_coles config; it monitors the model's
# `metric_name` with mode="max".
model_checkpoint: ModelCheckpoint = instantiate(
    cfg_model["trainer_coles"]["checkpoint_callback"],
    monitor=model.metric_name,
    mode="max"
)

In [16]:
# Early-stopping callback from the trainer_coles config; it monitors the same
# metric as the checkpoint callback, also with mode="max".
early_stopping: EarlyStopping = instantiate(
    cfg_model["trainer_coles"]["early_stopping"],
    monitor=model.metric_name,
    mode="max"
)

In [17]:
# NOTE(review): `TensorBoardLogger` is not imported in the visible cells -
# confirm the intended import before re-running.
logger: TensorBoardLogger = instantiate(cfg_model["trainer_coles"]["logger"])

In [18]:
# Trainer from the trainer_coles config, wired to the checkpoint and
# early-stopping callbacks and the logger created in the preceding cells.
trainer: Trainer = instantiate(
    cfg_model["trainer_coles"]["trainer"],
    callbacks=[model_checkpoint, early_stopping],
    logger=logger
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [19]:
# Train `model` on the datamodule built above.
trainer.fit(model, datamodule)

  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type            | Params
-------------------------------------------------------
0 | _loss              | ContrastiveLoss | 0     
1 | _seq_encoder       | RnnSeqEncoder   | 18.5 K
2 | _validation_metric | BatchRecallTopK | 0     
3 | _head              | Head            | 0     
-------------------------------------------------------
18.5 K    Trainable params
0         Non-trainable params
18.5 K    Total params
0.074     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
