In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import sys

dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if not dir1 in sys.path:
    sys.path.append(dir1)

os.chdir('..')

%load_ext autoreload
%autoreload

In [2]:
from pathlib import Path

import pandas as pd

import torch

from hydra import initialize, compose
from hydra.utils import instantiate, call

from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import TensorBoardLogger, CometLogger

from sklearn.model_selection import train_test_split

from src.local_validation.local_validation_model import LocalValidationModelBase
from src.preprocessing import preprocess
from src.pooling import PoolingModel

from ptls.frames import PtlsDataModule

# Example usage with the churn dataset (attention pooling model)


In [3]:
# Dataset name; it selects which Hydra config is composed below.
DATASET = "churn"

# Compose the config named "pooling_local_validation_" + DATASET from ../config.
with initialize(config_path="../config", version_base=None):
    cfg = compose(config_name="pooling_local_validation_" + DATASET)
    
# Sub-configs consumed by the following cells.
cfg_preprop = cfg["preprocessing"]
cfg_validation = cfg["validation"]["event_type_100"]
cfg_encoder = cfg["backbone"]["encoder"]

In [4]:
# Preprocess the raw data into train / validation / test splits.
train, val, test = preprocess(cfg_preprop)

# Build one validation dataset per split from the same dataset config.
# Only the training split is created with deterministic=False.
dataset_cfg = cfg_validation["dataset"]
train_dataset, val_dataset, test_dataset = (
    call(dataset_cfg, data=split, deterministic=is_deterministic)
    for split, is_deterministic in ((train, False), (val, True), (test, True))
)

# Bundle the three datasets into the datamodule used by the trainers below.
datamodule: PtlsDataModule = instantiate(
    cfg_validation["datamodule"],
    train_data=train_dataset,
    valid_data=val_dataset,
    test_data=test_dataset,
)

In [5]:
# Override the `in` setting of the mcc_code embedding in the encoder config.
# NOTE(review): 345 is a magic number — presumably the mcc_code vocabulary size
# expected by the saved checkpoint; confirm and move it to a named constant.
cfg_encoder["trx_encoder"]["embeddings"]["mcc_code"]["in"] = 345

In [6]:
# Build the sequence encoder from its config and restore the saved weights.
encoder_name = "coles_churn"
sequence_encoder = instantiate(cfg_encoder, is_reduce_sequence=True)

# map_location="cpu" lets the checkpoint load even when the device it was
# saved from is unavailable; load_state_dict then copies the values into the
# encoder's existing parameters.
encoder_state = torch.load(f"saved_models/{encoder_name}.pth", map_location="cpu")
sequence_encoder.load_state_dict(encoder_state)

<All keys matched successfully>

In [7]:
# Wrap the loaded encoder in a PoolingModel configured for attention pooling.
pooling_kwargs = dict(
    train_data=train,
    backbone=sequence_encoder,
    backbone_embd_size=sequence_encoder.seq_encoder.hidden_size,
    max_users_in_train_dataloader=500,
    pooling_type="attention",
    min_seq_length=15,
    max_seq_length=100,
    max_embs_per_user=100,
)
pooling_model = PoolingModel(**pooling_kwargs)

 14%|█▍        | 551/3998 [01:09<09:04,  6.34it/s]

In [8]:
# Pull a single (batch, target) pair from the training dataloader for inspection.
batch, target = next(iter(datamodule.train_dataloader()))

In [10]:
# Show the shape of the `event_time` entry in the sampled batch's payload.
batch.payload["event_time"].shape

torch.Size([8, 36])

In [11]:
# Validation: build the local-validation model on top of the pooling backbone.
# NOTE(review): this cell reads cfg_validation["module"], while the later
# validation cells read cfg_validation["model"] — confirm which key the
# config actually defines.

seed_everything(42)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.get_emb_dim(),
)

Global seed set to 42


In [12]:
# Create a trainer from the validation trainer settings, then fit and test.
trainer_kwargs = dict(cfg_validation["trainer"])
val_trainer = Trainer(**trainer_kwargs)

val_trainer.fit(valid_model, datamodule)
# Keep the test metrics returned by the trainer for later inspection.
metrics = val_trainer.test(valid_model, datamodule)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 4.3 M 
1 | pred_head     | Sequential       | 204 K 
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | MetricCollection | 0     
4 | val_metrics   | MetricCollection | 0     
5 | test_metrics  | MetricCollection | 0     
6 | postproc      | Softmax          | 0     
---------------------------------------------------
204 K     Trainable params
4.3 M     Non-trainable params
4.5 M     Total params
18.076    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

# Compare with the plain sequence encoder (no pooling)

In [18]:
# Baseline: the validation model uses the sequence encoder directly as its
# backbone, without the pooling wrapper.
seed_everything(42)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["model"],
    backbone=sequence_encoder
)

Global seed set to 42


In [19]:
# Create a trainer from the validation trainer settings, then fit and test.
trainer_kwargs = dict(cfg_validation["trainer"])
val_trainer = Trainer(**trainer_kwargs)

val_trainer.fit(valid_model, datamodule)
# Keep the test metrics returned by the trainer for later inspection.
metrics = val_trainer.test(valid_model, datamodule)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | RnnSeqEncoder    | 4.3 M 
1 | pred_head | Sequential       | 36.1 K
2 | loss      | CrossEntropyLoss | 0     
-----------------------------------------------
36.1 K    Trainable params
4.3 M     Non-trainable params
4.4 M     Total params
17.401    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          AUROC             0.6694410443305969
        Accuracy            0.29433929920196533
         F1Score            0.29433929920196533
         PR-AUC            0.042737916111946106
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'AUROC': 0.6694410443305969,
  'PR-AUC': 0.042737916111946106,
  'Accuracy': 0.29433929920196533,
  'F1Score': 0.29433929920196533}]

# Compare with pooling mean

In [20]:
# Switch the existing pooling model to mean pooling by setting the attribute.
# NOTE(review): a later cell switches type via change_pooling_type() instead —
# confirm that direct assignment is sufficient for "mean".
pooling_model.pooling_type = "mean"

In [21]:
# Validation with the pooling model as backbone (mean pooling set above).

seed_everything(42)

# The annotation must name an imported class: module-level annotations are
# evaluated at runtime, so the previous, never-imported `LocalValidationModel`
# raised NameError on a fresh kernel.
# NOTE(review): an earlier validation cell reads cfg_validation["module"] and
# builds the trainer with Trainer(**cfg) — confirm which form matches the config.
valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["model"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.get_emb_dim(),
)

val_trainer: Trainer = instantiate(cfg_validation["trainer"])

val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | PoolingModel     | 4.3 M 
1 | pred_head | Sequential       | 68.9 K
2 | loss      | CrossEntropyLoss | 0     
-----------------------------------------------
68.9 K    Trainable params
4.3 M     Non-trainable params
4.4 M     Total params
17.532    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          AUROC             0.6771971583366394
        Accuracy            0.2960392236709595
         F1Score            0.2960392236709595
         PR-AUC             0.04380257427692413
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'AUROC': 0.6771971583366394,
  'PR-AUC': 0.04380257427692413,
  'Accuracy': 0.2960392236709595,
  'F1Score': 0.2960392236709595}]

# Compare with pooling max

In [22]:
# Switch the existing pooling model to max pooling by setting the attribute.
# NOTE(review): a later cell switches type via change_pooling_type() instead —
# confirm that direct assignment is sufficient for "max".
pooling_model.pooling_type = "max"

In [23]:
# Validation with the pooling model as backbone (max pooling set above).

seed_everything(42)

# The annotation must name an imported class: module-level annotations are
# evaluated at runtime, so the previous, never-imported `LocalValidationModel`
# raised NameError on a fresh kernel.
# NOTE(review): an earlier validation cell reads cfg_validation["module"] and
# builds the trainer with Trainer(**cfg) — confirm which form matches the config.
valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["model"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.get_emb_dim(),
)

val_trainer: Trainer = instantiate(cfg_validation["trainer"])

val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | PoolingModel     | 4.3 M 
1 | pred_head | Sequential       | 68.9 K
2 | loss      | CrossEntropyLoss | 0     
-----------------------------------------------
68.9 K    Trainable params
4.3 M     Non-trainable params
4.4 M     Total params
17.532    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          AUROC             0.6732884645462036
        Accuracy            0.29436764121055603
         F1Score            0.29436764121055603
         PR-AUC            0.042369429022073746
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'AUROC': 0.6732884645462036,
  'PR-AUC': 0.042369429022073746,
  'Accuracy': 0.29436764121055603,
  'F1Score': 0.29436764121055603}]

# Compare with learnable attention

In [24]:
# Switch the pooling model to learnable attention via change_pooling_type().
pooling_model.change_pooling_type("learnable_attention")

In [25]:
# Display the current pooling type to confirm the switch took effect.
pooling_model.pooling_type

'learnable_attention'

In [26]:
from src.coles import CustomColesDataset, CustomCoLES

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger

from ptls.frames import PtlsDataModule

In [27]:
# Compose the CoLES model config from ../config/model.
# NOTE(review): the dataset name is hard-coded here instead of using DATASET —
# confirm this is intentional.
with initialize(config_path="../config/model", version_base=None):
    cfg_model = compose(config_name="coles_" + "churn")

In [28]:
# Inspect the datamodule section of the CoLES config.
cfg_model["datamodule"]

{'_target_': 'ptls.frames.PtlsDataModule', 'train_batch_size': 128, 'valid_batch_size': 128, 'train_num_workers': 8, 'valid_num_workers': 8}

In [29]:
# Instantiate the CoLES module with the pooling model as its sequence encoder.
model: CustomCoLES = instantiate(
    cfg_model["model"],
    sequence_encoder=pooling_model,
)

In [30]:
# Initialize the original CoLES datasets used for CoLES training.
coles_dataset_cfg = cfg_model["dataset"]
train_data: CustomColesDataset = instantiate(coles_dataset_cfg, data=train)
val_data: CustomColesDataset = instantiate(coles_dataset_cfg, data=val)

# Datamodule that feeds the CoLES trainer with the train/validation datasets.
train_datamodule: PtlsDataModule = instantiate(
    cfg_model["datamodule"],
    train_data=train_data,
    valid_data=val_data,
)

In [31]:
import torch.nn as nn

# Freeze the backbone model: put its BatchNorm1d layers into eval mode with
# running-stat tracking disabled, then turn off gradients for every parameter.
# NOTE(review): eval() set here may be reverted if the trainer later calls
# model.train() — confirm the BatchNorm layers stay frozen during fit.
frozen_backbone = model.seq_encoder.backbone

batch_norm_layers = [
    layer for layer in frozen_backbone.modules() if isinstance(layer, nn.BatchNorm1d)
]
for layer in batch_norm_layers:
    layer.track_running_stats = False
    layer.eval()

for weight in frozen_backbone.parameters():
    weight.requires_grad = False

In [32]:
# List every parameter of the sequence encoder with its requires_grad flag,
# to check which parameters remain trainable after freezing.
for name, param in model.seq_encoder.named_parameters():
    print(name, param.requires_grad)

backbone.trx_encoder.embeddings.mcc_code.weight False
backbone.trx_encoder.numerical_batch_norm.bn.weight False
backbone.trx_encoder.numerical_batch_norm.bn.bias False
backbone.seq_encoder.starter_h False
backbone.seq_encoder.rnn.weight_ih_l0 False
backbone.seq_encoder.rnn.weight_hh_l0 False
backbone.seq_encoder.rnn.bias_ih_l0 False
backbone.seq_encoder.rnn.bias_hh_l0 False
learnable_attention_matrix.weight True
learnable_attention_matrix.bias True


In [33]:
# All trainer-related settings live under the "trainer_coles" config section.
trainer_cfg = cfg_model["trainer_coles"]

# Checkpointing and early stopping both monitor the model's own metric,
# treating higher values as better.
model_checkpoint: ModelCheckpoint = instantiate(
    trainer_cfg["checkpoint_callback"],
    monitor=model.metric_name,
    mode="max",
)
early_stopping: EarlyStopping = instantiate(
    trainer_cfg["early_stopping"],
    monitor=model.metric_name,
    mode="max",
)

logger: TensorBoardLogger = instantiate(trainer_cfg["logger"])

trainer: Trainer = instantiate(
    trainer_cfg["trainer"],
    callbacks=[model_checkpoint, early_stopping],
    logger=logger,
)

# Run CoLES training on the CoLES datamodule.
trainer.fit(model, train_datamodule)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type            | Params
-------------------------------------------------------
0 | _loss              | ContrastiveLoss | 0     
1 | _seq_encoder       | PoolingModel    | 5.4 M 
2 | _validation_metric | BatchRecallTopK | 0     
3 | _head              | Head            | 0     
-------------------------------------------------------
1.0 M     Trainable params
4.3 M     Non-trainable params
5.4 M     Total params
21.455    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric recall_top_k improved. New best score: 0.249


Validation: 0it [00:00, ?it/s]

Metric recall_top_k improved by 0.035 >= min_delta = 0.01. New best score: 0.284


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric recall_top_k improved by 0.018 >= min_delta = 0.01. New best score: 0.301


Validation: 0it [00:00, ?it/s]

Metric recall_top_k improved by 0.012 >= min_delta = 0.01. New best score: 0.313


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric recall_top_k did not improve in the last 5 records. Best score: 0.313. Signaling Trainer to stop.


In [34]:
# Display the dataset name used in the save/load paths below.
# NOTE(review): the recorded output is 'churn_ae', but the config cell above
# sets DATASET = "churn" — the saved output appears to come from another
# session; re-run the notebook from a fresh kernel to confirm.
DATASET

'churn_ae'

In [35]:
# Save only the learnable attention matrix's state dict to saved_models/.
# NOTE(review): this stores the weights as they are after trainer.fit returns —
# confirm whether the best checkpoint should be restored first.
torch.save(model.seq_encoder.learnable_attention_matrix.state_dict(), f"saved_models/coles_{DATASET}_learnable_attention_matrix.pth")

In [36]:
# Load the saved attention weights into the pooling model.
# map_location="cpu" lets the file load even when the device it was saved
# from is unavailable; load_state_dict then copies the values into the
# module's existing parameters.
attention_state = torch.load(
    f"saved_models/coles_{DATASET}_learnable_attention_matrix.pth",
    map_location="cpu",
)
pooling_model.learnable_attention_matrix.load_state_dict(attention_state)

<All keys matched successfully>

In [37]:
# Validation with the pooling model as backbone (learnable attention).

seed_everything(42)

# The annotation must name an imported class: module-level annotations are
# evaluated at runtime, so the previous, never-imported `LocalValidationModel`
# raised NameError on a fresh kernel.
# NOTE(review): an earlier validation cell reads cfg_validation["module"] and
# builds the trainer with Trainer(**cfg) — confirm which form matches the config.
valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["model"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.get_emb_dim(),
)

val_trainer: Trainer = instantiate(cfg_validation["trainer"])

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [38]:
# Display the epoch limit configured on the validation trainer.
val_trainer.max_epochs

5

In [39]:
# Train the validation model, then evaluate it on the test split; the test
# metrics returned by the last call are shown as the cell output.
val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type             | Params
-----------------------------------------------
0 | backbone  | PoolingModel     | 5.4 M 
1 | pred_head | Sequential       | 68.9 K
2 | loss      | CrossEntropyLoss | 0     
-----------------------------------------------
68.9 K    Trainable params
5.4 M     Non-trainable params
5.4 M     Total params
21.731    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
          AUROC             0.6686756610870361
        Accuracy            0.29567089676856995
         F1Score            0.29567089676856995
         PR-AUC             0.04138781875371933
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'AUROC': 0.6686756610870361,
  'PR-AUC': 0.04138781875371933,
  'Accuracy': 0.29567089676856995,
  'F1Score': 0.29567089676856995}]