In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import sys

dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if not dir1 in sys.path:
    sys.path.append(dir1)

os.chdir('..')

%load_ext autoreload
%autoreload

In [2]:
from pathlib import Path

import pandas as pd

import torch

from hydra import initialize, compose
from hydra.utils import instantiate, call

from pytorch_lightning import Trainer, seed_everything

from src.local_validation.local_validation_model import LocalValidationModelBase
from src.preprocessing import preprocess

from ptls.frames import PtlsDataModule

from src.datasets.coles import CustomColesDataset
from src.modules.coles import CustomCoLES

from src.pooling import PoolingModel

# Example of usage with churn dataset (pooling attention model)


In [3]:
config_name = "pooling_local_validation_default"
attention_learn_backbone_config = "coles_default"

backbone_path = "saved_models/default/coles_default/coles_default_4.pth"
path_for_attention = "saved_models/coles_default_learnable_attention_matrix_4.pth"
SEED = 46

In [4]:
with initialize(config_path="../config", version_base=None):
    cfg = compose(config_name=config_name)
    
cfg_preprop = cfg["preprocessing"]
cfg_validation = cfg["validation"]["local_target"] 
cfg_encoder = cfg["backbone"]["encoder"]

In [5]:
train, val, test = preprocess(cfg_preprop)

train_dataset = call(cfg_validation["dataset"], data=train, deterministic=False)
val_dataset = call(cfg_validation["dataset"], data=val, deterministic=True)
test_dataset = call(cfg_validation["dataset"], data=test, deterministic=True)

datamodule: PtlsDataModule = instantiate(
    cfg_validation["datamodule"],
    train_data=train_dataset,
    valid_data=val_dataset,
    test_data=test_dataset,
)

In [6]:
sequence_encoder = instantiate(cfg_encoder, is_reduce_sequence=True)
sequence_encoder.load_state_dict(torch.load(backbone_path, map_location='cuda:0'))

<All keys matched successfully>

In [7]:
pooling_model = PoolingModel(train_data=train,
                backbone=sequence_encoder,
                backbone_embd_size=sequence_encoder.seq_encoder.hidden_size,
                max_users_in_train_dataloader=150,
                pooling_type="attention",
                min_seq_length=15,
                max_seq_length=100,
                max_embs_per_user=100)

  3%|▎         | 149/5664 [00:01<01:12, 75.78it/s] 
100%|██████████| 14988/14988 [00:16<00:00, 931.51it/s]


In [8]:
# validation

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size,
)

Global seed set to 46


In [9]:
val_trainer = Trainer(**cfg_validation["trainer"],
                      accelerator="gpu")
    
val_trainer.fit(valid_model, datamodule)
metrics = val_trainer.test(valid_model, datamodule)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 2.0 M 
1 | pred_head     | Sequential       | 1.6 K 
2 | train_metrics | MetricCollection | 0     
3 | val_metrics   | MetricCollection | 0     
4 | test_metrics  | MetricCollection | 0     
5 | postproc      | Sigmoid          | 0     
---------------------------------------------------
1.6 K     Trainable params
2.0 M     Non-trainable params
2.0 M     Total params
7.894     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.6319074630737305
      TestAccuracy          0.9942672252655029
       TestF1Score                  0.0
       TestPR-AUC          0.011764803901314735
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


# Compare with just seqencoder

In [10]:
sequence_encoder.seq_encoder.rnn.hidden_size

800

In [11]:
# validation

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=sequence_encoder,
    backbone_embd_size=sequence_encoder.seq_encoder.rnn.hidden_size
)

Global seed set to 46


In [12]:
val_trainer = Trainer(**cfg_validation["trainer"],
                      accelerator="gpu")
    
val_trainer.fit(valid_model, datamodule)
metrics = val_trainer.test(valid_model, datamodule)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | RnnSeqEncoder    | 2.0 M 
1 | pred_head     | Sequential       | 801   
2 | train_metrics | MetricCollection | 0     
3 | val_metrics   | MetricCollection | 0     
4 | test_metrics  | MetricCollection | 0     
5 | postproc      | Sigmoid          | 0     
---------------------------------------------------
801       Trainable params
2.0 M     Non-trainable params
2.0 M     Total params
7.891     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.6141005754470825
      TestAccuracy          0.9942672252655029
       TestF1Score                  0.0
       TestPR-AUC           0.01044189091771841
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


# Compare with pooling mean

In [13]:
pooling_model.pooling_type = "mean"

In [14]:
# validation

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size,
)

val_trainer = Trainer(**cfg_validation["trainer"],
                      accelerator="gpu")
    
val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 46
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 2.0 M 
1 | pred_head     | Sequential       | 1.6 K 
2 | train_metrics | MetricCollection | 0     
3 | val_metrics   | MetricCollection | 0     
4 | test_metrics  | MetricCollection | 0     
5 | postproc      | Sigmoid          | 0     
---------------------------------------------------
1.6 K     Trainable params
2.0 M     Non-trainable params
2.0 M     Total params
7.894     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.6605398654937744
      TestAccuracy          0.9942672252655029
       TestF1Score                  0.0
       TestPR-AUC          0.012475768104195595
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'TestAUROC': 0.6605398654937744,
  'TestAccuracy': 0.9942672252655029,
  'TestF1Score': 0.0,
  'TestPR-AUC': 0.012475768104195595}]

# Compare with pooling max

In [15]:
pooling_model.pooling_type = "max"

In [16]:
# validation

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size,
)

val_trainer = Trainer(**cfg_validation["trainer"],
                      accelerator="gpu")
    
val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 46
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 2.0 M 
1 | pred_head     | Sequential       | 1.6 K 
2 | train_metrics | MetricCollection | 0     
3 | val_metrics   | MetricCollection | 0     
4 | test_metrics  | MetricCollection | 0     
5 | postproc      | Sigmoid          | 0     
---------------------------------------------------
1.6 K     Trainable params
2.0 M     Non-trainable params
2.0 M     Total params
7.894     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.4665347933769226
      TestAccuracy          0.9942672252655029
       TestF1Score                  0.0
       TestPR-AUC          0.005134256090968847
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'TestAUROC': 0.4665347933769226,
  'TestAccuracy': 0.9942672252655029,
  'TestF1Score': 0.0,
  'TestPR-AUC': 0.005134256090968847}]

# Compare with learnable attention

## Learning of attention matrix in CoLES way (do not run this if you already have checkpoints for attention matrix)

In [None]:
pooling_model.change_pooling_type("learnable_attention")

In [None]:
with initialize(config_path="../config/backbone", version_base=None):
    cfg_model = compose(config_name=attention_learn_backbone_config)

In [None]:
model = CustomCoLES(cfg_model["module"]["optimizer_partial"], 
                    lr_scheduler_partial=cfg_model["module"]["lr_scheduler_partial"],
                    encoder=cfg_model["encoder"])


In [None]:
model._seq_encoder = pooling_model

In [None]:
# initialize original CoLES datasest - for CoLES training
train_data: CustomColesDataset = instantiate(cfg_model["dataset"], data=train, deterministic=False)
val_data: CustomColesDataset = instantiate(cfg_model["dataset"], data=val, deterministic=False)
    
train_datamodule: PtlsDataModule = instantiate(
    cfg_model["datamodule"],
    train_data=train_data,
    valid_data=val_data
)

In [None]:
import torch.nn as nn

# freeze backbone model
for m in model._seq_encoder.backbone.modules():
    if isinstance(m, nn.BatchNorm1d):
        m.track_running_stats = False
        m.eval()

for param in model._seq_encoder.backbone.parameters():
    param.requires_grad = False


In [None]:
for name, param in model._seq_encoder.named_parameters():
    print(name, param.requires_grad)

backbone.trx_encoder.embeddings.mcc_code.weight False
backbone.trx_encoder.numerical_batch_norm.bn.weight False
backbone.trx_encoder.numerical_batch_norm.bn.bias False
backbone.seq_encoder.starter_h False
backbone.seq_encoder.rnn.weight_ih_l0 False
backbone.seq_encoder.rnn.weight_hh_l0 False
backbone.seq_encoder.rnn.bias_ih_l0 False
backbone.seq_encoder.rnn.bias_hh_l0 False
learnable_attention_matrix.weight True
learnable_attention_matrix.bias True


In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
model_checkpoint = ModelCheckpoint(dirpath= "logs/checkpoints/coles", 
                                      filename="coles_model",
                                      monitor=model.metric_name,
                                      mode="max")
early_stopping = EarlyStopping(min_delta=.01, 
                               patience=5,
                               verbose=True,
                               monitor=model.metric_name,
                               mode="max")

In [None]:
trainer = Trainer(
    **cfg_model["trainer"],
    callbacks=[model_checkpoint, early_stopping]
)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [None]:
trainer.fit(model, train_datamodule)


  | Name               | Type            | Params
-------------------------------------------------------
0 | _loss              | ContrastiveLoss | 0     
1 | _seq_encoder       | PoolingModel    | 2.6 M 
2 | _validation_metric | BatchRecallTopK | 0     
3 | _head              | Head            | 0     
4 | encoder            | RnnSeqEncoder   | 2.0 M 
-------------------------------------------------------
2.6 M     Trainable params
2.0 M     Non-trainable params
4.6 M     Total params
18.339    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric recall_top_k improved. New best score: 0.831


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Metric recall_top_k improved by 0.014 >= min_delta = 0.01. New best score: 0.845


Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Monitored metric recall_top_k did not improve in the last 5 records. Best score: 0.845. Signaling Trainer to stop.


In [None]:
torch.save(model._seq_encoder.learnable_attention_matrix.state_dict(), path_for_attention)

## Pooling with learnable attention

In [9]:
pooling_model.change_pooling_type("learnable_attention")

In [10]:
pooling_model.learnable_attention_matrix.load_state_dict(torch.load(path_for_attention))

<All keys matched successfully>

In [11]:
# validation

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size
)
 
val_trainer = Trainer(**cfg_validation["trainer"],
                      accelerator="gpu")

val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 46
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 2.6 M 
1 | pred_head     | Sequential       | 1.6 K 
2 | train_metrics | MetricCollection | 0     
3 | val_metrics   | MetricCollection | 0     
4 | test_metrics  | MetricCollection | 0     
5 | postproc      | Sigmoid          | 0     
---------------------------------------------------
1.6 K     Trainable params
2.6 M     Non-trainable params
2.6 M     Total params
10.457    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.5934466123580933
      TestAccuracy          0.9942672252655029
       TestF1Score                  0.0
       TestPR-AUC          0.008661866188049316
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'TestAUROC': 0.5934466123580933,
  'TestAccuracy': 0.9942672252655029,
  'TestF1Score': 0.0,
  'TestPR-AUC': 0.008661866188049316}]