In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import sys

dir2 = os.path.abspath('')
dir1 = os.path.dirname(dir2)
if not dir1 in sys.path:
    sys.path.append(dir1)

os.chdir('..')

%load_ext autoreload
%autoreload

In [2]:
from pathlib import Path

import pandas as pd

import torch

from hydra import initialize, compose
from hydra.utils import instantiate, call

from pytorch_lightning import Trainer, seed_everything

from src.local_validation.local_validation_model import LocalValidationModelBase
from src.preprocessing import preprocess

from ptls.frames import PtlsDataModule

from src.datasets.coles import CustomColesDataset
from src.modules.coles import CustomCoLES

from src.pooling import PoolingModel

# Example of usage with churn dataset (pooling attention model)


In [3]:
# Name of the Hydra config composed for the local-validation experiment.
config_name = "pooling_local_validation_churn"
# Name of the Hydra backbone config composed later for training the learnable attention matrix.
attention_learn_backbone_config = "coles_churn"

# Checkpoint holding the state dict that is loaded into the sequence encoder.
backbone_path = "saved_models/churn/vanilla_coles/coles_churn/coles_churn_0.pth"
# File the learnable attention matrix state dict is saved to and reloaded from.
path_for_attention = "saved_models/coles_churn_learnable_attention_matrix_0.pth"
# Seed passed to seed_everything before each validation run.
SEED = 42

In [4]:
# Compose the Hydra config selected by `config_name`.
with initialize(version_base=None, config_path="../config"):
    cfg = compose(config_name=config_name)

DATASET = "churn"

# Sub-configs consumed by the following cells.
cfg_encoder = cfg["backbone"]["encoder"]
cfg_validation = cfg["validation"]["event_type_100"]
cfg_preprop = cfg["preprocessing"]

In [5]:
# Split the raw data according to the preprocessing config.
train, val, test = preprocess(cfg_preprop)

# Build one deterministic validation dataset per split (same order as before:
# train, then val, then test).
train_dataset, val_dataset, test_dataset = (
    call(cfg_validation["dataset"], data=split, deterministic=True)
    for split in (train, val, test)
)

# Wrap the three datasets into a single datamodule for the Lightning trainers.
datamodule_kwargs = dict(
    train_data=train_dataset,
    valid_data=val_dataset,
    test_data=test_dataset,
)
datamodule: PtlsDataModule = instantiate(cfg_validation["datamodule"], **datamodule_kwargs)

In [6]:
# Override the input size of the mcc_code embedding in the composed config
# before the encoder is instantiated from it.
# NOTE(review): 345 presumably matches the embedding size stored in the
# checkpoint at `backbone_path` — confirm and consider moving it to the config cell.
cfg_encoder["trx_encoder"]["embeddings"]["mcc_code"]["in"] = 345

In [7]:
# Build the encoder from the (patched) encoder config.
sequence_encoder = instantiate(cfg_encoder, is_reduce_sequence=True)

# Deserialize onto CPU so a checkpoint saved from a GPU run can also be loaded
# on a machine without CUDA; load_state_dict copies the values into the
# module's existing parameters regardless of where they were loaded.
backbone_state = torch.load(backbone_path, map_location="cpu")
sequence_encoder.load_state_dict(backbone_state)

<All keys matched successfully>

In [8]:
# Arguments for the pooling wrapper around the pretrained sequence encoder.
pooling_kwargs = dict(
    train_data=train,
    backbone=sequence_encoder,
    backbone_embd_size=sequence_encoder.seq_encoder.hidden_size,
    max_users_in_train_dataloader=500,
    pooling_type="attention",
    min_seq_length=15,
    max_seq_length=100,
    max_embs_per_user=100,
)
pooling_model = PoolingModel(**pooling_kwargs)

 14%|█▍        | 561/3998 [00:05<00:32, 107.33it/s]
100%|██████████| 4287/4287 [00:10<00:00, 424.36it/s]


In [8]:
# Local validation with the attention-pooling backbone.
seed_everything(SEED)

attention_module_kwargs = dict(
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size,
)
valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"], **attention_module_kwargs
)

Global seed set to 42


In [9]:
# Use the GPU when CUDA is available and fall back to CPU otherwise, so the
# cell does not fail on machines without a GPU.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
val_trainer = Trainer(**cfg_validation["trainer"], accelerator=accelerator)

# Train the validation module, then evaluate it on the test split;
# `metrics` keeps the list returned by Trainer.test.
val_trainer.fit(valid_model, datamodule)
metrics = val_trainer.test(valid_model, datamodule)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 4.3 M 
1 | pred_head     | Sequential       | 204 K 
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | MetricCollection | 0     
4 | val_metrics   | MetricCollection | 0     
5 | test_metrics  | MetricCollection | 0     
6 | postproc      | Softmax          | 0     
---------------------------------------------------
204 K     Trainable params
4.3 M     Non-trainable params
4.5 M     Total params
18.076    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.7118597030639648
      TestAccuracy          0.25777778029441833
       TestF1Score          0.25777778029441833
       TestPR-AUC           0.2022126466035843
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


# Compare with the plain sequence encoder (no pooling)

In [11]:
# Local validation with the bare sequence encoder as backbone; no
# backbone_embd_size override is passed here.
seed_everything(SEED)

validation_module_cfg = cfg_validation["module"]
valid_model: LocalValidationModelBase = instantiate(validation_module_cfg, backbone=sequence_encoder)

Global seed set to 42


In [12]:
# Use the GPU when CUDA is available and fall back to CPU otherwise, so the
# cell does not fail on machines without a GPU.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
val_trainer = Trainer(**cfg_validation["trainer"], accelerator=accelerator)

# Train the validation module, then evaluate it on the test split;
# `metrics` keeps the list returned by Trainer.test.
val_trainer.fit(valid_model, datamodule)
metrics = val_trainer.test(valid_model, datamodule)

GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | RnnSeqEncoder    | 4.3 M 
1 | pred_head     | Sequential       | 102 K 
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | MetricCollection | 0     
4 | val_metrics   | MetricCollection | 0     
5 | test_metrics  | MetricCollection | 0     
6 | postproc      | Softmax          | 0     
---------------------------------------------------
102 K     Trainable params
4.3 M     Non-trainable params
4.4 M     Total params
17.667    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.7138686180114746
      TestAccuracy          0.2711111009120941
       TestF1Score          0.2711111009120941
       TestPR-AUC           0.20457403361797333
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


# Compare with pooling mean

In [13]:
# Reuse the existing PoolingModel instance and switch its pooling mode in place.
# NOTE(review): the learnable-attention section uses
# pooling_model.change_pooling_type(...) instead — confirm that plain attribute
# assignment is sufficient for "mean".
pooling_model.pooling_type = "mean"

In [14]:
# validation (mean pooling)

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size,
)

# Use the GPU when CUDA is available and fall back to CPU otherwise, so the
# cell does not fail on machines without a GPU.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
val_trainer = Trainer(**cfg_validation["trainer"], accelerator=accelerator)

# Train, then evaluate; the test metrics list is the cell's displayed output.
val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 42
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 4.3 M 
1 | pred_head     | Sequential       | 204 K 
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | MetricCollection | 0     
4 | val_metrics   | MetricCollection | 0     
5 | test_metrics  | MetricCollection | 0     
6 | postproc      | Softmax          | 0     
---------------------------------------------------
204 K     Trainable params
4.3 M     Non-trainable params
4.5 M     Total params
18.076    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.7126914858818054
      TestAccuracy          0.2613333463668823
       TestF1Score          0.2613333463668823
       TestPR-AUC           0.2045554220676422
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'TestAUROC': 0.7126914858818054,
  'TestAccuracy': 0.2613333463668823,
  'TestF1Score': 0.2613333463668823,
  'TestPR-AUC': 0.2045554220676422}]

# Compare with pooling max

In [15]:
# Reuse the existing PoolingModel instance and switch its pooling mode in place.
# NOTE(review): the learnable-attention section uses
# pooling_model.change_pooling_type(...) instead — confirm that plain attribute
# assignment is sufficient for "max".
pooling_model.pooling_type = "max"

In [16]:
# validation (max pooling)

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size,
)

# Use the GPU when CUDA is available and fall back to CPU otherwise, so the
# cell does not fail on machines without a GPU.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
val_trainer = Trainer(**cfg_validation["trainer"], accelerator=accelerator)

# Train, then evaluate; the test metrics list is the cell's displayed output.
val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 42
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 4.3 M 
1 | pred_head     | Sequential       | 204 K 
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | MetricCollection | 0     
4 | val_metrics   | MetricCollection | 0     
5 | test_metrics  | MetricCollection | 0     
6 | postproc      | Softmax          | 0     
---------------------------------------------------
204 K     Trainable params
4.3 M     Non-trainable params
4.5 M     Total params
18.076    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.7132975459098816
      TestAccuracy          0.25688889622688293
       TestF1Score          0.25688889622688293
       TestPR-AUC           0.20453201234340668
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'TestAUROC': 0.7132975459098816,
  'TestAccuracy': 0.25688889622688293,
  'TestF1Score': 0.25688889622688293,
  'TestPR-AUC': 0.20453201234340668}]

# Compare with learnable attention

In [9]:
# Switch the pooling model to the "learnable_attention" mode through its setter.
# NOTE(review): presumably this is what creates `learnable_attention_matrix`,
# which later cells train, save and reload — confirm in src.pooling.
pooling_model.change_pooling_type("learnable_attention")

In [15]:
# Compose the backbone config named by `attention_learn_backbone_config`
# from the ../config/backbone directory.
with initialize(config_path="../config/backbone", version_base=None):
    cfg_model = compose(config_name=attention_learn_backbone_config)

In [16]:
# Build the CoLES module from the backbone config; the optimizer partial is
# passed positionally, the scheduler partial and encoder config by keyword.
coles_module_cfg = cfg_model["module"]
model = CustomCoLES(
    coles_module_cfg["optimizer_partial"],
    lr_scheduler_partial=coles_module_cfg["lr_scheduler_partial"],
    encoder=cfg_model["encoder"],
)


In [17]:
# Replace the CoLES module's sequence encoder with the pooling model.
# Note: this writes to a private attribute of CustomCoLES.
model._seq_encoder = pooling_model

In [18]:
# Display the datamodule settings supplied by the backbone config.
cfg_model["datamodule"]

{'_target_': 'ptls.frames.PtlsDataModule', 'train_batch_size': 32, 'valid_batch_size': 32, 'train_num_workers': 8, 'valid_num_workers': 8}

In [19]:
# Initialize the original CoLES datasets (non-deterministic) used for CoLES training.
train_data: CustomColesDataset
val_data: CustomColesDataset
train_data, val_data = (
    instantiate(cfg_model["dataset"], data=split, deterministic=False)
    for split in (train, val)
)

# Datamodule for CoLES training: train and validation data only.
coles_datamodule_kwargs = dict(train_data=train_data, valid_data=val_data)
train_datamodule: PtlsDataModule = instantiate(cfg_model["datamodule"], **coles_datamodule_kwargs)

In [20]:
import torch.nn as nn

# Freeze the backbone model that sits inside the pooling wrapper.
frozen_backbone = model._seq_encoder.backbone

# Stop gradient updates for every backbone weight.
for weight in frozen_backbone.parameters():
    weight.requires_grad = False

# Disable running-statistics tracking on BatchNorm1d layers and put them in eval mode.
for layer in frozen_backbone.modules():
    if not isinstance(layer, nn.BatchNorm1d):
        continue
    layer.track_running_stats = False
    layer.eval()


In [21]:
# List every parameter of the pooling model with its requires_grad flag.
for param_name, tensor in model._seq_encoder.named_parameters():
    print(param_name, tensor.requires_grad)

backbone.trx_encoder.embeddings.mcc_code.weight False
backbone.trx_encoder.numerical_batch_norm.bn.weight False
backbone.trx_encoder.numerical_batch_norm.bn.bias False
backbone.seq_encoder.starter_h False
backbone.seq_encoder.rnn.weight_ih_l0 False
backbone.seq_encoder.rnn.weight_hh_l0 False
backbone.seq_encoder.rnn.bias_ih_l0 False
backbone.seq_encoder.rnn.bias_hh_l0 False
learnable_attention_matrix.weight True
learnable_attention_matrix.bias True


In [22]:
# Display which Trainer options the backbone config supplies.
cfg_model["trainer"].keys()

dict_keys(['max_epochs', 'log_every_n_steps'])

In [23]:
# Use the GPU when CUDA is available and fall back to CPU otherwise, so the
# cell does not fail on machines without a GPU.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
trainer = Trainer(**cfg_model["trainer"], accelerator=accelerator)

# Run CoLES training on the module whose sequence encoder is the pooling model.
trainer.fit(model, train_datamodule)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name               | Type            | Params
-------------------------------------------------------
0 | _loss              | ContrastiveLoss | 0     
1 | _seq_encoder       | PoolingModel    | 5.4 M 
2 | _validation_metric | BatchRecallTopK | 0     
3 | _head              | Head            | 0     
4 | encoder            | RnnSeqEncoder   | 4.3 M 
-------------------------------------------------------
5.4 M     Trainable params
4.3 M     Non-trainable params
9.7 M     Total params
38.712    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x000001EDC01A5750>
Traceback (most recent call last):
  File "c:\Users\makov\anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "c:\Users\makov\anaconda3\lib\site-packages\torch\utils\data\dataloader.py", line 1436, in _shutdown_workers
    if self._persistent_workers or self._workers_status[worker_id]:
AttributeError: '_MultiProcessingDataLoaderIter' object has no attribute '_workers_status'


In [24]:
# torch.save does not create missing parent directories, so make sure the
# target directory exists before writing the checkpoint.
Path(path_for_attention).parent.mkdir(parents=True, exist_ok=True)

# Persist only the learnable attention matrix weights.
torch.save(model._seq_encoder.learnable_attention_matrix.state_dict(), path_for_attention)

In [10]:
# Deserialize onto CPU so weights saved from a GPU run can also be loaded on a
# machine without CUDA; load_state_dict copies the values into the module's
# existing parameters.
attention_state = torch.load(path_for_attention, map_location="cpu")
pooling_model.learnable_attention_matrix.load_state_dict(attention_state)

<All keys matched successfully>

In [11]:
# validation (learnable attention pooling)

seed_everything(SEED)

valid_model: LocalValidationModelBase = instantiate(
    cfg_validation["module"],
    backbone=pooling_model,
    backbone_embd_size=pooling_model.embedding_size
)

# Use the GPU when CUDA is available and fall back to CPU otherwise, so the
# cell does not fail on machines without a GPU.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"
val_trainer = Trainer(**cfg_validation["trainer"], accelerator=accelerator)

# Train, then evaluate; the test metrics list is the cell's displayed output.
val_trainer.fit(valid_model, datamodule)
val_trainer.test(valid_model, datamodule)

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type             | Params
---------------------------------------------------
0 | backbone      | PoolingModel     | 5.4 M 
1 | pred_head     | Sequential       | 204 K 
2 | loss          | CrossEntropyLoss | 0     
3 | train_metrics | MetricCollection | 0     
4 | val_metrics   | MetricCollection | 0     
5 | test_metrics  | MetricCollection | 0     
6 | postproc      | Softmax          | 0     
---------------------------------------------------
204 K     Trainable params
5.4 M     Non-trainable params
5.6 M     Total params
22.275    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        TestAUROC           0.7046064734458923
      TestAccuracy          0.25733333826065063
       TestF1Score          0.25733333826065063
       TestPR-AUC           0.19813217222690582
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'TestAUROC': 0.7046064734458923,
  'TestAccuracy': 0.25733333826065063,
  'TestF1Score': 0.25733333826065063,
  'TestPR-AUC': 0.19813217222690582}]