# <font size="6">Libraries</font>

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed, so edits to project modules are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from torch.optim import Adam
from torch.optim.lr_scheduler import StepLR
import torch.nn.functional as F
import os, sys
from tqdm import tqdm

# Put the directory above the current working directory on sys.path
# (presumably so the project-local `src` package imported below resolves — confirm).
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

# Record the torch version in the cell output for reproducibility.
print('torch version:', torch.__version__)

import ptls
from ptls.data_load import IterableChain
from ptls.data_load.iterable_processing import SeqLenFilter
from ptls.data_load.iterable_processing.to_torch_tensor import ToTorch
from ptls.data_load.iterable_processing_dataset import IterableProcessingDataset
from ptls.data_load.datasets.parquet_dataset import ParquetDataset, ParquetFiles

import pytorch_lightning as pl

from glob import glob

import sklearn
from sklearn.model_selection import train_test_split

import os, sys

from src import utils

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# `parent_dir` is already computed and appended to sys.path earlier in this cell,
# and the autoreload extension is already enabled by the first cell, so neither is
# repeated here (a second `%load_ext autoreload` only emits an "already loaded" notice).

# Module-level logger obtained from the project's `utils.get_logger` helper.
log = utils.get_logger(__name__)

torch version: 1.12.1+cu102
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Train model

# <font size="5">data conf</font>

In [3]:
# Dataset settings shared by the preprocessing pipeline and the collate function.
dataset_conf = {
    # 'min_seq_len': min(dataset['trx_count'].unique()),
    # Sequence-length bounds handed to SeqLenFilter.
    'min_seq_len': 1,
    'max_seq_len': 1500,
    # Record key of the event sequence (read by the collate function).
    'event_col': 'amnt_mcc_bins',
    # 'amount_log': 'amnt',
    # Record key whose value (minus one) the collate function can return per record.
    'event_cnt_col': 'trx_count',
    # Record key of the time sequence (rescaled by TimeProc, read by the collate function).
    'time_col': 'time',
}

# <font size="5">dataloaders</font>

In [4]:
# List of paths matching the training parquet file, relative to the working
# directory; glob returns an empty list (no error) when nothing matches.
train_data = glob('data/train.parquet')

class TimeProc(IterableProcessingDataset):
    """Rescale the time column of every record with a fixed min-max transform.

    Each value ``t`` stored under ``time_col`` is replaced by
    ``(t - tmin) / (tmax - tmin)``, wrapped in a numpy array.

    Args:
        time_col: key of the time feature inside the record's feature mapping.
        tmin: value that is mapped to 0.
        tmax: value that is mapped to 1; must differ from ``tmin``, otherwise
            the scaling divides by zero.
    """

    def __init__(self, time_col, tmin, tmax):
        super().__init__()
        self._time_col = time_col
        self.tmin, self.tmax = tmin, tmax

    def __iter__(self):
        """Yield every source record with its time column rescaled in place."""
        for rec in self._src:
            # A record is either the feature mapping itself or a tuple whose
            # first element holds the features.
            features = rec[0] if type(rec) is tuple else rec
            # Write back into `features`, not `rec`: a tuple record can neither
            # be indexed by a column name nor assigned to.
            features[self._time_col] = np.array(
                (features[self._time_col] - self.tmin) / (self.tmax - self.tmin)
            )
            yield rec
            

def pp_collate_fn(time_col, event_col, event_cnt_col, return_len=False):
    """Build a collate function that pads a batch of sequence records.

    Args:
        time_col: record key of the time sequence.
        event_col: record key of the event sequence.
        event_cnt_col: record key whose value, minus one, is reported per record
            when ``return_len`` is set.
        return_len: if True the collate function additionally returns a tensor
            holding ``record[event_cnt_col] - 1`` for every record.

    Returns:
        A callable taking a list of records and returning ``[times, events]``
        (both padded batch-first, events cast to long), or the tuple
        ``([times, events], lengths)`` when ``return_len`` is True.
    """

    def fn(batch):
        pad = torch.nn.utils.rnn.pad_sequence

        times = [item[time_col] for item in batch]
        events = [item[event_col] for item in batch]
        last_positions = [item[event_cnt_col] - 1 for item in batch]

        padded = [
            pad(times, batch_first=True),
            pad(events, batch_first=True).long(),
        ]

        return (padded, torch.tensor(last_positions)) if return_len else padded

    return fn

# Per-record pipeline: sequence-length filter, time rescaling, then ToTorch.
process = IterableChain(
    SeqLenFilter(
        min_seq_len=dataset_conf['min_seq_len'],
        max_seq_len=dataset_conf['max_seq_len'],
    ),
    # NOTE(review): -1 and 11752 are presumably the min/max of the time column — confirm.
    TimeProc(time_col=dataset_conf['time_col'], tmin=-1, tmax=11752),
    ToTorch(),
)

# Dataset over the training parquet files with the pipeline applied to each record.
train_ds = ParquetDataset(train_data, post_processing=process)

# Collate function that pads the time and event sequences of a batch.
collate_fn = pp_collate_fn(
    time_col=dataset_conf['time_col'],
    event_col=dataset_conf['event_col'],
    event_cnt_col=dataset_conf['event_cnt_col'],
)

train_dl = torch.utils.data.DataLoader(
    dataset=train_ds,
    batch_size=32,
    num_workers=4,
    collate_fn=collate_fn,
)



# <font size="6">COTIC</font>

In [5]:
from src.models.components.cont_cnn import CCNN
from src.models.components.cont_cnn import Kernel
from src.models.components.cont_cnn import PredictionHead

# Sizes shared by the kernel, the prediction head and the network.
nb_filters = 16
num_types = 432  # NOTE(review): presumably the number of distinct event types — confirm.

# Kernel whose input and output channel counts both equal the filter count.
kernel = Kernel(
    hidden1=8,
    hidden2=8,
    hidden3=8,
    in_channels=nb_filters,
    out_channels=nb_filters,
)

# Prediction head fed with `nb_filters` channels.
head = PredictionHead(in_channels=nb_filters, num_types=num_types)

# The COTIC network assembled from the kernel and head above.
net = CCNN(
    in_channels=32,
    kernel_size=5,
    nb_filters=nb_filters,
    nb_layers=2,
    num_types=num_types,
    kernel=kernel,
    head=head,
)

In [6]:
from src.models.base_model import BaseEventModule
from src.metrics.cont_cnn import CCNNMetrics
from src.utils.metrics import MetricsCore
from src.utils.metrics import MAE, Accuracy
import torch

from omegaconf import OmegaConf, open_dict


# Optimizer and scheduler settings handed to the training module.
# NOTE(review): milestones 40/75 lie beyond the Trainer's max_epochs=10 configured
# later in this notebook; if milestones count epochs the LR never decays — confirm.
train_conf = OmegaConf.create(
    {
        'optimizer': {"name": "adam", "params": {"lr": 0.01, "weight_decay": 1e-8}},
        'scheduler': {"milestones": [40, 75], "gamma": 0.1, "step": None},
    }
)

# Metric/loss bundle: MAE for return time, accuracy for event type.
metrics = CCNNMetrics(
    return_time_metric=MAE(),
    event_type_metric=Accuracy(),
    type_loss_coeff=1,
    time_loss_coeff=10,
    sim_size=40,
    reductions={'log_likelihood': 'mean', 'type': 'sum', 'time': 'mean'},
)

# Training module wrapping the network, metrics and optimisation settings.
model = BaseEventModule(
    net=net,
    metrics=metrics,
    optimizer=train_conf.optimizer,
    scheduler=train_conf.scheduler,
    head_start=1,
)

  rank_zero_warn(


In [7]:
from pytorch_lightning.callbacks import RichModelSummary, RichProgressBar, EarlyStopping, ModelCheckpoint
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger

# NOTE(review): both the checkpoint and early-stopping callbacks monitor
# "val/log_likelihood", but `trainer.fit(model, train_dl)` in this notebook passes
# no validation dataloader and the recorded output reports the val loop being
# skipped — so this metric is presumably never logged. Confirm and either add
# validation data or monitor a training metric.
model_checkpoint = ModelCheckpoint(
    dirpath="checkpoints/",
    filename="epoch_{epoch:03d}",
    monitor="val/log_likelihood",
    mode="max",
    save_top_k=1,
    save_last=True,
    auto_insert_metric_name=False,
    verbose=False,
)

early_stopping = EarlyStopping(
    monitor="val/log_likelihood",
    mode="max",
    patience=100,
    min_delta=0,
)

model_summary = RichModelSummary(max_depth=-1)
rich_progress_bar = RichProgressBar()

callbacks = [model_checkpoint, early_stopping, model_summary, rich_progress_bar]

# TensorBoard logger writing under the "tensorboard" directory.
tensorboard = TensorBoardLogger(
    save_dir="tensorboard",
    prefix="",
    default_hp_metric=True,
    log_graph=False,
)
logger = [tensorboard]

# <font size="6">Fitting</font>

In [8]:
from pytorch_lightning import Trainer

# Trainer on GPU 0 with gradient accumulation over 10 batches and clipping at 1.
# NOTE(review): `limit_val_batches` / `val_check_interval` only take effect when a
# validation dataloader is supplied; `fit` below receives the training loader only.
trainer = Trainer(
    gpus=[0],
    max_epochs=10,
    accumulate_grad_batches=10,
    gradient_clip_val=1,
    val_check_interval=1000,
    limit_val_batches=100,
    logger=logger,
    callbacks=callbacks,
)

# Announce the run and its logger version, then start training.
log.info("Starting training!")
print(f'logger.version = {trainer.logger.version}')
trainer.fit(model, train_dl)

Trainer already configured with model summary callbacks: [<class 'pytorch_lightning.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


logger.version = 19


  rank_zero_warn("You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.")
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Output()

In [9]:
# Persist only the module's state_dict (not the whole module object) to a file
# in the current working directory.
torch.save(model.state_dict(), "cotic-alpha-battle.pt")


KeyboardInterrupt

