In [1]:
import torch
from models.classifier import TransformerEncoder

In [2]:
# Hyper-parameters handed to the model constructor, grouped by concern.
hparams = dict(
    # --- architecture ---
    hidden_size=48,  # width of the hidden layers and of the embeddings
    hidden_ff=96,  # width of the position-wise feed-forward layer
    n_encoders=4,  # how many encoder blocks are stacked
    n_heads=2,  # attention heads in the multi-head attention module
    n_local=2,  # how many of the heads are local-attention heads
    local_window_size=4,  # window size used by local attention
    # --- data ---
    batch_size=4,
    max_length=30,  # longest input sequence
    vocab_size=100,  # number of entries in the vocabulary
    # --- optimisation schedule ---
    learning_rate=0.001,
    num_epochs=30,
    # --- attention / normalisation variants ---
    attention_type="performer",
    norm_type="rezero",
    num_random_features=32,  # random features for the attention module (used by Performer)
    # --- dropout rates ---
    emb_dropout=0.1,  # embedding block
    fw_dropout=0.1,  # position-wise feed-forward layer
    att_dropout=0.1,  # multi-head attention module
    dc_dropout=0.1,  # decoder block
    # --- activation ---
    hidden_act="swish",  # hidden-layer activation (attention layers use ReLU)
    # --- optimiser constants ---
    epsilon=1e-8,
    weight_decay=0.01,
    beta1=0.9,
    beta2=0.999,
)
# Seed torch's global RNG before building the model so that any torch-based
# random initialisation is repeatable under Restart Kernel -> Run All.
torch.manual_seed(0)
model = TransformerEncoder(hparams)

# Sanity Check
Check that the model returns values and that the output looks OK.

In [3]:
# Build one fake batch for the sanity check below.
hp = model.hparams  # short alias for the repeated hyper-parameter lookups

# A dedicated, seeded generator makes this cell idempotent: re-running it
# always produces the same random batch, independent of the global RNG state.
batch_rng = torch.Generator().manual_seed(0)

batch = {
    # random token ids in [0, vocab_size), shape (batch_size, max_length)
    'event': torch.randint(0, hp.vocab_size, (hp.batch_size, hp.max_length), generator=batch_rng),
    # positions 0 .. max_length - 1, repeated for every row of the batch
    'abspos': torch.arange(0, hp.max_length).unsqueeze(0).repeat(hp.batch_size, 1),
    # one random integer in [0, 100) per row, repeated along the sequence
    'age': torch.randint(0, 100, (hp.batch_size, 1), generator=batch_rng).repeat(1, hp.max_length),
    # all zeros; presumably means "no position is padding" - TODO confirm against the model
    'padding_mask': torch.zeros(hp.batch_size, hp.max_length),
    # random binary labels as floats, shape (batch_size, 1)
    'targets': torch.randint(0, 2, (hp.batch_size, 1), generator=batch_rng).float(),
}

In [4]:
# Display the generated batch so its tensors can be inspected by eye.
batch

{'event': tensor([[92, 72, 88, 63, 96,  7, 21, 51, 54, 81, 41, 85, 96, 92,  6, 50, 39, 40,
          29, 12, 89, 25, 55, 29, 55, 22,  4, 73, 23, 36],
         [69, 87, 22, 14,  4, 38, 66,  9, 44, 37, 43, 60, 71, 38, 49, 30, 73, 10,
          56,  7,  5, 71, 64, 64, 96, 96, 53, 12, 49, 77],
         [77, 43, 14, 33, 47, 61, 29, 76, 96, 71, 44, 20, 13, 62, 30, 35, 92, 12,
           7, 66, 96, 77, 94, 60, 31, 88, 47,  8, 39, 36],
         [64, 57,  7,  1, 39, 45, 17, 30, 55, 68, 59,  6, 37, 83, 29, 95, 58, 17,
          46, 75, 55, 75, 40, 51, 66, 50, 75, 71, 32, 27]]),
 'abspos': tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
          18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
         [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
          18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
         [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
          18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 2

In [5]:
# Forward pass on the fake batch. The recorded output is a dict holding
# 'logits' and their sigmoid 'preds', one row per sample in the batch.
model(batch)

{'logits': tensor([[-0.4265],
         [-0.3558],
         [-0.2273],
         [-0.2896]], grad_fn=<AddmmBackward0>),
 'preds': tensor([[0.3950],
         [0.4120],
         [0.4434],
         [0.4281]], grad_fn=<SigmoidBackward0>)}

In [6]:
# Call the training step by hand (second argument 0) with no Trainer attached;
# Lightning therefore warns that `self.log()` has no trainer to report to.
# The recorded return value is the scalar binary-cross-entropy-with-logits loss.
model.training_step(batch, 0)

/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pytorch_lightning/core/module.py:447: You are trying to `self.log()` but the `self.trainer` reference is not registered on the model yet. This is most likely because the model hasn't been passed to the `Trainer`


tensor(0.8685, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [7]:
# Invoke the end-of-epoch hook manually to check that it runs outside a
# Trainer; no output is recorded for this call.
model.on_train_epoch_end()

### The full pipeline would look something like this

In [8]:
from dataloaders.synthetic import SyntheticDataModule

# Synthetic alternative to the file-backed data used for training further down.
# It is bound to its own name so it is not silently overwritten when
# `dataloader` is assigned later in the notebook.
synthetic_datamodule = SyntheticDataModule(
    num_samples=1000,
    max_length=hparams['max_length'],
    batch_size=hparams['batch_size'],
    vocab_size=hparams['vocab_size'],
)

# Import test data

In [9]:
from dataset import DataModule
# Files that make up the small fake dataset.
sequence_path, targets_path, vocab_path = (
    'fake_data/sequence_data.parquet',
    'fake_data/targets.csv',
    'fake_data/vocab.json',
)

# Collect the constructor arguments first, then build the data module and
# run its setup step.
datamodule_kwargs = {
    'sequence_path': sequence_path,
    'batch_size': 2,
    'target_path': targets_path,
    'vocab_path': vocab_path,
    'subset': True,
}
dataloader = DataModule(**datamodule_kwargs)
dataloader.setup()

In [10]:
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping, LearningRateMonitor
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger

# Checkpointing and early stopping follow the same validation metric,
# and for both a larger value counts as better.
_monitor_kwargs = {"monitor": "val/ap", "mode": "max"}

# Keep the two best checkpoints plus the most recent one.
model_checkpoint = ModelCheckpoint(save_top_k=2, save_last=True, **_monitor_kwargs)
# Stop once the metric has not improved for 5 validation checks.
early_stopping = EarlyStopping(patience=5, **_monitor_kwargs)
# Record the learning rate at every optimisation step.
lr_monitor = LearningRateMonitor(logging_interval="step")
# Write metrics as CSV files under lightning_logs/cls_logs.
logger = CSVLogger("lightning_logs", name="cls_logs")

# Configure the training run. The epoch budget is read from `hparams` so it is
# defined in one place instead of being repeated here as a magic number.
trainer = Trainer(
    max_epochs=hparams["num_epochs"],
    accelerator="cpu",  # switch to "gpu", "cuda" or "mps" to train on an accelerator
    limit_train_batches=0.5,  # a float is read as a fraction of the training batches
    logger=logger,
    accumulate_grad_batches=4,  # accumulate gradients over 4 batches per optimiser step
    num_sanity_val_steps=8,  # validation batches run as a sanity check before training
    callbacks=[model_checkpoint, early_stopping, lr_monitor],
    check_val_every_n_epoch=1,
)


GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pytorch_lightning/trainer/setup.py:177: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


In [11]:
# Train and validate the model with the configured trainer.
# NOTE(review): the recorded run below aborts with KeyError: 'target' once
# validation starts. The validation samples printed there carry no 'target'
# key, unlike the sanity-check and training samples. Presumably the validation
# split produced by DataModule drops the targets - confirm in dataset.py.
trainer.fit(model, dataloader)

Steps per epoch: 100

   | Name        | Type                   | Params | Mode 
----------------------------------------------------------------
0  | transformer | Transformer            | 79.5 K | train
1  | decoder     | CLS_Decoder            | 2.4 K  | train
2  | loss        | BCEWithLogitsLoss      | 0      | train
3  | train_loss  | MeanMetric             | 0      | train
4  | val_loss    | MeanMetric             | 0      | train
5  | test_loss   | MeanMetric             | 0      | train
6  | train_acc   | BinaryAccuracy         | 0      | train
7  | val_acc     | BinaryAccuracy         | 0      | train
8  | test_acc    | BinaryAccuracy         | 0      | train
9  | train_mcc   | BinaryMatthewsCorrCoef | 0      | train
10 | val_mcc     | BinaryMatthewsCorrCoef | 0      | train
11 | test_mcc    | BinaryMatthewsCorrCoef | 0      | train
12 | train_ap    | BinaryAveragePrecision | 0      | train
13 | val_ap      | BinaryAveragePrecision | 0      | train
14 | test_ap     | BinaryAve

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5], 'age': [10, 11, 12, 13, 14], 'event': [[6], [9, 5, 6, 8, 5, 7, 4, 7, 2], [1, 1, 9, 1], [10, 4, 10, 4, 7, 2, 9, 4], [10]], 'target': 1}
{'abspos': [100.5, 101.5, 102.5], 'age': [10, 11, 12], 'event': [[7, 9, 10], [9, 6, 7, 2, 7, 2, 8, 2], [2, 6]], 'target': 1}
{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5, 105.5, 106.5], 'age': [10, 11, 12, 13, 14, 15, 16], 'event': [[8, 7], [2, 3, 3, 3, 4, 8, 9, 4], [5, 3, 7, 8, 2, 2, 1], [5], [10, 10, 4, 10, 6, 5, 4, 7], [3, 4, 3, 3, 9, 5, 6, 7, 8], [5, 2, 5, 7, 5, 3]], 'target': 1}
{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5, 105.5], 'age': [10, 11, 12, 13, 14, 15], 'event': [[9, 3, 1, 8, 6, 7], [4, 1, 10, 4, 5, 4, 3, 5, 1], [1, 7, 8, 8, 5], [3, 5, 6], [4, 1, 6, 4], [10, 9, 9, 2, 3, 8, 7]], 'target': 1}
{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5, 105.5, 106.5], 'age': [10, 11, 12, 13, 14, 15, 16], 'event': [[4, 10, 5, 10, 6, 1, 5, 7], [8, 5, 8, 5], [8, 7, 4, 4], [3, 4, 8, 5, 9], [3, 4, 8, 9, 3, 6],

/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=9` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5, 105.5, 106.5, 107.5], 'age': [10, 11, 12, 13, 14, 15, 16, 17], 'event': [[2], [3, 4, 4, 8, 9, 1], [1, 5, 3, 3, 2], [10, 4, 1, 7, 6], [2], [10, 9], [10, 5], [7, 10, 10, 2, 7, 3, 1, 4]], 'target': 1}
{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5, 105.5, 106.5, 107.5], 'age': [10, 11, 12, 13, 14, 15, 16, 17], 'event': [[5, 5, 1, 2, 4], [7, 10, 7, 9, 10], [3, 2, 3, 1, 1], [2, 5, 8, 8, 6], [3, 7, 1, 10, 7], [7, 4, 4, 10, 8], [7, 9, 7], [10, 4, 8, 8]], 'target': 0}
{'abspos': [100.5, 101.5], 'age': [10, 11], 'event': [[3, 6, 1], [9, 1, 6, 3, 7, 2]], 'target': 0}
{'abspos': [100.5, 101.5], 'age': [10, 11], 'event': [[10, 9, 2, 1, 8], [6, 1]], 'target': 0}
{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5], 'age': [10, 11, 12, 13, 14], 'event': [[3, 1], [5], [9], [9, 10], [9, 1, 2, 7, 2]], 'target': 0}
{'abspos': [100.5], 'age': [10], 'event': [[7, 4, 3, 8]], 'target': 0}
{'abspos': [100.5], 'age': [10], 'event': [[10]], 'target': 1}
{'abspos': [100.5

Validation: |          | 0/? [00:00<?, ?it/s]

{'abspos': [100.5, 101.5, 102.5, 103.5, 104.5], 'age': [10, 11, 12, 13, 14], 'event': [[6], [9, 5, 6, 8, 5, 7, 4, 7, 2], [1, 1, 9, 1], [10, 4, 10, 4, 7, 2, 9, 4], [10]]}
{'abspos': [100.5, 101.5, 102.5], 'age': [10, 11, 12], 'event': [[7, 9, 10], [9, 6, 7, 2, 7, 2, 8, 2], [2, 6]]}


KeyError: 'target'