In [1]:
import sys
# Put the local "src" directory on the import path (resolved against the
# kernel's working directory). Presumably this is where the `ner` package
# imported further down lives — confirm.
sys.path.append("src")

In [2]:
import hydra
import torch

from torch.optim import Adam
from omegaconf import DictConfig, OmegaConf
from hydra.utils import instantiate
from hydra import compose, initialize

In [3]:
# Compose the "train" config from the "configs" directory through Hydra's
# Compose API, so the config can be inspected interactively in this notebook.
with initialize(version_base="1.3", config_path="configs"):
    cfg = compose(config_name="train")

In [4]:
# Dump the composed config (interpolations such as ${task_name} are shown unresolved).
# NOTE(review): `print(OmegaConf.to_yaml(cfg))` would give a readable multi-line dump
# instead of one very long line; `OmegaConf` is already imported.
print(cfg)

{'task_name': 'train', 'callbacks': {'checkpoint': {'_target_': 'lightning.pytorch.callbacks.ModelCheckpoint', 'monitor': 'val_loss', 'dirpath': 'logs/${task_name}/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}/checkpoint/', 'filename': 'ner-{epoch:02d}-{val_loss:.2f}', 'save_top_k': 1, 'mode': 'min'}, 'early_stop': {'_target_': 'lightning.pytorch.callbacks.early_stopping.EarlyStopping', 'monitor': 'val_acc', 'mode': 'max', 'verbose': True, 'patience': 10}}, 'data': {'_target_': 'ner.data.ConllDataModule', 'root_dir': 'data/conll', 'max_len': 65, 'batch_size': 32}, 'logger': {'mlflow_logger': {'_target_': 'lightning.pytorch.loggers.mlflow.MLFlowLogger', 'tracking_uri': 'logs/mlflow/', 'experiment_name': 'ner', 'log_model': True}, 'tensorboard_logger': {'_target_': 'lightning.pytorch.loggers.tensorboard.TensorBoardLogger', 'save_dir': 'logs/tensorboard/', 'name': 'ner'}}, 'loss': {'_target_': 'ner.loss.SparseCategoricalCrossentropy', 'reduction': 'mean'}, 'metrics': {'_target_': 'torchmetrics.Acc

In [5]:
from ner.model import BiLSTMModule

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Construct the module by hand from the raw config nodes. The nodes are passed
# un-instantiated; presumably BiLSTMModule builds the model, loss, metrics and
# optimizer from them itself — confirm in ner.model.
module = BiLSTMModule(
    model=cfg.model,
    loss_fn=cfg.loss,
    metrics=cfg.metrics,
    optim=cfg.optimizer,
)



In [7]:
# Display the network held by the hand-built module to check its layers.
module.model

BiLSTMModel(
  (word_embedding): Embedding(23626, 20, padding_idx=0)
  (pos_embedding): Embedding(47, 20, padding_idx=0)
  (chunk_embedding): Embedding(22, 20, padding_idx=0)
  (spatial_dropout): Dropout(p=0.3, inplace=False)
  (lstm): LSTM(60, 50, batch_first=True, dropout=0.6, bidirectional=True)
  (output_layer): Linear(in_features=100, out_features=10, bias=True)
  (sigmoid): Sigmoid()
)

In [8]:
# Build the module through Hydra instead of by hand. `_recursive_=False` tells
# Hydra not to instantiate nested `_target_` nodes itself, so they reach the
# constructor as config nodes (presumably cfg.module nests the model / loss /
# metrics / optimizer configs — the printed config is truncated, so confirm).
instantiate(cfg.module, _recursive_=False)

BiLSTMModule(
  (model): BiLSTMModel(
    (word_embedding): Embedding(23626, 20, padding_idx=0)
    (pos_embedding): Embedding(47, 20, padding_idx=0)
    (chunk_embedding): Embedding(22, 20, padding_idx=0)
    (spatial_dropout): Dropout(p=0.3, inplace=False)
    (lstm): LSTM(60, 50, batch_first=True, dropout=0.6, bidirectional=True)
    (output_layer): Linear(in_features=100, out_features=10, bias=True)
    (sigmoid): Sigmoid()
  )
  (loss_fn): SparseCategoricalCrossentropy()
  (train_acc): MulticlassAccuracy()
  (val_acc): MulticlassAccuracy()
  (test_acc): MulticlassAccuracy()
)

In [9]:
# Instantiate the data module described by cfg.data and run its setup step;
# the cells below read `data.train` and `data.train_dataloader()` afterwards.
data = instantiate(cfg.data)
data.setup()

In [10]:
# Grab the training split and inspect its token vocabulary (token -> integer id).
# NOTE(review): this displays a very large mapping; consider previewing only a
# handful of entries (and `len(train.token_vocab)`) to keep the output readable.
train = data.train

train.token_vocab

{'.': 2,
 ',': 3,
 'the': 4,
 'of': 5,
 'in': 6,
 'to': 7,
 'a': 8,
 '(': 9,
 ')': 10,
 'and': 11,
 '"': 12,
 'on': 13,
 'said': 14,
 "'s": 15,
 '1': 16,
 'for': 17,
 '-': 18,
 'The': 19,
 'was': 20,
 '2': 21,
 '0': 22,
 '3': 23,
 'at': 24,
 'with': 25,
 'that': 26,
 'from': 27,
 'by': 28,
 ':': 29,
 'is': 30,
 'as': 31,
 'he': 32,
 '4': 33,
 'had': 34,
 'has': 35,
 'it': 36,
 'his': 37,
 'not': 38,
 'were': 39,
 'be': 40,
 'an': 41,
 'have': 42,
 'after': 43,
 'who': 44,
 'will': 45,
 '5': 46,
 'but': 47,
 'first': 48,
 'U.S.': 49,
 'been': 50,
 '$': 51,
 '--': 52,
 'two': 53,
 'are': 54,
 'their': 55,
 '6': 56,
 'beat': 57,
 'which': 58,
 'would': 59,
 'up': 60,
 'I': 61,
 'its': 62,
 'they': 63,
 'percent': 64,
 'year': 65,
 'out': 66,
 'Thursday': 67,
 'this': 68,
 'last': 69,
 'million': 70,
 'over': 71,
 'Wednesday': 72,
 'one': 73,
 '7': 74,
 'government': 75,
 'against': 76,
 '/': 77,
 'police': 78,
 'when': 79,
 'second': 80,
 'also': 81,
 'Tuesday': 82,
 'He': 83,
 'It': 84,


In [11]:
# NOTE(review): `ccccccccc` is an undefined name, so this cell raises NameError
# and halts "Run All" at this point. It looks like either a deliberate stop
# marker or a stray keystroke — confirm, then delete the cell or replace it
# with an explicit, documented stop.
ccccccccc

NameError: name 'ccccccccc' is not defined

In [13]:
# Pull a single batch from the training dataloader for the shape checks below.
# `a`, `b`, `c` are fed to the model and `d` is used as the loss target;
# presumably word / POS / chunk ids and NER tag ids, in that order — confirm
# against the dataset's __getitem__.
a, b, c, d = next(iter(data.train_dataloader()))

In [14]:
# Build a fresh, standalone network from cfg.model (a different object from
# `module.model`) to run a forward pass on the sample batch.
model = instantiate(cfg.model)

In [15]:
# Forward the sample batch through the network and check the output shape.
# NOTE(review): the model repr lists a Sigmoid layer; if forward() applies it,
# these are not raw logits even though the loss applies log-softmax — confirm.
logits = model(a, b, c)

logits.shape

torch.Size([32, 65, 10])

In [16]:
# Shape of the target tensor; it should match the leading dimensions of `logits`.
d.shape

torch.Size([32, 65])

In [17]:
# Instantiate the loss configured in cfg.loss
# (ner.loss.SparseCategoricalCrossentropy with reduction 'mean').
loss = instantiate(cfg.loss)
loss

SparseCategoricalCrossentropy()

In [18]:
# Flatten the logits to (N, num_classes) and the targets to (N,) so that every
# position in the batch is scored as an independent classification example.
loss(logits.view(-1, logits.shape[-1]), d.view(-1))

tensor(2.3130, grad_fn=<MeanBackward0>)

In [19]:
# Predicted class id per flattened position; there should be exactly one
# prediction per flattened target (compare with d.view(-1).shape).
logits.view(-1, logits.shape[-1]).argmax(dim=-1).shape

torch.Size([2080])

In [20]:
# Number of flattened targets, for comparison with the number of predictions.
d.view(-1).shape

torch.Size([2080])

In [21]:
import torch
import torch.nn as nn

class SparseCategoricalCrossentropy(nn.Module):
    """Cross-entropy between unnormalised scores and integer class labels.

    The scores are normalised with log-softmax over the last dimension and the
    negative log-probability of the labelled class is taken for every item.
    Any number of leading dimensions is accepted, so both flattened
    ``(N, num_classes)`` inputs and unflattened ``(batch, seq, num_classes)``
    inputs work without reshaping.
    """

    def __init__(self, reduction='mean'):
        """
        Args:
            reduction (str): How to reduce the per-item losses:
                'mean', 'sum', or 'none' (return them unreduced).

        Raises:
            AssertionError: If ``reduction`` is not one of the options above.
        """
        super().__init__()
        if reduction not in ('mean', 'sum', 'none'):
            # Raised explicitly instead of through an `assert` statement so the
            # validation is not stripped under `python -O`; the exception type
            # and message are kept for backward compatibility.
            raise AssertionError("Invalid reduction type")
        self.reduction = reduction

    def forward(self, logits, targets):
        """
        Compute the loss.

        Args:
            logits (torch.Tensor): Scores of shape (..., num_classes).
            targets (torch.Tensor): Integer class indices of shape (...),
                i.e. ``logits.shape[:-1]``, with values in [0, num_classes).

        Returns:
            torch.Tensor: A scalar for 'mean' / 'sum'; for 'none', a tensor of
            per-item losses with the same shape as ``targets``.
        """
        log_probs = torch.log_softmax(logits, dim=-1)

        # Pick the log-probability of the labelled class along the class axis.
        # gather() needs int64 indices and, unlike indexing with a Python
        # range, works for any number of leading dimensions and stays on the
        # tensors' device.
        class_index = targets.long().unsqueeze(-1)
        loss = -log_probs.gather(dim=-1, index=class_index).squeeze(-1)

        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss


In [22]:
# Sanity check: the notebook-local re-implementation should reproduce the value
# returned by the packaged loss on the same flattened batch.
loss_ = SparseCategoricalCrossentropy()

loss_(logits.view(-1, logits.shape[-1]), d.view(-1))

tensor(2.3130, grad_fn=<MeanBackward0>)