# Data

In [1]:
import numpy as np
training_data = [
  ("The dog ate the apple".split(), ["determiner", "noun", "verb", "determiner", "noun"]),
  ("Everybody read that book".split(), ["noun", "verb", "determiner", "noun"]),
  ("Everybody does machine learning nowadays".split(), ["noun", "verb", "noun", "noun", "adverb"]),
]

tag_to_ind = {
  "determiner": 0,
  "noun": 1,
  "verb": 2,
  "adverb": 3,
}

ind_to_tag = {v: k for k, v in tag_to_ind.items()}
vocab = {a: b for b, a in enumerate(set(word for sentence, _ in training_data for word in sentence))}
vocab

{'read': 0,
 'apple': 1,
 'the': 2,
 'that': 3,
 'ate': 4,
 'does': 5,
 'machine': 6,
 'Everybody': 7,
 'learning': 8,
 'dog': 9,
 'book': 10,
 'The': 11,
 'nowadays': 12}

# Model

In [2]:
import lightning as L
import torch
import torchmetrics
import torchmetrics.text

class GRUTagger(L.LightningModule):
  def __init__(self, vocab, tag_to_ind):
    super().__init__()
    self.embedding = torch.nn.Embedding(len(vocab), embedding_dim=6) # word index -> embedding vector (6x1)
    self.gru = torch.nn.GRU(input_size=6, hidden_size=12, batch_first=True)
    self.classifier = torch.nn.Linear(in_features=12, out_features=len(tag_to_ind))
    self.log_softmax = torch.nn.LogSoftmax(dim=1) 
    
    self.perplexity = torchmetrics.text.Perplexity()
  def training_step(self, batch, batch_idx):
    x, target = batch
    embedds = self.embedding(x)
    gru_out, _ = self.gru(embedds)
    logits = self.classifier(gru_out)
    log_probs = self.log_softmax(logits)
    loss = self.perplexity(log_probs, target) # https://lightning.ai/docs/torchmetrics/stable/gallery/text/perplexity.html
    return loss

  def test_step(self, batch, batch_idx):
    x, target = batch
    embedds = self.embedding(x)
    lstm_out, _ = self.gru(embedds)
    logits = self.classifier(lstm_out)
    log_probs = self.log_softmax(logits)
    loss = self.perplexity(log_probs, target)
    self.log("test_perplexity", loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
    return loss

  def configure_optimizers(self):
    return torch.optim.SGD(self.parameters(), lr=0.1)
  
gru = GRUTagger(vocab, tag_to_ind)
gru

GRUTagger(
  (embedding): Embedding(13, 6)
  (gru): GRU(6, 12, batch_first=True)
  (classifier): Linear(in_features=12, out_features=4, bias=True)
  (log_softmax): LogSoftmax(dim=1)
  (perplexity): Perplexity()
)

# DataLoading

In [3]:

class TheDataModule(L.LightningDataModule):
  def __init__(self, data: list[tuple[list[str], list[str]]], tags: dict[str, int], vocab: dict[str, int]):
    super().__init__()
    self.data = data
    self.tags = tags
    self.vocab = vocab
  
  def prepare_data(self):
    pass
  
  def setup(self, stage: str):
    class Dataset(torch.utils.data.Dataset):
      def __init__(self, data, tags, vocab):
        self.data = data
        self.tags = tags
        self.vocab = vocab
      
      def __len__(self):
        return len(self.data)
      
      def __getitem__(self, idx):
        words, tags = self.data[idx]
        x = torch.tensor([self.vocab[word] for word in words], dtype=torch.long)
        y = torch.tensor([self.tags[tag] for tag in tags], dtype=torch.long)
        return x, y
    self.dataset = Dataset(self.data, self.tags, self.vocab)

  def train_dataloader(self):
    return torch.utils.data.DataLoader(self.dataset)
  def test_dataloader(self):
    return torch.utils.data.DataLoader(self.dataset)
  
data_module = TheDataModule(training_data, tag_to_ind, vocab)
data_module

<__main__.TheDataModule at 0x1b3e1a80ce0>

# Training

In [4]:
from lightning.pytorch import seed_everything as seed

trainer = L.Trainer(
  max_epochs=100,
  deterministic=True,
  enable_checkpointing=False,
  logger=False,  # Disable CSVLogger and all logging
)
trainer

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


<lightning.pytorch.trainer.trainer.Trainer at 0x1b3df91f6b0>

In [5]:
seed(1)
trainer.fit(gru, data_module)
test_results = trainer.test(gru, data_module)


Seed set to 1
You are using a CUDA device ('NVIDIA GeForce RTX 3050') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type       | Params | Mode 
---------------------------------------------------
0 | embedding   | Embedding  | 78     | train
1 | gru         | GRU        | 720    | train
2 | classifier  | Linear     | 52     | train
3 | log_softmax | LogSoftmax | 0      | train
4 | perplexity  | Perplexity | 0      | train
---------------------------------------------------
850       Trainable params
0         Non-trainable params
850       Total params
0.003     Total estimated model params size (MB)
5         Modules in train mode
0         Modules in eval mode
c:\Use

Training: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
c:\Users\Plancha\AML-homework\.pixi\envs\gpu\Lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

c:\Users\Plancha\AML-homework\.pixi\envs\gpu\Lib\site-packages\lightning\pytorch\core\module.py:512: You called `self.log('test_perplexity', ..., logger=True)` but have no logger configured. You can enable one by doing `Trainer(logger=ALogger(...))`


GRU got the lowest perplexity