## Overview

This notebook demonstrates a simple sequence classification pipeline using PyTorch with Learnable Positional Encoding technique.

##functions:

1. **Dummy Dataset**: Generates random input sequences and labels
2. **Model Definition**:
   - **Learnable Positional Encoding**: Adds learnable positional encodings to input sequences.
   - **Sequence Classifier**: A transformer based model that processes sequences and outputs class predictions with 2 layers stacked with multi head attention.


In [44]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

torch.manual_seed(42)

<torch._C.Generator at 0x7a828d3690d0>

In [40]:
# Hyperparameters
BATCH_SIZE = 32
seq_len = 50
dim_model = 128
num_classes = 5
EPOCHS = 5
LR = 1e-3
num_samples = 100

## **Creating a Synthetic Sequence Data**

In [29]:
class DummySequenceDataset(Dataset):
    """
    A dummy dataset for sequence classification models with random sequences and labels.
    """
    def __init__(self, num_samples, seq_len, dim_model, num_classes):
      """
      Args:
          num_samples (int): Number of samples in the dataset.
          seq_len (int): Length of input sequence.
      """
      self.data = torch.randn(num_samples, seq_len, dim_model)
      self.labels = torch.randint(0, num_classes, (num_samples,))

    def __len__(self):
        """
        Returns the number of samples in the dataset.
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        Returns a input sample and corresponding label from the dataset.
        """
        return self.data[idx], self.labels[idx]

In [41]:
dataset = DummySequenceDataset(num_samples,seq_len, dim_model, num_classes)

In [42]:
# Print the size of the sample input and the label

random_index = 4
sample, label = dataset[random_index]

print(f"input at index {random_index} has size: {sample.size()}")
print(f" correspoinding Label {random_index} is: {label}")

input at index 4 has size: torch.Size([50, 128])
 correspoinding Label 4 is: 1


In [32]:
class LearnablePositionalEncoding(nn.Module):
  """
  This module adds learnable positional encodings to the input sequence.
  The positional encodings are initialized with a normal distribution.
  """
  def __init__(self, dim_model, max_seq_len= 512):
      super().__init__()
      self.positional_encodings = nn.Parameter(torch.zeros(max_seq_len, dim_model))
      nn.init.normal_(self.positional_encodings, mean=0.0, std=0.02)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
      seq_len = x.size(1)
      return x + self.positional_encodings[:max_seq_len, :].unsqueeze(0)

In [33]:
class SequenceClassifier(nn.Module):
    def __init__(self, dim_model, num_classes, maxseq_len=512):
      """
      Args:
            dim_model (int): The dimensionality of the input features and the Transformer model.
            num_classes (int): output classes.
            maxseq_len : The maximum sequence length for positional encoding. Defaults to 512.
      """
      super().__init__()
      self.pos_encoder = LearnablePositionalEncoding(dim_model, maxseq_len)
      self.transformer = nn.TransformerEncoder(
      nn.TransformerEncoderLayer(dim_model, nhead=4),num_layers=2)
      self.classifier = nn.Linear(dim_model, num_classes)

    def forward(self, x):
        x = self.pos_encoder(x)
        x = x.permute(1, 0, 2)
        x = self.transformer(x)
        x = x.mean(dim=0)
        return self.classifier(x)

## **Training**

In [43]:
# Create dataset and dataloader
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

In [35]:
model = SequenceClassifier(dim_model, num_classes)
criterion = nn.CrossEntropyLoss()
# update model parameters during training
optimizer = torch.optim.Adam(model.parameters(), lr=LR)



In [36]:
model.train()
for epoch in range(EPOCHS):
    total_loss = 0
    for batch, (inputs, labels) in enumerate(dataloader):
       # # Clears the gradients from the previous batch
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # compute gradiests
        loss.backward()
        # Update the gradients.
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{EPOCHS} | Loss: {total_loss/len(dataloader):.4f}")

Epoch 1/5 | Loss: 2.5007
Epoch 2/5 | Loss: 2.1751
Epoch 3/5 | Loss: 1.5624
Epoch 4/5 | Loss: 1.6777
Epoch 5/5 | Loss: 1.7449


## **Testing**

In [37]:
model.eval()
test_dataset = DummySequenceDataset(num_samples, seq_len,dim_model, num_classes)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [38]:
correct = 0
total = 0
# Disables gradient
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"\nTest Accuracy: {100 * correct / total:.2f}%")


Test Accuracy: 15.00%
