In [1]:
import torch
import math

In [2]:
class SinusoidalPositionalEmbedding(torch.nn.Module):
    """Add fixed sine/cosine positional encodings to a batch of embeddings.

    The encoding table is computed once in the constructor and stored as the
    buffer ``pe`` with shape ``(1, num_positions, embedding_dim)``. Even
    feature indices hold ``sin(position * freq)`` and odd feature indices hold
    ``cos(position * freq)``, where
    ``freq = exp(-2i * ln(10000) / embedding_dim)`` for the i-th sin/cos pair.

    Args:
        num_positions: Maximum sequence length the table covers.
        embedding_dim: Size of the last (feature) dimension. Odd values are
            supported; the final sine column then has no cosine partner.
    """

    def __init__(self, num_positions, embedding_dim):
        super().__init__()

        self.num_positions = num_positions
        self.embedding_dim = embedding_dim

        # Column vector of position indices, shape (num_positions, 1).
        position = torch.arange(0, num_positions).unsqueeze(1).float()
        # One frequency per sin/cos pair, shape (ceil(embedding_dim / 2),).
        div_term = torch.exp(
            torch.arange(0, embedding_dim, 2).float()
            * -(math.log(10000.0) / embedding_dim)
        )
        # Broadcast to (num_positions, ceil(embedding_dim / 2)).
        angles = position * div_term

        pe = torch.zeros(num_positions, embedding_dim)
        pe[:, 0::2] = torch.sin(angles)
        # There are only embedding_dim // 2 odd columns. When embedding_dim is
        # odd that is one fewer than the number of frequencies, so trim the
        # angles to match instead of failing with a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : embedding_dim // 2])

        # Leading axis of size 1 so the table broadcasts over the batch.
        # Registered as a buffer: it follows .to()/.cuda() and is saved in the
        # state dict, but is not a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return ``x`` with positional encodings added.

        Args:
            x: Tensor whose dimension 1 is the sequence axis and whose shape
                broadcasts against ``(1, seq_len, embedding_dim)``, typically
                ``(batch_size, seq_len, embedding_dim)``.

        Returns:
            ``x + pe[:, :seq_len, :]``.

        Raises:
            ValueError: If ``seq_len`` exceeds ``num_positions``. Without this
                check the slice below would be silently truncated, leading to
                an opaque broadcasting error (or, when ``num_positions == 1``,
                to position 0 being applied to every token).
        """
        seq_len = x.size(1)
        if seq_len > self.num_positions:
            raise ValueError(
                f"Sequence length {seq_len} exceeds the maximum number of "
                f"positions ({self.num_positions})."
            )
        return x + self.pe[:, :seq_len, :]
