### Design a learnable positional encoding method using PyTorch

In [1]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

In [2]:
# Model and optimisation hyperparameters
embedding_dim = 16  # Width of each token / position embedding vector
hidden_dim = 32  # Width of the recurrent layer's hidden state
num_layers = 2  # Number of stacked recurrent layers
learning_rate = 3e-3  # Step size handed to the optimizer
num_epochs = 25  # Passes over the data; enough for the positional weights to visibly change

# Shape of the synthetic (dummy) dataset
num_samples = 40  # How many sequences are generated
custom_max_sequence_length = 10  # Tokens per sequence (also the number of learnable positions)
custom_vocab_size = 12  # Token ids are drawn from [0, custom_vocab_size)
custom_batch_size = 4  # Sequences per optimisation step

In [3]:
# Build a random training set: integer token ids plus a binary label per sequence
train_shape=(num_samples,custom_max_sequence_length)
data_sequences=torch.randint(0,custom_vocab_size,train_shape)
op_labels=torch.randint(0,2,(num_samples,))

# Show the tensor shapes followed by one example sequence and one example label
print(f'{data_sequences.shape=}')
print(f'{op_labels.shape=}')
print(f'{data_sequences[0]=}')
print(f'{op_labels[1]=}')

data_sequences.shape=torch.Size([40, 10])
op_labels.shape=torch.Size([40])
data_sequences[0]=tensor([ 3, 10,  7,  7,  7,  1,  9,  8,  3,  2])
op_labels[1]=tensor(1)


In [4]:
# Define Positional Encoding Layer
class PositionalEncoding(nn.Module):
    """
    Learnable Positional Encoding Layer:
    Keeps one trainable vector per position in an ``nn.Embedding`` lookup table and adds
    the vector for position ``t`` to the input at time step ``t``.

    Args:
        embedding_dim (int): Dimensionality of word embeddings.
        max_length (int): Maximum length of input sequences (number of rows in the lookup table).
    """

    def __init__(self,embedding_dim,max_length):
        """
        Initialise the ``nn.Module`` base class and create the position lookup table.
        """
        super().__init__()
        self.embedding_dim=embedding_dim
        self.max_length=max_length
        # One learnable row per position index 0 .. max_length-1.
        self.positional_encoding=nn.Embedding(max_length,embedding_dim)

    def forward(self,x):
        """
        Forward pass of the Positional Encoding Layer.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size,sequence_length,embedding_dim).

        Returns:
            torch.Tensor: Output tensor with positional encodings added (same shape as ``x``).

        Raises:
            IndexError: If ``sequence_length`` is larger than ``max_length``.
        """
        seq_len=x.size(1)
        # Fail early with a readable message; without this the embedding lookup raises an
        # opaque out-of-range error (and on CUDA it surfaces as a device-side assert).
        if seq_len > self.max_length:
            raise IndexError(
                f"sequence length {seq_len} exceeds max_length {self.max_length} "
                "of the positional encoding table"
            )
        # Build the position indices directly on the input's device (no CPU -> device copy).
        torch_positions=torch.arange(seq_len,device=x.device)
        # Shape (sequence_length,embedding_dim); broadcasting adds it to every batch element,
        # so the indices do not need to be repeated per sample.
        pos_embedding=self.positional_encoding(torch_positions)
        return x + pos_embedding

# Define Custom Model
class CustomModel(nn.Module):
    """
    Custom Model:
    Small sequence classifier built around the learnable PositionalEncoding layer.
    Pipeline: token embedding -> positional encoding -> GRU -> linear head producing one logit.

    Args:
        embedding_dim (int): Dimensionality of word embeddings.
        hidden_dim (int): Dimensionality of hidden states in the recurrent layer.
        num_layers (int): Number of layers in the recurrent layer.
        vocab_size (int): Size of the vocabulary.
        max_length (int): Maximum length of input sequences.
    """

    def __init__(self,embedding_dim,hidden_dim,num_layers,vocab_size,max_length):
        super().__init__()
        # Token lookup table followed by the learnable position table.
        self.embedding=nn.Embedding(vocab_size,embedding_dim)
        self.pos_encoder=PositionalEncoding(embedding_dim,max_length)
        # Batch-first GRU, then a single-output linear layer for the binary logit.
        self.rnn_lay=nn.GRU(embedding_dim,hidden_dim,num_layers,batch_first=True)
        self.fc_lay=nn.Linear(hidden_dim,1)

    def forward(self,x):
        """
        Forward pass of the Custom Model.

        Args:
            x (torch.Tensor): Input tensor of token ids with shape (batch_size,sequence_length).

        Returns:
            torch.Tensor: One logit per sequence, with the size-1 dimension 1 squeezed away.
        """
        encoded=self.pos_encoder(self.embedding(x))
        # Only the final hidden states are needed; the per-step outputs are discarded.
        _,final_hidden=self.rnn_lay(encoded)
        # final_hidden[-1] is the last layer's hidden state.
        logits=self.fc_lay(final_hidden[-1])
        return logits.squeeze(1)

In [5]:
# Build the model, the loss (binary cross-entropy on raw logits) and the Adam optimizer
pe_learn_model=CustomModel(embedding_dim,hidden_dim,num_layers,custom_vocab_size,custom_max_sequence_length)
criterion=nn.BCEWithLogitsLoss()
# One parameter group per sub-module, in the same order as they are applied in forward()
optimizer=optim.Adam(
    [
        {'params': sub_module.parameters()}
        for sub_module in (
            pe_learn_model.embedding,
            pe_learn_model.pos_encoder,
            pe_learn_model.rnn_lay,
            pe_learn_model.fc_lay,
        )
    ],
    lr=learning_rate,
)

In [6]:
# Snapshot of the positional lookup table before training (detached copy, unaffected by later updates)
initial_pe_weights=pe_learn_model.pos_encoder.positional_encoding.weight.detach().clone()

In [7]:
# Training loop: plain mini-batch passes over the dataset in its stored order
for epoch in range(num_epochs):
    total_loss=0
    for start in range(0,num_samples,custom_batch_size):
        stop=start+custom_batch_size
        inputs=data_sequences[start:stop]
        # BCEWithLogitsLoss needs floating-point targets, the labels are stored as integers
        targets=op_labels[start:stop].float()

        logits=pe_learn_model(inputs)
        batch_loss=criterion(logits,targets)
        # Accumulate the summed (not averaged) batch losses for the epoch report
        total_loss += batch_loss.item()

        # Clear stale gradients, back-propagate, then update the weights
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
    print(f"Epoch {epoch+1}/{num_epochs},Loss: {total_loss}")

Epoch 1/25,Loss: 6.894695997238159
Epoch 2/25,Loss: 6.640145123004913
Epoch 3/25,Loss: 6.480790078639984
Epoch 4/25,Loss: 6.219516813755035
Epoch 5/25,Loss: 5.715315133333206
Epoch 6/25,Loss: 4.88611027598381
Epoch 7/25,Loss: 3.6961225271224976
Epoch 8/25,Loss: 2.2221962064504623
Epoch 9/25,Loss: 1.0123903900384903
Epoch 10/25,Loss: 0.8180086947977543
Epoch 11/25,Loss: 1.510180700570345
Epoch 12/25,Loss: 0.6468502841889858
Epoch 13/25,Loss: 0.1635905266739428
Epoch 14/25,Loss: 0.14276140881702304
Epoch 15/25,Loss: 0.08291427814401686
Epoch 16/25,Loss: 0.055168478516861796
Epoch 17/25,Loss: 0.044889040873385966
Epoch 18/25,Loss: 0.0387532499153167
Epoch 19/25,Loss: 0.03406481328420341
Epoch 20/25,Loss: 0.030309422523714602
Epoch 21/25,Loss: 0.027257984620518982
Epoch 22/25,Loss: 0.024734648526646197
Epoch 23/25,Loss: 0.022610895335674286
Epoch 24/25,Loss: 0.020795524469576776
Epoch 25/25,Loss: 0.019224266114179045


In [8]:
# Snapshot of the positional lookup table after training
updated_pe_weights=pe_learn_model.pos_encoder.positional_encoding.weight.detach().clone()

# Report: shapes, the first two rows before/after, whether they changed, then both full tables
thin_rule="-" * 50
thick_rule="=" * 50
print(thin_rule)
print(f'{initial_pe_weights.shape=},{updated_pe_weights.shape=}')
print(f'{initial_pe_weights[0:2]=}')
print(f'{updated_pe_weights[0:2]=}')
print(f'Trained ? {not(torch.allclose(initial_pe_weights[0:2],updated_pe_weights[0:2]))}')
print(thin_rule)
print("Initial Positional Encoding Lookup Table:")
print(initial_pe_weights)
print(thick_rule)
print("Updated Positional Encoding Lookup Table:")
print(updated_pe_weights)
print(thin_rule)

--------------------------------------------------
initial_pe_weights.shape=torch.Size([10, 16]),updated_pe_weights.shape=torch.Size([10, 16])
initial_pe_weights[0:2]=tensor([[-0.7854,  1.7261,  0.2249, -0.8070, -1.0902,  0.2987,  0.3929,  1.2629,
         -0.0936, -1.0125,  0.7450, -0.5617, -0.3318, -1.7285, -0.6494, -0.3264],
        [-0.5707,  1.2371, -0.3900,  1.4904, -0.2800,  0.5145,  0.3783,  1.6577,
          1.0607, -0.6229,  2.0561,  0.3582, -1.0336,  0.6204, -0.1082, -1.3793]])
updated_pe_weights[0:2]=tensor([[-0.7303,  1.8327,  0.3497, -0.8721, -1.1539,  0.2085,  0.3146,  1.3467,
         -0.0717, -1.0628,  0.7773, -0.5866, -0.3985, -1.6503, -0.6222, -0.2936],
        [-0.6016,  1.1218, -0.3039,  1.5585, -0.3050,  0.5415,  0.4085,  1.7482,
          1.0593, -0.6896,  2.1031,  0.3863, -1.1077,  0.7604, -0.0519, -1.3433]])
Trained ? True
--------------------------------------------------
Initial Positional Encoding Lookup Table:
tensor([[-0.7854,  1.7261,  0.2249, -0.8070, -1

In [20]:
# Random validation set with the same shape as the training data
validation_shape = (num_samples, custom_max_sequence_length)
validation_sequences = torch.randint(0, custom_vocab_size, validation_shape)
validation_labels = torch.randint(0, 2, (num_samples,))

# Inference only: gradients are not needed
with torch.no_grad():
    val_output = pe_learn_model(validation_sequences)
    # Logits -> probabilities -> hard 0/1 predictions
    val_predictions = val_output.sigmoid().round()

# Count the positions where prediction and label agree
correct_predictions = val_predictions.eq(validation_labels).sum().item()

# Fraction of correct predictions over the whole validation set
accuracy = correct_predictions / validation_labels.size(0)

print(f"Accuracy: {round(number=(accuracy*100),ndigits=4)}%")

Accuracy: 62.5%


In [10]:
# Show the ground-truth labels, the raw model logits, and the rounded sigmoid
# of those logits (the hard 0/1 predictions) for side-by-side inspection.
print(f'{validation_labels = }')
print(f'{val_output = }')
print(f'{torch.round(torch.sigmoid(val_output)) = }')

validation_labels = tensor([0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1,
        0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0])
val_output = tensor([ 5.7136, -5.2054, -6.6005, -4.2504, -4.0834,  5.9186,  7.2612,  4.7124,
        -5.0508,  7.4155, -7.0193,  4.5231,  5.6834,  1.9681, -4.3108,  6.8783,
         7.0421, -3.8888, -6.0755, -5.5331,  4.9317,  6.9413,  7.0512,  4.1541,
         7.3541,  5.4579, -6.0865,  7.4797,  6.5507,  0.9967, -2.6785,  6.2085,
        -4.7132,  6.3882,  7.5549,  1.3410,  2.3158, -4.1999, -2.8665, -0.5916])
torch.round(torch.sigmoid(val_output)) = tensor([1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0.,
        0., 0., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 0., 1., 1., 1.,
        1., 0., 0., 0.])
