<a href="https://colab.research.google.com/github/mahesh-keswani/pytorch-example-notebook/blob/main/15_PyTorchWithEmbedding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# This notebook does not implement a complete application.
# Instead, it shows exactly how to use the Embedding layer, which can be easily adapted to your requirements.
import torch
import torch.nn as nn
import numpy as np

In [2]:
class LSTMWithEmbedding(nn.Module):
    """Embedding layer -> single-layer LSTM -> linear layer on the last time step.

    Args:
        input_dimension: Number of rows in the embedding table (e.g. the
            vocabulary size).
        hidden_dimension: Size of the LSTM hidden state.
        output_dimension: Number of features produced by the final linear layer.
        embedding_dimension: Size of each embedding vector.
        verbose: When True (the default), ``forward`` prints the tensor shape
            after every stage. Pass False to silence the prints.
    """

    def __init__(self, input_dimension, hidden_dimension, output_dimension, embedding_dimension, verbose=True):
        super().__init__()

        self.input_dimension = input_dimension
        self.hidden_dimension = hidden_dimension
        self.output_dimension = output_dimension
        self.embedding_dimension = embedding_dimension
        self.verbose = verbose

        # input_dimension can be the size of the vocabulary
        self.embedding = nn.Embedding(input_dimension, embedding_dimension)
        self.lstm = nn.LSTM(embedding_dimension, hidden_dimension, batch_first=True)
        self.fc = nn.Linear(hidden_dimension, output_dimension)

    def forward(self, x):
        """Run a batch of token ids through the embedding, the LSTM and the linear layer.

        Args:
            x: Integer tensor of token ids, shape (batch_size, sequence_length).

        Returns:
            Tensor of shape (batch_size, output_dimension).
        """
        # Initial hidden/cell states, shape (num_layers=1, batch_size, hidden_dimension).
        # They are allocated from a parameter so they inherit the model's dtype and
        # device; a bare torch.zeros(...) is always CPU float32 and breaks the
        # forward pass after model.to(device) or model.double().
        h0 = self.embedding.weight.new_zeros(1, x.size(0), self.hidden_dimension)
        c0 = torch.zeros_like(h0)
        if self.verbose:
            print(x.shape)

        x = self.embedding(x)
        if self.verbose:
            print(x.shape)

        out, _ = self.lstm(x, (h0, c0))
        if self.verbose:
            print(out.shape)
        # Alternative: instead of keeping only the last time step below, max-pool
        # over the sequence dimension with: out, _ = torch.max(out, 1)

        # Keep only the LSTM output at the last time step of every sequence.
        out = self.fc(out[:, -1, :])
        if self.verbose:
            print(out.shape)
        return out

In [12]:
# Hyperparameters for the demo model
input_dimension = 10      # vocabulary size (number of rows in the embedding table)
embedding_dimension = 8   # length of each embedding vector
hidden_dimension = 20     # number of hidden units in the LSTM
output_dimension = 5      # number of output classes

In [13]:
# Seed PyTorch's RNG before building the model so the randomly initialised
# weights (and therefore the printed output) are the same on every
# Restart & Run All.
torch.manual_seed(0)

# Create an instance of the model
model = LSTMWithEmbedding(input_dimension, hidden_dimension, output_dimension, embedding_dimension)

In [14]:
# Build a tiny, valid batch of token ids shaped (batch_size, sequence_length).
# The Embedding layer only accepts integer (int / long) index tensors.
sample_input = torch.tensor(
    [
        [1, 2, 3, 4],  # first sequence, 4 tokens
        [2, 3, 4, 5],  # second sequence, 4 tokens
    ],
    dtype=torch.long,
)

# Run the forward pass on the sample batch
output = model(sample_input)
print("Output:", output)

torch.Size([2, 4])
torch.Size([2, 4, 8])
torch.Size([2, 4, 20])
torch.Size([2, 5])
Output: tensor([[ 0.0540, -0.0490, -0.0998, -0.0354, -0.0661],
        [ 0.1168, -0.0288, -0.1062, -0.0041, -0.0637]],
       grad_fn=<AddmmBackward0>)
