In [2]:
import torch
import torch.nn as nn

# Step 1: Toy vocabulary and model sizes
# Each word's id is its position in the tuple: hi -> 0, there -> 1, hello -> 2.
vocab = {word: idx for idx, word in enumerate(("hi", "there", "hello"))}
vocab_size = len(vocab)
embedding_dim, hidden_dim = 4, 3  # deliberately tiny for simplicity

# Step 2: Create BiLSTM Model
class BiLSTMModel(nn.Module):
    """Embedding layer followed by a single bidirectional LSTM.

    ``forward`` prints every intermediate tensor so the data flow through
    the model can be followed step by step.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        """Create the embedding table and the bidirectional LSTM.

        Args:
            vocab_size: Number of entries in the embedding table.
            embedding_dim: Size of each embedding vector; also the LSTM
                input size.
            hidden_dim: Hidden size of the LSTM in each direction.
        """
        super().__init__()
        self.embedding = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim,
        )
        # batch_first=True: the LSTM consumes (batch, seq, feature) tensors.
        self.bilstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, x):
        """Embed ``x``, run it through the BiLSTM, and print each stage.

        Args:
            x: Tensor of token indices, batch dimension first.

        Returns:
            The LSTM output for every time step. Because the LSTM is
            bidirectional, the last dimension holds both directions
            (2 * hidden_dim values).
        """
        print("\n🔹 Input indices:", x)

        embedded = self.embedding(x)
        print("🔹 Word embeddings:\n", embedded)

        # nn.LSTM returns (output, (h_n, c_n)); only h_n is displayed here.
        per_step, final_states = self.bilstm(embedded)
        final_hidden = final_states[0]
        print("🔹 BiLSTM output at each time step:\n", per_step)
        print("🔹 Final hidden state (2 directions):\n", final_hidden)
        return per_step

# Step 3: Build the model from the vocabulary size and layer dimensions
model = BiLSTMModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
)

# Step 4: Encode the sentence as a batch containing one sequence
# "hi there" -> [0, 1]
sentence = ["hi", "there"]
# Look up each token's id, then add a leading batch dimension -> shape: (1, 2)
input_indices = torch.tensor([vocab[token] for token in sentence]).unsqueeze(0)
print("Input indices:", input_indices)

# Step 5: Forward pass through the BiLSTM (the model prints each stage)
output = model(input_indices)


Input indices: tensor([[0, 1]])

🔹 Input indices: tensor([[0, 1]])
🔹 Word embeddings:
 tensor([[[ 0.9283, -0.0850, -1.0262, -1.0040],
         [ 1.7449,  2.1992, -1.6796,  1.5037]]], grad_fn=<EmbeddingBackward0>)
🔹 BiLSTM output at each time step:
 tensor([[[ 0.1778,  0.1600,  0.0018,  0.0060, -0.3635,  0.0412],
         [-0.0299,  0.5586,  0.0684,  0.0786, -0.5420, -0.0558]]],
       grad_fn=<TransposeBackward0>)
🔹 Final hidden state (2 directions):
 tensor([[[-0.0299,  0.5586,  0.0684]],

        [[ 0.0060, -0.3635,  0.0412]]], grad_fn=<StackBackward0>)
