<a href="https://colab.research.google.com/github/syedmahmoodiagents/NLP/blob/main/Minimal_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn

In [None]:
# Toy corpus: three short, already-tokenised sentences. The vocabulary and the
# next-word training pairs further down are both derived from this list.
sentences = [
    ["the", "cat", "sat"],
    ["dogs", "are", "playing"],
    ["the", "dog", "is", "running"]
]

In [None]:
# Unique tokens of the corpus in alphabetical order.
words = sorted({token for tokens in sentences for token in tokens})

In [None]:
words

['are', 'cat', 'dog', 'dogs', 'is', 'playing', 'running', 'sat', 'the']

In [None]:
# Ids start at 1, so index 0 is never assigned to a real word.
word_to_idx = {word: idx for idx, word in enumerate(words, start=1)}

In [None]:
word_to_idx

{'are': 1,
 'cat': 2,
 'dog': 3,
 'dogs': 4,
 'is': 5,
 'playing': 6,
 'running': 7,
 'sat': 8,
 'the': 9}

In [None]:
# One extra slot on top of the real words, because word ids start at 1 and
# index 0 is left unassigned.
vocab_size = 1 + len(word_to_idx)

In [None]:
vocab_size

10

In [None]:
# Embedding vector size. NOTE(review): LSTMModel below does not read this
# variable; it carries its own value of 50, so keep the two in sync.
embedding_dim = 50

In [None]:
class LSTMModel(nn.Module):
    """Next-word predictor: embedding -> single-layer LSTM -> linear classifier.

    Args:
        vocab_size: Number of token ids, including the padding id.
        embedding_dim: Size of each token embedding vector.
        hidden_size: Size of the LSTM hidden state.
        padding_idx: Token id used for padding. Its embedding is fixed at zero
            and it is ignored when locating the last real token of a sequence.

    The defaults reproduce the original hard-coded sizes, so ``LSTMModel()``
    builds the same architecture as before.
    """

    def __init__(self, vocab_size=10, embedding_dim=50, hidden_size=64, padding_idx=0):
        super().__init__()
        self.padding_idx = padding_idx
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x):
        """Return next-token logits for each sequence in the batch.

        Args:
            x: Integer tensor of token ids, shape (batch_size, seq_length).
                Shorter sequences are expected to be right-padded with
                ``padding_idx`` (the layout ``pad_sequence`` produces).

        Returns:
            Float tensor of shape (batch_size, vocab_size).
        """
        embedded = self.embedding(x)       # (batch_size, seq_length, embedding_dim)
        output, _ = self.lstm(embedded)    # (batch_size, seq_length, hidden_size)

        # Read the LSTM output at the last *real* token of every row. Using
        # output[:, -1, :] would read the state after the trailing pad tokens,
        # so a padded row and the same row without padding would disagree.
        lengths = (x != self.padding_idx).sum(dim=1)
        last_step = (lengths - 1).clamp(min=0)  # all-pad rows fall back to step 0
        rows = torch.arange(x.size(0), device=x.device)
        last_output = output[rows, last_step]   # (batch_size, hidden_size)

        return self.fc(last_output)             # (batch_size, vocab_size)

In [None]:
# Build the network with the layer sizes LSTMModel defines for itself.
model = LSTMModel()

In [None]:
# model(inputs).shape

In [None]:
# allinput_seq = []
# alloutput_seq = []
# for sentence in sentences:
#     encoded = [word_to_idx[word] for word in sentence]
#     for i in range(1, len(encoded)):
#         input_seq = encoded[:i]
#         target_seq = encoded[1:i+1]
#         allinput_seq.append(torch.tensor(input_seq))
#         alloutput_seq.append(torch.tensor(target_seq))

# Build (prefix -> next word) training pairs: every proper prefix of a sentence
# becomes an input, and the single word that follows it becomes the target.
allinput_seq = []
alloutput_seq = []
for sentence in sentences:
    encoded = [word_to_idx[word] for word in sentence]
    for split in range(1, len(encoded)):
        allinput_seq.append(torch.tensor(encoded[:split]))
        alloutput_seq.append(torch.tensor(encoded[split:split + 1]))


In [None]:
allinput_seq

[tensor([9]),
 tensor([9, 2]),
 tensor([4]),
 tensor([4, 1]),
 tensor([9]),
 tensor([9, 3]),
 tensor([9, 3, 5])]

In [None]:
alloutput_seq

[tensor([2]),
 tensor([8]),
 tensor([1]),
 tensor([6]),
 tensor([3]),
 tensor([5]),
 tensor([7])]

In [None]:
from torch.nn.utils.rnn import pad_sequence

In [None]:
# Right-pad every sequence with 0 to the longest length, giving batch-first tensors.
inputs = pad_sequence(allinput_seq, batch_first=True, padding_value=0)
targets = pad_sequence(alloutput_seq, batch_first=True, padding_value=0)

In [None]:
inputs

tensor([[9, 0, 0],
        [9, 2, 0],
        [4, 0, 0],
        [4, 1, 0],
        [9, 0, 0],
        [9, 3, 0],
        [9, 3, 5]])

In [None]:
inputs.shape

torch.Size([7, 3])

In [None]:
# Adam over every parameter of the model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Cross-entropy over the vocabulary; targets equal to 0 are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=0)

In [None]:
# Sanity check: one forward pass over the whole padded batch before training.
out = model(inputs)

In [None]:
out.shape

torch.Size([7, 10])

In [None]:
out.view(-1, vocab_size).shape

torch.Size([7, 10])

In [None]:
targets.view(-1).shape

torch.Size([7])

In [None]:
# Full-batch training: every epoch is one optimisation step over all pairs.
EPOCHS = 30
model.train()
for epoch_num in range(1, EPOCHS + 1):
    optimizer.zero_grad()

    logits = model(inputs)
    loss = criterion(logits.reshape(-1, vocab_size), targets.reshape(-1))

    loss.backward()
    optimizer.step()

    # Report the loss every tenth epoch.
    if epoch_num % 10 == 0:
        print(f"Epoch {epoch_num}/{EPOCHS}, Loss: {loss.item()}")

Epoch 10/30, Loss: 0.30990034341812134
Epoch 20/30, Loss: 0.20081520080566406
Epoch 30/30, Loss: 0.1987144500017166


In [None]:
# Greedy generation: repeatedly predict the next word and append it to the input.
model.eval()

# Reverse vocabulary, built once, for direct id -> word lookup.
idx_to_word = {index: word for word, index in word_to_idx.items()}
prompt = ["the", "dog"]

with torch.no_grad():
    # Shape (1, len(prompt)): a batch holding the single prompt.
    input_seq = torch.tensor([[word_to_idx[word] for word in prompt]])
    print("Input sequence:", *prompt)
    print("Predicted sequence:")
    for _ in range(3):
        logits = model(input_seq)  # (1, vocab_size)
        # Index 0 is padding and maps to no word, so it is left out of the
        # argmax; the +1 shifts the result back to real vocabulary ids.
        next_token = logits[:, 1:].argmax(dim=-1) + 1  # (1,)
        print(idx_to_word[next_token.item()])
        # Append the prediction so the next step conditions on it.
        input_seq = torch.cat([input_seq, next_token.unsqueeze(1)], dim=1)

Input sequence: the dog
Predicted sequence:
is
running
is
