In [171]:
text = "One day, a hare was showing off how fast he could run. \
        He laughed at the turtle for being so slow. After seeing the overconfidence,\
        the turtle moved him to a race. The hare (rabbit) laughed at the turtle's test, \
        and he accepted his demand. As the race began, the rabbit ran extremely quickly\
        and went far ahead of the turtle and got drained. He thought there was a lot of\
        time to relax as the turtle was far away. Soon he slept, thinking he would\
        win the race easily. However, the turtle(tortoise) kept walking slowly until\
        he arrived at the finish line. The rabbit sees the turtle on the opposite side\
        of the finish line. The turtle had won the race. Rabbit laughed the turtle"

In [172]:
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn

In [173]:
class customDataset(Dataset):
  def __init__(self, text, word2idx, seq_length):
    self.text = text
    self.word2idx = word2idx
    self.seq_length = seq_length

  def __len__(self):
    return len(self.text) - self.seq_length

  def __getitem__(self,index):
    sequence = [self.word2idx[word] for word in self.text[index:index+self.seq_length]]
    target = self.word2idx[self.text[index+self.seq_length]]

    return torch.tensor(sequence), torch.tensor(target)



In [174]:
word2idx = {word: i for i,word in enumerate(set(text.split()))}
idx2word = {i:word for word, i in word2idx.items()}

In [175]:
dataset = customDataset(text.split(),word2idx,seq_length=10)

In [176]:
dataset[0]

(tensor([ 2, 18, 45, 56, 66, 37, 70, 74, 33, 69]), tensor(25))

In [177]:
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [178]:
class LSTMmodel(nn.Module):
  def __init__(self, vocab_size, embed_size, hidden_size):
    super(LSTMmodel, self).__init__()
    self.embed = nn.Embedding(vocab_size, embed_size)
    self.lstm = nn.LSTM(embed_size,hidden_size,10,batch_first=True)
    self.fc = nn.Linear(hidden_size,vocab_size)

  def forward(self, x, h0,c0):
    embed = self.embed(x)
    out,(h_n,c_n) = self.lstm(embed, (h0,c0))
    output = self.fc(out[:,-1,:])
    return output, (h_n,c_n)

In [179]:
model = LSTMmodel(len(word2idx),embed_size=128,hidden_size=256 )

In [180]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr =0.01)

In [181]:
for epoch in range(10):
  for input,label in dataloader:
    optimizer.zero_grad()
    h0 = torch.zeros(10,input.size(0),256)
    c0 = torch.zeros(10,input.size(0),256)
    outputs, _ = model(input,h0,c0)
    loss = criterion(outputs,label)
    loss.backward()
    optimizer.step()
  print(f"Epoch {epoch} : Loss : {loss.item()}")



Epoch 0 : Loss : 4.353806972503662
Epoch 1 : Loss : 4.35554838180542
Epoch 2 : Loss : 4.353756904602051
Epoch 3 : Loss : 4.369394302368164
Epoch 4 : Loss : 4.341869354248047
Epoch 5 : Loss : 4.352730751037598
Epoch 6 : Loss : 4.34388542175293
Epoch 7 : Loss : 4.354694366455078
Epoch 8 : Loss : 4.339351654052734
Epoch 9 : Loss : 4.328642845153809


In [182]:
input_seq = torch.tensor([word2idx[word] for word in text.split()[-10:]]).unsqueeze(0)
h0 = torch.zeros(10,input_seq.size(0),256)
c0 = torch.zeros(10,input_seq.size(0),256)
ouput, _ = model(input_seq,h0,c0)
predicted_word = idx2word[ouput.argmax().item()]
print(f"Predicted next word: {predicted_word}")

Predicted next word: He
