<a href="https://colab.research.google.com/github/komalsali04/komalsali04/blob/main/RNN_Translator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class tiny_encoder(nn.Module):
    """Hand-written single-layer tanh RNN encoder for one unbatched sequence.

    Every source token is embedded and folded into a hidden vector using
    ``h = tanh(W_h @ h + W_x @ x_t + b)``. The hidden vector after the last
    token is the encoding of the whole sequence.
    """

    def __init__(self, input_vocab_size, embed_size, hidden_size):
        """Create the embedding table and the recurrent parameters.

        Args:
            input_vocab_size: Number of distinct source token IDs.
            embed_size: Length of each token embedding vector.
            hidden_size: Length of the recurrent hidden vector.
        """
        super().__init__()
        self.embedding = nn.Embedding(input_vocab_size, embed_size)

        # RNN parameters: weights are initialised randomly, the bias at zero.
        self.hidden_size = hidden_size
        self.W_h = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.W_x = nn.Parameter(torch.randn(hidden_size, embed_size))
        self.b = nn.Parameter(torch.zeros(hidden_size))

    def forward(self, src_tokens):
        """Encode a 1-D tensor of token IDs into the final hidden vector.

        Args:
            src_tokens: 1-D integer tensor of source token IDs.

        Returns:
            Tensor of shape ``(hidden_size,)``. For an empty sequence this is
            the all-zero initial state.
        """
        # Build the initial state with the parameters' dtype and device so the
        # module keeps working after ``.double()`` or ``.to(device)``;
        # ``torch.mv`` refuses mixed dtypes/devices.
        h = torch.zeros(
            self.hidden_size, dtype=self.W_h.dtype, device=self.W_h.device
        )

        for token_id in src_tokens:
            x_t = self.embedding(token_id)
            h = torch.tanh(
                torch.mv(self.W_h, h) + torch.mv(self.W_x, x_t) + self.b
            )

        return h

class tiny_decoder(nn.Module):
    """Hand-written single-layer tanh RNN decoder for one unbatched sequence.

    At every step the input token is embedded, the hidden vector is updated
    with ``h = tanh(W_h @ h + W_x @ x_t + b)``, and the new hidden vector is
    projected to vocabulary logits with ``W_out @ h + b_out``.
    """

    def __init__(self, output_vocab_size, embed_size, hidden_size):
        """Create the embedding table, recurrent and output parameters.

        Args:
            output_vocab_size: Number of distinct target token IDs, which is
                also the number of logits produced per step.
            embed_size: Length of each token embedding vector.
            hidden_size: Length of the recurrent hidden vector.
        """
        super().__init__()
        self.embeddings = nn.Embedding(output_vocab_size, embed_size)

        # Recurrent parameters: random weights, zero bias.
        self.hidden_size = hidden_size
        self.W_h = nn.Parameter(torch.randn(hidden_size, hidden_size))
        self.W_x = nn.Parameter(torch.randn(hidden_size, embed_size))
        self.b = nn.Parameter(torch.zeros(hidden_size))

        # Projection from the hidden vector to per-token logits.
        self.W_out = nn.Parameter(torch.randn(output_vocab_size, hidden_size))
        self.b_out = nn.Parameter(torch.zeros(output_vocab_size))

    def forward(self, dec_tokens, init_hidden):
        """Run the decoder over the given input tokens.

        Args:
            dec_tokens: 1-D integer tensor of decoder input token IDs.
            init_hidden: Initial hidden vector of shape ``(hidden_size,)``.

        Returns:
            Tensor of shape ``(len(dec_tokens), output_vocab_size)`` holding
            one row of logits per input token. An empty ``dec_tokens`` yields
            an empty ``(0, output_vocab_size)`` tensor.
        """
        h = init_hidden
        step_logits = []

        for token_id in dec_tokens:
            x_t = self.embeddings(token_id)
            h = torch.tanh(
                torch.mv(self.W_h, h) + torch.mv(self.W_x, x_t) + self.b
            )
            step_logits.append(torch.mv(self.W_out, h) + self.b_out)

        if not step_logits:
            # Stacking/concatenating an empty list raises, so return a
            # correctly shaped empty result instead.
            return torch.zeros(
                0,
                self.W_out.shape[0],
                dtype=self.W_out.dtype,
                device=self.W_out.device,
            )

        return torch.stack(step_logits, dim=0)


# Number of distinct token IDs on the English (source) and Hindi (target) side.
ENG_VOCAB_SIZE = 29
HINDI_VOCAB_SIZE = 36

# Hindi token for every target ID. The words are listed in ID order, so the
# position in the tuple is the token ID: ID 0 is the "GO" marker and ID 35 is
# the "EOS" marker. A word that occurs more than once in the sentence has one
# ID per occurrence.
HIN_ID2WORD = dict(enumerate((
    "GO",                                                   # 0
    "मैं", "एक", "बैठक", "में", "व्यस्त", "था",                   # 1-6
    "इसलिए", "मैं", "तुम्हें", "कॉल", "नहीं", "कर", "सका",      # 7-13
    ",",                                                    # 14
    "और", "बाहर", "तेज़", "बारिश", "हो", "रही", "थी",          # 15-21
    ",",                                                    # 22
    "इसी", "कारण", "मैंने", "तुम्हारे", "घर", "आने", "का",      # 23-29
    "प्लान", "रद्द", "कर", "दिया",                            # 30-33
    "|",                                                    # 34
    "EOS",                                                  # 35
)))

# Sizes of the token embedding vectors and of the RNN hidden vector.
EMBED_SIZE = 7
HIDDEN_SIZE = 36


# Build the source-side encoder and the target-side decoder with shared
# embedding and hidden sizes.
encoder = tiny_encoder(ENG_VOCAB_SIZE, EMBED_SIZE, HIDDEN_SIZE)
decoder = tiny_decoder(HINDI_VOCAB_SIZE, EMBED_SIZE, HIDDEN_SIZE)

# English tokens, in ID order 0..28:
#   "<GO>" "I" "was" "busy" "in" "a" "meeting" "that's" "why" "couldn't" "call"
#   "you" "and" "it" "was" "raining" "outside" "due" "to" "which" "I" "canceled"
#   "the" "plan" "of" "coming" "to" "your" "house"
encoder_input = torch.arange(29)

# The decoder reads IDs 0..34 and must predict IDs 1..35, i.e. at every
# position the target is the next ID after the input.
decoder_input = torch.arange(35)
decoder_target = torch.arange(1, 36)

# Cross-entropy over the per-step logits, optimised with plain SGD over the
# parameters of both modules.
criteria = nn.CrossEntropyLoss()
optimize = optim.SGD([*encoder.parameters(), *decoder.parameters()], lr=0.6)

# Training schedule: how long to train and how often to report progress.
num_epochs = 1000
LOG_EVERY = 5        # print the loss every this many epochs
DECODE_EVERY = 20    # print a greedy decoding every this many epochs

# Special token IDs and the decoding step cap are derived from the training
# tensors instead of being repeated as literals: decoding starts from the first
# decoder input token, stops at the last target token, and never produces more
# tokens than the target sequence contains.
GO_TOKEN_ID = int(decoder_input[0])
EOS_TOKEN_ID = int(decoder_target[-1])
MAX_DECODE_STEPS = decoder_target.shape[0]

for epoch in range(num_epochs):
    optimize.zero_grad()

    # 1. Encode the source sentence into a single hidden vector.
    encoder_hidden = encoder(encoder_input)

    # 2. Decode with the reference tokens as inputs (teacher forcing).
    logits = decoder(decoder_input, encoder_hidden)

    # 3. Compute the loss between per-step logits and the shifted targets.
    loss = criteria(logits, decoder_target)

    # 4. Backpropagate and update the parameters of both modules.
    loss.backward()
    optimize.step()

    if (epoch + 1) % LOG_EVERY == 0:
        print(f"Epoch: {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}")

    if (epoch + 1) % DECODE_EVERY == 0:
        print(f"\n----Decoding after epoch {epoch+1}----")
        with torch.no_grad():
            # Re-encode so the decoding reflects the parameters after this
            # epoch's update.
            encoder_hidden = encoder(encoder_input)

            current_token = torch.tensor(GO_TOKEN_ID)
            h = encoder_hidden.clone()

            # Greedy decoding: feed the most likely token back in as the next
            # input until the end marker is produced or the cap is reached.
            generated_tokens = []
            for i in range(MAX_DECODE_STEPS):
                x_t = decoder.embeddings(current_token)
                h = torch.tanh(
                    torch.mv(decoder.W_h, h)
                    + torch.mv(decoder.W_x, x_t)
                    + decoder.b
                )
                Logits_t = torch.mv(decoder.W_out, h) + decoder.b_out

                next_token = torch.argmax(Logits_t).item()
                generated_tokens.append(next_token)

                if next_token == EOS_TOKEN_ID:
                    break
                current_token = torch.tensor(next_token)

            generated_words = [HIN_ID2WORD[t] for t in generated_tokens]
            print("Generated tokens: ", generated_words)
        print("-----------------------\n")


Epoch: 5/1000, Loss: 9.7140
Epoch: 10/1000, Loss: 6.2692
Epoch: 15/1000, Loss: 4.0741
Epoch: 20/1000, Loss: 2.5584

----Decoding after epoch 20----
Generated tokens:  ['मैंने', 'घर', 'का', 'तेज़', 'बारिश', 'हो', 'रही', 'मैं', 'तुम्हें', ',', 'इसी', 'कारण', 'मैं', '<EOS>']
-----------------------

Epoch: 25/1000, Loss: 1.5760
Epoch: 30/1000, Loss: 0.9826
Epoch: 35/1000, Loss: 0.6185
Epoch: 40/1000, Loss: 0.4026

----Decoding after epoch 40----
Generated tokens:  ['मैं', 'एक', 'बैठक', 'में', 'व्यस्त', 'था', 'इसलिए', 'मैं', 'तुम्हें', 'कॉल', 'नहीं', 'कर', 'सका', ',', 'और', 'में', 'व्यस्त', 'था', 'इसलिए', 'मैं', 'तुम्हें', 'कॉल', 'नहीं', 'कर', 'सका', ',', 'और', 'में', 'व्यस्त', 'था', 'इसलिए', 'मैं', 'तुम्हें', 'कॉल', 'नहीं']
-----------------------

Epoch: 45/1000, Loss: 0.2933
Epoch: 50/1000, Loss: 0.2311
Epoch: 55/1000, Loss: 0.1905
Epoch: 60/1000, Loss: 0.1618

----Decoding after epoch 60----
Generated tokens:  ['मैं', 'एक', 'बैठक', 'में', 'व्यस्त', 'था', 'इसलिए', 'मैं', 'तुम्हें', 'कॉल