<a href="https://colab.research.google.com/github/syedmahmoodiagents/NLP/blob/main/Translation_Encoder_Decoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import random

In [2]:
# Toy parallel corpus: the i-th English sentence pairs with the i-th French
# sentence. French targets are wrapped in <sos>/<eos> markers.
eng_sentences = [
    "i love india".split(),
    "you like music very much".split(),
]

fr_sentences = [
    "<sos> je t'aime l'inde <eos>".split(),
    "<sos> tu aimes la musique <eos>".split(),
]

# Word -> index tables; index 0 is reserved for the padding token in both.
eng_vocab, fr_vocab = {"<pad>": 0}, {"<pad>": 0}

In [3]:
# Give every unseen word the next free index, in order of first appearance.
# setdefault leaves already-registered words untouched, so re-running this
# cell does not change the tables.
for vocab_table, corpus in ((eng_vocab, eng_sentences), (fr_vocab, fr_sentences)):
    for sentence in corpus:
        for token in sentence:
            vocab_table.setdefault(token, len(vocab_table))

In [5]:
# Inverse French lookup (index -> word), used to turn predictions into text.
idx2fr = {index: word for word, index in fr_vocab.items()}

In [8]:
# Indices of the start- and end-of-sentence markers in the French vocabulary.
SOS, EOS = fr_vocab["<sos>"], fr_vocab["<eos>"]

In [9]:
# Display the index -> word table to check the French vocabulary mapping.
idx2fr

{0: '<pad>',
 1: '<sos>',
 2: 'je',
 3: "t'aime",
 4: "l'inde",
 5: '<eos>',
 6: 'tu',
 7: 'aimes',
 8: 'la',
 9: 'musique'}

In [10]:
def encode(words, vocab):
    """Convert a list of words into a column tensor of vocabulary indices.

    Args:
        words: Sequence of tokens; every token must be a key of ``vocab``.
        vocab: Mapping from word to integer index.

    Returns:
        An int64 tensor of shape ``[len(words), 1]``.

    Raises:
        KeyError: If a word is not present in ``vocab``.
    """
    idxs = [vocab[w] for w in words]
    # Pin the dtype: torch.tensor([]) would otherwise infer float32, which is
    # not a valid index tensor for nn.Embedding.
    return torch.tensor(idxs, dtype=torch.long).unsqueeze(1)

In [11]:
# Encode both sentence pairs as [T, 1] index tensors (one sentence per tensor).
src1, src2 = (encode(sentence, eng_vocab) for sentence in eng_sentences)
trg1, trg2 = (encode(sentence, fr_vocab) for sentence in fr_sentences)

In [20]:
class Encoder(nn.Module):
    """Embeds a source token sequence and runs it through an LSTM.

    Args:
        vocab: Sized collection of source tokens; its length sets the number
            of embedding rows.
        emb: Embedding dimension.
        hid: LSTM hidden-state dimension.
    """

    def __init__(self, vocab, emb, hid):
        super().__init__()
        n_tokens = len(vocab)
        self.embed = nn.Embedding(num_embeddings=n_tokens, embedding_dim=emb)
        self.rnn = nn.LSTM(input_size=emb, hidden_size=hid)

    def forward(self, src):
        """Return the LSTM's final state tuple for the index tensor ``src``.

        The per-step LSTM outputs are discarded; only the state the LSTM
        returns alongside them is passed on.
        """
        embedded = self.embed(src)  # [T,1,E] for a [T,1] input
        _, final_state = self.rnn(embedded)
        return final_state

In [21]:
class Decoder(nn.Module):
    """Single-step LSTM decoder that scores the next target token.

    Args:
        vocab: Sized collection of target tokens; its length sets both the
            number of embedding rows and the width of the output layer.
        emb: Embedding dimension.
        hid: LSTM hidden-state dimension.
    """

    def __init__(self, vocab, emb, hid):
        super().__init__()
        n_tokens = len(vocab)
        self.embed = nn.Embedding(n_tokens, emb)
        self.rnn = nn.LSTM(emb, hid)
        self.fc = nn.Linear(hid, n_tokens)

    def forward(self, y_prev, hidden):
        """Advance the decoder by one time step.

        Args:
            y_prev: Index tensor holding the previously emitted token.
            hidden: LSTM state to continue from.

        Returns:
            ``(pred, hidden)``: vocabulary scores for the next token and the
            updated LSTM state.
        """
        step_input = self.embed(y_prev)[None]            # add time axis: [1,1,E]
        rnn_out, next_state = self.rnn(step_input, hidden)  # rnn_out: [1,1,H]
        logits = self.fc(rnn_out[0])                     # drop time axis: [1, vocab]
        return logits, next_state

In [22]:
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with teacher forcing.

    Args:
        enc: Module mapping a source index tensor to an initial decoder state.
        dec: Module called as ``dec(y_prev, hidden)`` and returning
            ``(pred, hidden)``. It must expose its output projection as
            ``dec.fc`` (an ``nn.Linear``), as the Decoder in this notebook does.
    """

    def __init__(self, enc, dec):
        super().__init__()
        self.enc = enc
        self.dec = dec

    def forward(self, src, trg, teacher_ratio=0.5):
        """Run the decoder over every target position after the first.

        Args:
            src: Source index tensor, time-first (``[S, B]``).
            trg: Target index tensor, time-first (``[T, B]``); ``trg[0]`` is
                the start token fed to the first decoder step.
            teacher_ratio: Probability, drawn independently at each step, of
                feeding the ground-truth token instead of the model's own
                argmax prediction as the next input.

        Returns:
            Tensor of shape ``[T, B, vocab]``. Row 0 stays all-zero because
            decoding starts at ``t = 1``.
        """
        trg_len, batch_size = trg.size(0), trg.size(1)
        # Take the output width from the decoder itself rather than from a
        # notebook-level vocabulary, so the module works with any decoder.
        vocab_size = self.dec.fc.out_features

        # Allocate next to the targets so the model also works off-CPU.
        outputs = torch.zeros(trg_len, batch_size, vocab_size, device=trg.device)

        hidden = self.enc(src)

        y_prev = trg[0]  # <sos>

        for t in range(1, trg_len):
            pred, hidden = self.dec(y_prev, hidden)
            outputs[t] = pred

            # Teacher forcing: sometimes feed the ground truth, otherwise the
            # model's own greedy choice.
            use_tf = random.random() < teacher_ratio
            y_prev = trg[t] if use_tf else pred.argmax(1)

        return outputs

In [23]:
# Seed both RNGs before building the model: torch drives weight initialisation
# and `random` drives the teacher-forcing coin flips, so without this a
# Restart & Run All gives different losses every time.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

# Model sizes.
embed_dim = 16
hidden_dim = 32

enc = Encoder(eng_vocab, embed_dim, hidden_dim)
dec = Decoder(fr_vocab, embed_dim, hidden_dim)
model = Seq2Seq(enc, dec)

In [24]:
# Cross-entropy between the decoder's vocabulary scores and target indices.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the seq2seq model (encoder and decoder).
optim = torch.optim.Adam(params=model.parameters(), lr=1e-2)

In [25]:
# translate() (defined in a later cell) switches the model to eval mode, so
# make sure training mode is active whenever this cell is (re-)run.
model.train()

for epoch in range(200):
    for src, trg in [(src1, trg1), (src2, trg2)]:
        optim.zero_grad()
        out = model(src, trg, teacher_ratio=0.7)

        # Sum the per-step losses, skipping position 0 (<sos>), which the
        # decoder never predicts. out[t] is already [batch, vocab] and trg[t]
        # is [batch], which is what CrossEntropyLoss expects.
        loss = 0
        for t in range(1, trg.size(0)):
            loss += criterion(out[t], trg[t])

        loss.backward()
        optim.step()

    # Note: this reports the loss of the last sentence pair processed in the
    # epoch, not an average over both pairs.
    if epoch % 50 == 0:
        print(f"epoch {epoch}, loss={loss.item():.4f}")

epoch 0, loss=11.3140
epoch 50, loss=0.0363
epoch 100, loss=0.0134
epoch 150, loss=0.0075


In [26]:
def translate(src, max_len=10):
    """Greedily decode a French word list for an encoded source sentence.

    Uses the notebook-level ``model``, ``SOS``, ``EOS`` and ``idx2fr``.

    Args:
        src: Source index tensor as produced by ``encode``.
        max_len: Maximum number of words to emit if ``<eos>`` is never
            predicted.

    Returns:
        List of predicted French words, without the ``<sos>``/``<eos>`` markers.
    """
    # Remember the current mode so inference does not silently leave the
    # model in eval mode for any training that follows.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            hidden = model.enc(src)
            y_prev = torch.tensor([SOS], device=src.device)
            out_words = []

            for _ in range(max_len):
                pred, hidden = model.dec(y_prev, hidden)
                # Greedy choice; it is also the next decoder input.
                y_prev = pred.argmax(1)
                token = y_prev.item()

                if token == EOS:
                    break

                out_words.append(idx2fr[token])
    finally:
        model.train(was_training)

    return out_words

# Translate both training sentences and show each next to its English source.
for label_en, label_fr, eng_words, src_tensor in (
    ("\nEN 1:", "FR 1:", eng_sentences[0], src1),
    ("\nEN 2:", "FR 2:", eng_sentences[1], src2),
):
    print(label_en, eng_words)
    print(label_fr, translate(src_tensor))



EN 1: ['i', 'love', 'india']
FR 1: ['je', "t'aime", "l'inde"]

EN 2: ['you', 'like', 'music', 'very', 'much']
FR 2: ['tu', 'aimes', 'la', 'musique']
