In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
# ---------------------------------------------
# Vocabulary Setup
# ---------------------------------------------
# Token -> index tables. Ids are assigned by position in each list, so the
# three special tokens occupy ids 0-2 in both vocabularies.
SRC_VOCAB = {
    token: index
    for index, token in enumerate(
        ['<pad>', '<sos>', '<eos>', 'hello', 'good morning']
    )
}
TRG_VOCAB = {
    token: index
    for index, token in enumerate(
        ['<pad>', '<sos>', '<eos>', 'namaste', 'subhodayam']
    )
}

# Reverse (index -> token) tables used to turn decoded ids back into text.
SRC_itos = dict(zip(SRC_VOCAB.values(), SRC_VOCAB.keys()))
TRG_itos = dict(zip(TRG_VOCAB.values(), TRG_VOCAB.keys()))


# ---------------------------------------------
# Encoder
# ---------------------------------------------
class Encoder(nn.Module):
    """Embed a source token sequence and summarise it with a GRU.

    Args:
        input_dim: Number of rows in the embedding table (source vocab size).
        emb_dim: Width of each token embedding.
        hid_dim: Width of the GRU hidden state.
    """

    def __init__(self, input_dim: int, emb_dim: int, hid_dim: int) -> None:
        super().__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, batch_first=True)

    def forward(self, src: torch.Tensor) -> torch.Tensor:
        """Return the GRU's final hidden state for ``src``.

        For a batched ``(batch, src_len)`` tensor of token ids the result has
        shape ``(1, batch, hid_dim)``; the per-step outputs are discarded.
        """
        _, final_hidden = self.rnn(self.embedding(src))
        return final_hidden

# ---------------------------------------------
# Decoder
# ---------------------------------------------
class Decoder(nn.Module):
    """Single-step GRU decoder producing logits over the target vocabulary.

    Args:
        output_dim: Target vocabulary size (embedding rows and logit width).
        emb_dim: Width of each token embedding.
        hid_dim: Width of the GRU hidden state.
    """

    def __init__(self, output_dim: int, emb_dim: int, hid_dim: int) -> None:
        super().__init__()
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, hid_dim, batch_first=True)
        self.fc_out = nn.Linear(hid_dim, output_dim)

    def forward(self, input, hidden):
        """Advance decoding by one token.

        Args:
            input: ``(batch,)`` tensor holding the previous token id of each
                sequence.
            hidden: Recurrent state passed straight to the GRU.

        Returns:
            ``(prediction, hidden)`` where ``prediction`` is a
            ``(batch, output_dim)`` tensor of logits and ``hidden`` is the
            updated GRU state.
        """
        # The GRU is batch-first, so give each token a length-1 time axis.
        step_tokens = input.unsqueeze(1)          # (batch, 1)
        step_embedded = self.embedding(step_tokens)  # (batch, 1, emb_dim)
        step_output, next_hidden = self.rnn(step_embedded, hidden)
        # Drop the length-1 time axis again before projecting to logits.
        logits = self.fc_out(step_output.squeeze(1))
        return logits, next_hidden

# ---------------------------------------------
# Seq2Seq Wrapper with Inference
# ---------------------------------------------
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper offering a training loss and greedy decoding.

    Args:
        encoder: Callable mapping a source batch to the decoder's initial
            hidden state.
        decoder: Callable mapping ``(input_token, hidden)`` to
            ``(logits, hidden)`` for one decoding step.
        device: Device on which ``inference`` creates its start token.
    """

    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, trg, criterion, teacher_forcing=True):
        """Return the summed per-step loss of decoding ``trg`` from ``src``.

        Args:
            src: Source token ids, fed to the encoder unchanged.
            trg: ``(batch, trg_len)`` target token ids; column 0 is used as
                the first decoder input (expected to be ``<sos>``).
            criterion: Callable ``(logits, target_ids) -> loss`` applied at
                every step.
            teacher_forcing: When true, the ground-truth token is fed as the
                next input; otherwise the decoder's own argmax is fed back.

        Returns:
            The sum of ``criterion`` over steps ``1 .. trg_len - 1``. This is
            the plain integer ``0`` when ``trg`` has a single column.
        """
        hidden = self.encoder(src)
        input_token = trg[:, 0]  # <sos>
        loss = 0
        for t in range(1, trg.shape[1]):
            output, hidden = self.decoder(input_token, hidden)
            loss += criterion(output, trg[:, t])
            input_token = trg[:, t] if teacher_forcing else output.argmax(1)
        return loss

    def inference(self, src, max_len=10, *, sos_idx=None, eos_idx=None):
        """Greedily decode a single source sequence.

        Switches the module to eval mode (and leaves it there), then decodes
        without building an autograd graph.

        Args:
            src: Source token ids for exactly one sequence (batch size 1),
                since each step's prediction is read with ``.item()``.
            max_len: Maximum number of tokens to emit.
            sos_idx: Id of the start token fed to the first decoder step.
                Defaults to ``TRG_VOCAB['<sos>']``.
            eos_idx: Id of the token that ends decoding. Defaults to
                ``TRG_VOCAB['<eos>']``.

        Returns:
            List of predicted token ids, excluding the start and end tokens.
        """
        if sos_idx is None:
            sos_idx = TRG_VOCAB['<sos>']
        if eos_idx is None:
            eos_idx = TRG_VOCAB['<eos>']

        self.eval()
        output_tokens = []
        with torch.no_grad():
            hidden = self.encoder(src)
            # Start from <sos>: forward() always feeds trg[:, 0] first, so
            # <sos> is the only first-step input the decoder is trained on.
            input_token = torch.tensor([sos_idx], device=self.device)
            for _ in range(max_len):
                output, hidden = self.decoder(input_token, hidden)
                top1 = output.argmax(1)  # Greedy decoding
                token_id = top1.item()
                if token_id == eos_idx:
                    break
                output_tokens.append(token_id)
                input_token = top1
        return output_tokens

# ---------------------------------------------
# Run Translation
# ---------------------------------------------
# NOTE: the triple-quoted string below is a bare expression statement, so the
# snippet inside it never executes. It is not runnable as written: INPUT_DIM,
# EMB_DIM, HID_DIM and OUTPUT_DIM are assigned only inside main(), no
# src_sentence is defined in the visible file, and it calls
# model(src_sentence) although Seq2Seq.forward also requires trg and
# criterion (greedy decoding is Seq2Seq.inference).
'''device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
encoder = Encoder(INPUT_DIM, EMB_DIM, HID_DIM).to(device)
decoder = Decoder(OUTPUT_DIM, EMB_DIM, HID_DIM).to(device)
model = Seq2Seq(encoder, decoder, device).to(device)

# Inference
src_sentence = src_sentence.to(device)
predicted_indices = model(src_sentence)

# Convert indices back to words
translated_sentence = [TRG_itos[idx] for idx in predicted_indices]
print("Predicted translation:", ' '.join(translated_sentence))'''
# -------------------------------------
# Training and Inference
# -------------------------------------
def main():
    """Fit the model on one sentence pair, then greedily decode that pair.

    Builds the encoder/decoder, trains for ``NUM_EPOCHS`` full-batch Adam
    steps on a single (source, target) example, printing the loss every 20
    epochs, and finally prints the decoded ids and their words.
    """
    NUM_EPOCHS = 200
    embedding_size, hidden_size = 8, 16

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Encoder first, then decoder, so parameter initialisation draws from the
    # random number generator in a fixed order.
    encoder = Encoder(len(SRC_VOCAB), embedding_size, hidden_size).to(device)
    decoder = Decoder(len(TRG_VOCAB), embedding_size, hidden_size).to(device)
    model = Seq2Seq(encoder, decoder, device).to(device)

    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # Padding positions contribute nothing to the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_VOCAB['<pad>'])

    # The single training pair, each as a batch of one sequence.
    source_ids = [
        SRC_VOCAB[token]
        for token in ('<sos>', 'hello', 'good morning', '<eos>')
    ]
    target_ids = [
        TRG_VOCAB[token]
        for token in ('<sos>', 'namaste', 'subhodayam', '<eos>')
    ]
    src_tensor = torch.tensor([source_ids], dtype=torch.long).to(device)
    trg_tensor = torch.tensor([target_ids], dtype=torch.long).to(device)

    for epoch in range(NUM_EPOCHS):
        model.train()
        optimizer.zero_grad()
        loss = model(src_tensor, trg_tensor, criterion)
        loss.backward()
        optimizer.step()
        if (epoch + 1) % 20 != 0:
            continue
        print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {loss.item():.4f}")

    # Decode the training sentence and map the ids back to words.
    model.eval()
    prediction = model.inference(src_tensor)
    print("Decoded token IDs:", prediction)
    print(
        "Predicted Translation:",
        ' '.join(TRG_itos[token_id] for token_id in prediction),
    )

# Run the training/inference demo only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

Epoch 20/200, Loss: 0.4150
Epoch 40/200, Loss: 0.0237
Epoch 60/200, Loss: 0.0103
Epoch 80/200, Loss: 0.0074
Epoch 100/200, Loss: 0.0060
Epoch 120/200, Loss: 0.0050
Epoch 140/200, Loss: 0.0042
Epoch 160/200, Loss: 0.0036
Epoch 180/200, Loss: 0.0031
Epoch 200/200, Loss: 0.0027
Decoded token IDs: [3, 4]
Predicted Translation: namaste subhodayam
