In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# -------------------------
# 1. Dataset
# -------------------------
english_sentences = ["How are you today?", "What is your name?"]
french_sentences  = ["Comment vas-tu aujourd'hui ?", "Quel est ton nom ?"]


In [2]:


# 2. Vocabulary builder
# -------------------------
def build_vocab(sentences):
    words = set()
    for s in sentences:
        for w in s.split():
            words.add(w)
    word2idx = {w: i+1 for i, w in enumerate(sorted(words))}
    word2idx["<PAD>"] = 0
    idx2word = {i: w for w, i in word2idx.items()}
    return word2idx, idx2word

eng_word2index, eng_index2word = build_vocab(english_sentences)
fre_word2index, fre_index2word = build_vocab(french_sentences)


In [3]:

# -------------------------
# 3. Convert sentences â†’ tensors
# -------------------------
def sentence_to_tensor(sentence, vocab):
    return torch.tensor([vocab[w] for w in sentence.split()], dtype=torch.long)

input_tensors  = [sentence_to_tensor(s, eng_word2index) for s in english_sentences]
target_tensors = [sentence_to_tensor(s, fre_word2index) for s in french_sentences]

In [4]:


# -------------------------
# 4. Seq2Seq Model
# -------------------------
class Seq2Seq(nn.Module):
    def __init__(self, input_vocab_size, output_vocab_size, hidden_size):
        super().__init__()

        self.encoder_emb = nn.Embedding(input_vocab_size, hidden_size)
        self.decoder_emb = nn.Embedding(output_vocab_size, hidden_size)

        self.encoder = nn.LSTM(hidden_size, hidden_size)
        self.decoder = nn.LSTM(hidden_size, hidden_size)

        self.fc = nn.Linear(hidden_size, output_vocab_size)

    def forward(self, input_seq, target_seq):
        # Encoder
        enc_emb = self.encoder_emb(input_seq)        # (seq_len, batch, hidden)
        enc_out, hidden = self.encoder(enc_emb)      # hidden = (h, c)

        # Decoder
        dec_emb = self.decoder_emb(target_seq)
        dec_out, _ = self.decoder(dec_emb, hidden)

        # Output projection to vocab
        out = self.fc(dec_out)                       # (seq_len, batch, vocab)
        return out


In [5]:


# -------------------------
# 5. Instantiate model
# -------------------------
input_size  = len(eng_word2index)
output_size = len(fre_word2index)
hidden_size = 256

model = Seq2Seq(input_size, output_size, hidden_size)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [6]:


# -------------------------
# 6. Training Loop
# -------------------------
epochs = 200

for epoch in range(epochs):
    total_loss = 0

    for input_tensor, target_tensor in zip(input_tensors, target_tensors):
        optimizer.zero_grad()

        # LSTM expects shape (seq_len, batch_size)
        input_seq  = input_tensor.unsqueeze(1)
        target_seq = target_tensor.unsqueeze(1)

        output = model(input_seq, target_seq)
        loss = criterion(output.squeeze(1), target_tensor)

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    if (epoch + 1) % 20 == 0:
        print(f"Epoch [{epoch+1}/{epochs}] Loss = {total_loss/len(input_tensors):.4f}")


Epoch [20/200] Loss = 0.0098
Epoch [40/200] Loss = 0.0034
Epoch [60/200] Loss = 0.0022
Epoch [80/200] Loss = 0.0015
Epoch [100/200] Loss = 0.0011
Epoch [120/200] Loss = 0.0009
Epoch [140/200] Loss = 0.0007
Epoch [160/200] Loss = 0.0006
Epoch [180/200] Loss = 0.0005
Epoch [200/200] Loss = 0.0004


In [7]:


# -------------------------
# 7. Inference (simple greedy decoding)
# -------------------------
def translate(sentence, model):
    model.eval()
    with torch.no_grad():
        input_tensor = sentence_to_tensor(sentence, eng_word2index)
        input_seq = input_tensor.unsqueeze(1)

        # decoder receives dummy tokens (PAD)
        target_seq = torch.zeros(len(input_tensor), 1, dtype=torch.long)

        output = model(input_seq, target_seq)
        ids = output.argmax(-1).squeeze(1).tolist()

        words = [fre_index2word[i] for i in ids]
        return " ".join(words)

# -------------------------
# 8. Test translation
# -------------------------
print("\n--- TRANSLATION TESTS ---")
print("Input: How are you today?")
print("Output:", translate("How are you today?", model))

print("\nInput: What is your name?")
print("Output:", translate("What is your name?", model))



--- TRANSLATION TESTS ---
Input: How are you today?
Output: Comment vas-tu aujourd'hui aujourd'hui

Input: What is your name?
Output: Quel est ton ton
