In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from nltk.translate.bleu_score import sentence_bleu
import random

# Sample English to French sentence pairs
english_to_french = [

    ("I am cold", "J'ai froid"),

    ("You are tired", "Tu es fatigué"),

    ("He is hungry", "Il a faim"),

    ("She is happy", "Elle est heureuse"),

    ("We are friends", "Nous sommes amis"),

    ("They are students", "Ils sont étudiants"),

    ("The cat is sleeping", "Le chat dort"),

    ("The sun is shining", "Le soleil brille"),

    ("We love music", "Nous aimons la musique"),

    ("She speaks French fluently", "Elle parle français couramment"),

    ("He enjoys reading books", "Il aime lire des livres"),

    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),

    ("The movie starts at 7 PM", "Le film commence à 19 heures"),

    ("She wears a red dress", "Elle porte une robe rouge"),

    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),

    ("He drives a blue car", "Il conduit une voiture bleue"),

    ("They visit museums often", "Ils visitent souvent des musées"),

    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),

    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),

    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),

    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),

    ("They travel around the world", "Ils voyagent autour du monde"),

    ("The book is on the table", "Le livre est sur la table"),

    ("She dances gracefully", "Elle danse avec grâce"),

    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),

    ("He works hard every day", "Il travaille dur tous les jours"),

    ("They speak different languages", "Ils parlent différentes langues"),

    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),

    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),

    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),

    ("The dog barks loudly", "Le chien aboie bruyamment"),

    ("He sings beautifully", "Il chante magnifiquement"),

    ("They swim in the pool", "Ils nagent dans la piscine"),

    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),

    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),

    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),

    ("He paints landscapes", "Il peint des paysages"),

    ("They laugh at the joke", "Ils rient de la blague"),

    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),

    ("She runs in the park", "Elle court dans le parc"),

    ("We travel by train", "Nous voyageons en train"),

    ("He writes a letter", "Il écrit une lettre"),

    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),

    ("The baby cries", "Le bébé pleure"),

    ("She studies hard for exams", "Elle étudie dur pour les examens"),

    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),

    ("He fixes the car", "Il répare la voiture"),

    ("They drink coffee in the morning", "Ils boivent du café le matin"),

    ("The sun sets in the evening", "Le soleil se couche le soir"),

    ("She dances at the party", "Elle danse à la fête"),

    ("We play music at the concert", "Nous jouons de la musique au concert"),

    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),

    ("They study French grammar", "Ils étudient la grammaire française"),

    ("The rain falls gently", "La pluie tombe doucement"),

    ("She sings a song", "Elle chante une chanson"),

    ("We watch a movie together", "Nous regardons un film ensemble"),

    ("He sleeps deeply", "Il dort profondément"),

    ("They travel to Paris", "Ils voyagent à Paris"),

    ("The children play in the park", "Les enfants jouent dans le parc"),

    ("She walks along the beach", "Elle se promène le long de la plage"),

    ("We talk on the phone", "Nous parlons au téléphone"),

    ("He waits for the bus", "Il attend le bus"),

    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),

    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),

    ("She dreams of flying", "Elle rêve de voler"),

    ("We work in the office", "Nous travaillons au bureau"),

    ("He studies history", "Il étudie l'histoire"),

    ("They listen to the radio", "Ils écoutent la radio"),

    ("The wind blows gently", "Le vent souffle doucement"),

    ("She swims in the ocean", "Elle nage dans l'océan"),

    ("We dance at the wedding", "Nous dansons au mariage"),

    ("He climbs the mountain", "Il gravit la montagne"),

    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),

    ("The cat meows loudly", "Le chat miaule bruyamment"),

    ("She paints a picture", "Elle peint un tableau"),

    ("We build a sandcastle", "Nous construisons un château de sable"),

    ("He sings in the choir", "Il chante dans le chœur")

]

# Tokenization and vocabulary building
def tokenize(text):
    return text.lower().split()

def build_vocab(sentences):
    tokens = [token for sentence in sentences for token in sentence]
    vocab = {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2}
    vocab.update({token: i+3 for i, token in enumerate(set(tokens))})
    return vocab

tokenized_en = [tokenize(en) for en, fr in english_to_french]
tokenized_fr = [tokenize(fr) for en, fr in english_to_french]
en_vocab = build_vocab(tokenized_en)
fr_vocab = build_vocab(tokenized_fr)
rev_fr_vocab = {v: k for k, v in fr_vocab.items()}

# Custom dataset class
class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab):
        self.src_sentences = [[src_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in src_sentences]
        self.tgt_sentences = [[tgt_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in tgt_sentences]

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        src_sentence = self.src_sentences[idx]
        tgt_sentence = self.tgt_sentences[idx]
        return torch.tensor(src_sentence, dtype=torch.long), torch.tensor(tgt_sentence, dtype=torch.long)

# Padding function for batch processing
def pad_collate(batch):
    src_batch, tgt_batch = zip(*batch)
    src_lens = [len(seq) for seq in src_batch]
    tgt_lens = [len(seq) for seq in tgt_batch]
    src_max = max(src_lens)
    tgt_max = max(tgt_lens)
    src_padded = torch.nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=en_vocab['<PAD>'])
    tgt_padded = torch.nn.utils.rnn.pad_sequence(tgt_batch, batch_first=True, padding_value=fr_vocab['<PAD>'])
    return src_padded, tgt_padded, torch.tensor(src_lens), torch.tensor(tgt_lens)

# Create datasets and dataloaders
train_dataset = TranslationDataset(tokenized_en, tokenized_fr, en_vocab, fr_vocab)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=pad_collate)

# GRU-based encoder and decoder models
class EncoderGRU(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

class DecoderGRU(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        output = torch.relu(embedded)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

# Training and evaluation functions
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=10):
    encoder_hidden = encoder.initHidden(input_tensor.size(0))

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(1)
    target_length = target_tensor.size(1)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)

    decoder_input = torch.tensor([[fr_vocab['<SOS>']] for _ in range(input_tensor.size(0))])
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[:, di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

def evaluate(encoder, decoder, dataloader, criterion):
    encoder.eval()
    decoder.eval()

    total_loss = 0

    with torch.no_grad():
        for input_tensor, target_tensor, _, _ in dataloader:
            encoder_hidden = encoder.initHidden(input_tensor.size(0))

            input_length = input_tensor.size(1)
            target_length = target_tensor.size(1)

            loss = 0

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)

            decoder_input = torch.tensor([[fr_vocab['<SOS>']] for _ in range(input_tensor.size(0))])
            decoder_hidden = encoder_hidden

            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                loss += criterion(decoder_output, target_tensor[:, di])

            total_loss += loss.item() / target_length

    return total_loss / len(dataloader)

# Main training loop
n_epochs = 150
learning_rate = 0.05

encoder = EncoderGRU(len(en_vocab), 512)
decoder = DecoderGRU(512, len(fr_vocab))
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss(ignore_index=fr_vocab['<PAD>'])

for epoch in range(n_epochs):
    encoder.train()
    decoder.train()

    total_loss = 0
    for input_tensor, target_tensor, _, _ in train_dataloader:
        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        total_loss += loss
    
    # Evaluate on the training set for simplicity, in a real scenario you should use a validation set
    if (epoch ) % 20 == 0:
        validation_loss = evaluate(encoder, decoder, train_dataloader, criterion)
        print(f'Epoch {epoch+1}, Training Loss: {total_loss / len(train_dataloader)}, Validation Loss: {validation_loss}')

# Qualitative validation: translating some English sentences to French
def translate(encoder, decoder, sentence, en_vocab, fr_vocab, rev_fr_vocab, max_length=10):
    with torch.no_grad():
        # Lowercase the sentence before tokenizing
        input_tensor = torch.tensor([[en_vocab[word] for word in sentence.lower().split()]], dtype=torch.long)
        encoder_hidden = encoder.initHidden(1)

        for ei in range(input_tensor.size(1)):
            encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)

        decoder_input = torch.tensor([[fr_vocab['<SOS>']]])
        decoder_hidden = encoder_hidden

        translated_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == fr_vocab['<EOS>']:
                break
            else:
                translated_words.append(rev_fr_vocab[topi.item()])

            decoder_input = topi.squeeze().detach()

        return ' '.join(translated_words)


# Translate some example sentences
example_sentences = ["He studies history", "They visit the Eiffel Tower", "He climbs the mountain"]
for sentence in example_sentences:
    translation = translate(encoder, decoder, sentence, en_vocab, fr_vocab, rev_fr_vocab)
    print(f'{sentence} -> {translation}')


Epoch 1, Training Loss: 8.573744360695326, Validation Loss: 6.318465764340212
Epoch 21, Training Loss: 1.0144437960740635, Validation Loss: 0.9603681597109309
Epoch 41, Training Loss: 0.061549888034908885, Validation Loss: 0.13033430714216646
Epoch 61, Training Loss: 0.008658872170845506, Validation Loss: 0.008734908177628899
Epoch 81, Training Loss: 0.005670040274283501, Validation Loss: 0.00558836516541863
Epoch 101, Training Loss: 0.003967637190454832, Validation Loss: 0.004165294005415936
Epoch 121, Training Loss: 0.0033632533838616314, Validation Loss: 0.003116885491829597
Epoch 141, Training Loss: 0.0026630913636184867, Validation Loss: 0.0027049206998998846
He studies history -> il étudie l'histoire
They visit the Eiffel Tower -> ils visitent la tour eiffel
He climbs the mountain -> il gravit la montagne


In [22]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from nltk.translate.bleu_score import sentence_bleu

# Sample English to French sentence pairs
english_to_french = [

    ("I am cold", "J'ai froid"),

    ("You are tired", "Tu es fatigué"),

    ("He is hungry", "Il a faim"),

    ("She is happy", "Elle est heureuse"),

    ("We are friends", "Nous sommes amis"),

    ("They are students", "Ils sont étudiants"),

    ("The cat is sleeping", "Le chat dort"),

    ("The sun is shining", "Le soleil brille"),

    ("We love music", "Nous aimons la musique"),

    ("She speaks French fluently", "Elle parle français couramment"),

    ("He enjoys reading books", "Il aime lire des livres"),

    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),

    ("The movie starts at 7 PM", "Le film commence à 19 heures"),

    ("She wears a red dress", "Elle porte une robe rouge"),

    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),

    ("He drives a blue car", "Il conduit une voiture bleue"),

    ("They visit museums often", "Ils visitent souvent des musées"),

    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),

    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),

    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),

    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),

    ("They travel around the world", "Ils voyagent autour du monde"),

    ("The book is on the table", "Le livre est sur la table"),

    ("She dances gracefully", "Elle danse avec grâce"),

    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),

    ("He works hard every day", "Il travaille dur tous les jours"),

    ("They speak different languages", "Ils parlent différentes langues"),

    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),

    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),

    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),

    ("The dog barks loudly", "Le chien aboie bruyamment"),

    ("He sings beautifully", "Il chante magnifiquement"),

    ("They swim in the pool", "Ils nagent dans la piscine"),

    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),

    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),

    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),

    ("He paints landscapes", "Il peint des paysages"),

    ("They laugh at the joke", "Ils rient de la blague"),

    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),

    ("She runs in the park", "Elle court dans le parc"),

    ("We travel by train", "Nous voyageons en train"),

    ("He writes a letter", "Il écrit une lettre"),

    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),

    ("The baby cries", "Le bébé pleure"),

    ("She studies hard for exams", "Elle étudie dur pour les examens"),

    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),

    ("He fixes the car", "Il répare la voiture"),

    ("They drink coffee in the morning", "Ils boivent du café le matin"),

    ("The sun sets in the evening", "Le soleil se couche le soir"),

    ("She dances at the party", "Elle danse à la fête"),

    ("We play music at the concert", "Nous jouons de la musique au concert"),

    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),

    ("They study French grammar", "Ils étudient la grammaire française"),

    ("The rain falls gently", "La pluie tombe doucement"),

    ("She sings a song", "Elle chante une chanson"),

    ("We watch a movie together", "Nous regardons un film ensemble"),

    ("He sleeps deeply", "Il dort profondément"),

    ("They travel to Paris", "Ils voyagent à Paris"),

    ("The children play in the park", "Les enfants jouent dans le parc"),

    ("She walks along the beach", "Elle se promène le long de la plage"),

    ("We talk on the phone", "Nous parlons au téléphone"),

    ("He waits for the bus", "Il attend le bus"),

    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),

    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),

    ("She dreams of flying", "Elle rêve de voler"),

    ("We work in the office", "Nous travaillons au bureau"),

    ("He studies history", "Il étudie l'histoire"),

    ("They listen to the radio", "Ils écoutent la radio"),

    ("The wind blows gently", "Le vent souffle doucement"),

    ("She swims in the ocean", "Elle nage dans l'océan"),

    ("We dance at the wedding", "Nous dansons au mariage"),

    ("He climbs the mountain", "Il gravit la montagne"),

    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),

    ("The cat meows loudly", "Le chat miaule bruyamment"),

    ("She paints a picture", "Elle peint un tableau"),

    ("We build a sandcastle", "Nous construisons un château de sable"),

    ("He sings in the choir", "Il chante dans le chœur")

]

# Tokenization and vocabulary building
def tokenize(text):
    return text.lower().split()

def build_vocab(sentences):
    tokens = [token for sentence in sentences for token in sentence]
    vocab = {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2}
    vocab.update({token: i+3 for i, token in enumerate(set(tokens))})
    return vocab

tokenized_en = [tokenize(en) for en, fr in english_to_french]
tokenized_fr = [tokenize(fr) for en, fr in english_to_french]
en_vocab = build_vocab(tokenized_en)
fr_vocab = build_vocab(tokenized_fr)
rev_fr_vocab = {v: k for k, v in fr_vocab.items()}

# Custom dataset class
class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab):
        self.src_sentences = [[src_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in src_sentences]
        self.tgt_sentences = [[tgt_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in tgt_sentences]

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        src_sentence = self.src_sentences[idx]
        tgt_sentence = self.tgt_sentences[idx]
        return torch.tensor(src_sentence, dtype=torch.long), torch.tensor(tgt_sentence, dtype=torch.long)

# Padding function for batch processing
def pad_collate(batch):
    src_batch, tgt_batch = zip(*batch)
    src_lens = [len(seq) for seq in src_batch]
    tgt_lens = [len(seq) for seq in tgt_batch]
    src_max = max(src_lens)
    tgt_max = max(tgt_lens)
    src_padded = torch.nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=en_vocab['<PAD>'])
    tgt_padded = torch.nn.utils.rnn.pad_sequence(tgt_batch, batch_first=True, padding_value=fr_vocab['<PAD>'])
    return src_padded, tgt_padded, torch.tensor(src_lens), torch.tensor(tgt_lens)

# Create datasets and dataloaders
train_dataset = TranslationDataset(tokenized_en, tokenized_fr, en_vocab, fr_vocab)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=pad_collate)

# GRU-based encoder
class EncoderGRU(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# GRU-based decoder with attention
class AttnDecoderGRU(nn.Module):
    def __init__(self, hidden_size, output_size, max_length):
        super(AttnDecoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        attn_weights = torch.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs.transpose(0, 1))

        output = torch.cat((embedded[0], attn_applied.squeeze(1)), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = torch.relu(output)
        output, hidden = self.gru(output, hidden)

        output = torch.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# Training and evaluation functions
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length):
    encoder_hidden = encoder.initHidden(input_tensor.size(0))

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(1)
    target_length = target_tensor.size(1)

    encoder_outputs = torch.zeros(max_length, input_tensor.size(0), encoder.hidden_size, device=input_tensor.device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0]

    decoder_input = torch.tensor([[fr_vocab['<SOS>']] for _ in range(input_tensor.size(0))], device=input_tensor.device)
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[:, di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

def evaluate(encoder, decoder, dataloader, criterion, max_length):
    encoder.eval()
    decoder.eval()

    total_loss = 0

    with torch.no_grad():
        for input_tensor, target_tensor, _, _ in dataloader:
            encoder_hidden = encoder.initHidden(input_tensor.size(0))

            input_length = input_tensor.size(1)
            target_length = target_tensor.size(1)

            encoder_outputs = torch.zeros(max_length, input_tensor.size(0), encoder.hidden_size, device=input_tensor.device)

            loss = 0

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)
                encoder_outputs[ei] = encoder_output[0]

            decoder_input = torch.tensor([[fr_vocab['<SOS>']] for _ in range(input_tensor.size(0))], device=input_tensor.device)
            decoder_hidden = encoder_hidden

            for di in range(target_length):
                decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                loss += criterion(decoder_output, target_tensor[:, di])

            total_loss += loss.item() / target_length

    return total_loss / len(dataloader)

# Main training loop
n_epochs = 150
learning_rate = 0.03
max_length = 10

encoder = EncoderGRU(len(en_vocab), 164)
decoder = AttnDecoderGRU(164, len(fr_vocab), max_length)
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss(ignore_index=fr_vocab['<PAD>'])

for epoch in range(n_epochs):
    encoder.train()
    decoder.train()

    total_loss = 0
    for input_tensor, target_tensor, _, _ in train_dataloader:
        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length)
        total_loss += loss
    if epoch % 20 == 0: 
        validation_loss = evaluate(encoder, decoder, train_dataloader, criterion, max_length)
        print(f'Epoch {epoch+1}, Training Loss: {total_loss / len(train_dataloader)}, Validation Loss: {validation_loss}')

# Qualitative validation: translating some English sentences to French with attention
def translate_with_attention(encoder, decoder, sentence, en_vocab, fr_vocab, rev_fr_vocab, max_length):
    with torch.no_grad():
        input_tensor = torch.tensor([[en_vocab[word] for word in sentence.lower().split()]], dtype=torch.long)
        encoder_hidden = encoder.initHidden(1)

        encoder_outputs = torch.zeros(max_length, 1, encoder.hidden_size, device=input_tensor.device)

        for ei in range(input_tensor.size(1)):
            encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)
            encoder_outputs[ei] = encoder_output[0]

        decoder_input = torch.tensor([[fr_vocab['<SOS>']]], device=input_tensor.device)
        decoder_hidden = encoder_hidden

        translated_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden, attn_weights = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            if topi.item() == fr_vocab['<EOS>']:
                break
            else:
                translated_words.append(rev_fr_vocab[topi.item()])

            decoder_input = topi.squeeze().detach()

        return ' '.join(translated_words)

# Translate some example sentences with attention
example_sentences = ["He studies history", "They visit the Eiffel Tower", "The movie starts at 7 PM"]
for sentence in example_sentences:
    translation = translate_with_attention(encoder, decoder, sentence, en_vocab, fr_vocab, rev_fr_vocab, max_length)
    print(f'{sentence} -> {translation}')


Epoch 1, Training Loss: 4.356175176299152, Validation Loss: 3.893270356059934
Epoch 21, Training Loss: 2.3091502744673305, Validation Loss: 2.4364687926394257
Epoch 41, Training Loss: 0.74888315356397, Validation Loss: 0.5881966039297557
Epoch 61, Training Loss: 0.1117647099083163, Validation Loss: 0.10443463785404508
Epoch 81, Training Loss: 0.03809330692501338, Validation Loss: 0.03643889545644452
Epoch 101, Training Loss: 0.02229694298574698, Validation Loss: 0.021657901632896537
Epoch 121, Training Loss: 0.015807198155550407, Validation Loss: 0.014762386420142298
Epoch 141, Training Loss: 0.012442525985461228, Validation Loss: 0.023283462951841402
He studies history -> il aime lire la musique
They visit the Eiffel Tower -> <SOS> tu es tour eiffel
The movie starts at 7 PM -> le film commence à 19 heures


In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from nltk.translate.bleu_score import sentence_bleu

# Sample French to English sentence pairs (reversed from the original English to French pairs)
french_to_english = [
    ("J'ai froid", "I am cold"),
    ("Tu es fatigué", "You are tired"),
    ("Il a faim", "He is hungry"),
    ("Elle est heureuse", "She is happy"),
    ("Nous sommes amis", "We are friends"),
    ("Ils sont étudiants", "They are students"),
    ("Le chat dort", "The cat is sleeping"),
    ("Le soleil brille", "The sun is shining"),
    ("Nous aimons la musique", "We love music"),
    ("Elle parle français couramment", "She speaks French fluently"),
    ("Il aime lire des livres", "He enjoys reading books"),
    ("Ils jouent au football chaque week-end", "They play soccer every weekend"),
    ("Le film commence à 19 heures", "The movie starts at 7 PM"),
    ("Elle porte une robe rouge", "She wears a red dress"),
    ("Nous cuisinons le dîner ensemble", "We cook dinner together"),
    ("Il conduit une voiture bleue", "He drives a blue car"),
    ("Ils visitent souvent des musées", "They visit museums often"),
    ("Le restaurant sert une délicieuse cuisine", "The restaurant serves delicious food"),
    ("Elle étudie les mathématiques à l'université", "She studies mathematics at university"),
    ("Nous regardons des films le vendredi", "We watch movies on Fridays"),
    ("Il écoute de la musique en faisant du jogging", "He listens to music while jogging"),
    ("Ils voyagent autour du monde", "They travel around the world"),
    ("Le livre est sur la table", "The book is on the table"),
    ("Elle danse avec grâce", "She dances gracefully"),
    ("Nous célébrons les anniversaires avec un gâteau", "We celebrate birthdays with cake"),
    ("Il travaille dur tous les jours", "He works hard every day"),
    ("Ils parlent différentes langues", "They speak different languages"),
    ("Les fleurs fleurissent au printemps", "The flowers bloom in spring"),
    ("Elle écrit de la poésie pendant son temps libre", "She writes poetry in her free time"),
    ("Nous apprenons quelque chose de nouveau chaque jour", "We learn something new every day"),
    ("Le chien aboie bruyamment", "The dog barks loudly"),
    ("Il chante magnifiquement", "He sings beautifully"),
    ("Ils nagent dans la piscine", "They swim in the pool"),
    ("Les oiseaux gazouillent le matin", "The birds chirp in the morning"),
    ("Elle enseigne l'anglais à l'école", "She teaches English at school"),
    ("Nous prenons le petit déjeuner ensemble", "We eat breakfast together"),
    ("Il peint des paysages", "He paints landscapes"),
    ("Ils rient de la blague", "They laugh at the joke"),
    ("L'horloge tic-tac bruyamment", "The clock ticks loudly"),
    ("Elle court dans le parc", "She runs in the park"),
    ("Nous voyageons en train", "We travel by train"),
    ("Il écrit une lettre", "He writes a letter"),
    ("Ils lisent des livres à la bibliothèque", "They read books at the library"),
    ("Le bébé pleure", "The baby cries"),
    ("Elle étudie dur pour les examens", "She studies hard for exams"),
    ("Nous plantons des fleurs dans le jardin", "We plant flowers in the garden"),
    ("Il répare la voiture", "He fixes the car"),
    ("Ils boivent du café le matin", "They drink coffee in the morning"),
    ("Le soleil se couche le soir", "The sun sets in the evening"),
    ("Elle danse à la fête", "She dances at the party"),
    ("Nous jouons de la musique au concert", "We play music at the concert"),
    ("Il cuisine le dîner pour sa famille", "He cooks dinner for his family"),
    ("Ils étudient la grammaire française", "They study French grammar"),
    ("La pluie tombe doucement", "The rain falls gently"),
    ("Elle chante une chanson", "She sings a song"),
    ("Nous regardons un film ensemble", "We watch a movie together"),
    ("Il dort profondément", "He sleeps deeply"),
    ("Ils voyagent à Paris", "They travel to Paris"),
    ("Les enfants jouent dans le parc", "The children play in the park"),
    ("Elle se promène le long de la plage", "She walks along the beach"),
    ("Nous parlons au téléphone", "We talk on the phone"),
    ("Il attend le bus", "He waits for the bus"),
    ("Ils visitent la tour Eiffel", "They visit the Eiffel Tower"),
    ("Les étoiles scintillent la nuit", "The stars twinkle at night"),
    ("Elle rêve de voler", "She dreams of flying"),
    ("Nous travaillons au bureau", "We work in the office"),
    ("Il étudie l'histoire", "He studies history"),
    ("Ils écoutent la radio", "They listen to the radio"),
    ("Le vent souffle doucement", "The wind blows gently"),
    ("Elle nage dans l'océan", "She swims in the ocean"),
    ("Nous dansons au mariage", "We dance at the wedding"),
    ("Il gravit la montagne", "He climbs the mountain"),
    ("Ils font de la randonnée dans la forêt", "They hike in the forest"),
    ("Le chat miaule bruyamment", "The cat meows loudly"),
    ("Elle peint un tableau", "She paints a picture"),
    ("Nous construisons un château de sable", "We build a sandcastle"),
    ("Il chante dans le chœur", "He sings in the choir")
]


def tokenize(text):
    return text.lower().split()

def build_vocab(sentences):
    tokens = [token for sentence in sentences for token in sentence]
    vocab = {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2}
    vocab.update({token: i+3 for i, token in enumerate(set(tokens))})
    return vocab

tokenized_fr = [tokenize(fr) for fr, en in french_to_english]
tokenized_en = [tokenize(en) for fr, en in french_to_english]
fr_vocab = build_vocab(tokenized_fr)
en_vocab = build_vocab(tokenized_en)
rev_en_vocab = {v: k for k, v in en_vocab.items()}

class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab):
        self.src_sentences = [[src_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in src_sentences]
        self.tgt_sentences = [[tgt_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in tgt_sentences]

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        src_sentence = self.src_sentences[idx]
        tgt_sentence = self.tgt_sentences[idx]
        return torch.tensor(src_sentence, dtype=torch.long), torch.tensor(tgt_sentence, dtype=torch.long)

def pad_collate(batch):
    src_batch, tgt_batch = zip(*batch)
    src_lens = [len(seq) for seq in src_batch]
    tgt_lens = [len(seq) for seq in tgt_batch]
    src_max = max(src_lens)
    tgt_max = max(tgt_lens)
    src_padded = torch.nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=fr_vocab['<PAD>'])
    tgt_padded = torch.nn.utils.rnn.pad_sequence(tgt_batch, batch_first=True, padding_value=en_vocab['<PAD>'])
    return src_padded, tgt_padded, torch.tensor(src_lens), torch.tensor(tgt_lens)

train_dataset = TranslationDataset(tokenized_fr, tokenized_en, fr_vocab, en_vocab)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=pad_collate)

class EncoderGRU(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

class DecoderGRU(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        output = torch.relu(embedded)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length):
    encoder_hidden = encoder.initHidden(input_tensor.size(0))

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(1)
    target_length = target_tensor.size(1)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)

    decoder_input = torch.tensor([[en_vocab['<SOS>']] for _ in range(input_tensor.size(0))])
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[:, di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

def evaluate(encoder, decoder, dataloader, criterion):
    encoder.eval()
    decoder.eval()

    total_loss = 0

    with torch.no_grad():
        for input_tensor, target_tensor, _, _ in dataloader:
            encoder_hidden = encoder.initHidden(input_tensor.size(0))

            input_length = input_tensor.size(1)
            target_length = target_tensor.size(1)

            loss = 0

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)

            decoder_input = torch.tensor([[en_vocab['<SOS>']] for _ in range(input_tensor.size(0))])
            decoder_hidden = encoder_hidden

            for di in range(target_length):
                decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                loss += criterion(decoder_output, target_tensor[:, di])

            total_loss += loss.item() / target_length

    return total_loss / len(dataloader)

# Main training loop
n_epochs = 500
learning_rate = 0.03

encoder = EncoderGRU(len(fr_vocab), 164)
decoder = DecoderGRU(164 ,len(en_vocab))
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss(ignore_index=en_vocab['<PAD>'])

for epoch in range(n_epochs):
    encoder.train()
    decoder.train()

    total_loss = 0
    for input_tensor, target_tensor, _, _ in train_dataloader:
        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion,10)
        total_loss += loss
    if epoch % 50 == 0:
        validation_loss = evaluate(encoder, decoder, train_dataloader, criterion)
        print(f'Epoch {epoch+1}, Training Loss: {total_loss / len(train_dataloader)}, Validation Loss: {validation_loss}')

# Qualitative validation: translating some French sentences to English
def translate(encoder, decoder, sentence, fr_vocab, en_vocab, rev_en_vocab, max_length=10):
    with torch.no_grad():
        input_tensor = torch.tensor([[fr_vocab[word] for word in sentence.lower().split()]], dtype=torch.long)
        encoder_hidden = encoder.initHidden(1)

        for ei in range(input_tensor.size(1)):
            encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)

        decoder_input = torch.tensor([[en_vocab['<SOS>']]])
        decoder_hidden = encoder_hidden

        translated_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.topk(1)
            if topi.item() == en_vocab['<EOS>']:
                break
            else:
                translated_words.append(rev_en_vocab[topi.item()])

            decoder_input = topi.squeeze().detach()

        return ' '.join(translated_words)

# Translate some example sentences
example_sentences = ["J'ai froid", "Tu es fatigué", "Il chante dans le chœur"]
for sentence in example_sentences:
    translation = translate(encoder, decoder, sentence, fr_vocab, en_vocab, rev_en_vocab)
    print(f'{sentence} -> {translation}')


Epoch 1, Training Loss: 4.035620164871216, Validation Loss: 3.8100338474152577
Epoch 51, Training Loss: 0.8325259286732901, Validation Loss: 0.7052007373363252
Epoch 101, Training Loss: 0.05083978123194168, Validation Loss: 0.04743419288997612
Epoch 151, Training Loss: 0.02014349309638852, Validation Loss: 0.01861792301448683
Epoch 201, Training Loss: 0.011543686747817057, Validation Loss: 0.011560111203127435
Epoch 251, Training Loss: 0.008095476417137043, Validation Loss: 0.008088337437486246
Epoch 301, Training Loss: 0.0064249163493514064, Validation Loss: 0.006164413125490742
Epoch 351, Training Loss: 0.005239803117832967, Validation Loss: 0.0049840749603592685
Epoch 401, Training Loss: 0.004510429015545736, Validation Loss: 0.004264778245447411
Epoch 451, Training Loss: 0.0036935142891865876, Validation Loss: 0.0036810269363091465
J'ai froid -> i am cold
Tu es fatigué -> you are tired
Il chante dans le chœur -> he sings in the choir


In [20]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from nltk.translate.bleu_score import sentence_bleu

# Sample French to English sentence pairs
french_to_english = [
    ("J'ai froid", "I am cold"),
    ("Tu es fatigué", "You are tired"),
    ("Il a faim", "He is hungry"),
    ("Elle est heureuse", "She is happy"),
    ("Nous sommes amis", "We are friends"),
    ("Ils sont étudiants", "They are students"),
    ("Le chat dort", "The cat is sleeping"),
    ("Le soleil brille", "The sun is shining"),
    ("Nous aimons la musique", "We love music"),
    ("Elle parle français couramment", "She speaks French fluently"),
    ("Il aime lire des livres", "He enjoys reading books"),
    ("Ils jouent au football chaque week-end", "They play soccer every weekend"),
    ("Le film commence à 19 heures", "The movie starts at 7 PM"),
    ("Elle porte une robe rouge", "She wears a red dress"),
    ("Nous cuisinons le dîner ensemble", "We cook dinner together"),
    ("Il conduit une voiture bleue", "He drives a blue car"),
    ("Ils visitent souvent des musées", "They visit museums often"),
    ("Le restaurant sert une délicieuse cuisine", "The restaurant serves delicious food"),
    ("Elle étudie les mathématiques à l'université", "She studies mathematics at university"),
    ("Nous regardons des films le vendredi", "We watch movies on Fridays"),
    ("Il écoute de la musique en faisant du jogging", "He listens to music while jogging"),
    ("Ils voyagent autour du monde", "They travel around the world"),
    ("Le livre est sur la table", "The book is on the table"),
    ("Elle danse avec grâce", "She dances gracefully"),
    ("Nous célébrons les anniversaires avec un gâteau", "We celebrate birthdays with cake"),
    ("Il travaille dur tous les jours", "He works hard every day"),
    ("Ils parlent différentes langues", "They speak different languages"),
    ("Les fleurs fleurissent au printemps", "The flowers bloom in spring"),
    ("Elle écrit de la poésie pendant son temps libre", "She writes poetry in her free time"),
    ("Nous apprenons quelque chose de nouveau chaque jour", "We learn something new every day"),
    ("Le chien aboie bruyamment", "The dog barks loudly"),
    ("Il chante magnifiquement", "He sings beautifully"),
    ("Ils nagent dans la piscine", "They swim in the pool"),
    ("Les oiseaux gazouillent le matin", "The birds chirp in the morning"),
    ("Elle enseigne l'anglais à l'école", "She teaches English at school"),
    ("Nous prenons le petit déjeuner ensemble", "We eat breakfast together"),
    ("Il peint des paysages", "He paints landscapes"),
    ("Ils rient de la blague", "They laugh at the joke"),
    ("L'horloge tic-tac bruyamment", "The clock ticks loudly"),
    ("Elle court dans le parc", "She runs in the park"),
    ("Nous voyageons en train", "We travel by train"),
    ("Il écrit une lettre", "He writes a letter"),
    ("Ils lisent des livres à la bibliothèque", "They read books at the library"),
    ("Le bébé pleure", "The baby cries"),
    ("Elle étudie dur pour les examens", "She studies hard for exams"),
    ("Nous plantons des fleurs dans le jardin", "We plant flowers in the garden"),
    ("Il répare la voiture", "He fixes the car"),
    ("Ils boivent du café le matin", "They drink coffee in the morning"),
    ("Le soleil se couche le soir", "The sun sets in the evening"),
    ("Elle danse à la fête", "She dances at the party"),
    ("Nous jouons de la musique au concert", "We play music at the concert"),
    ("Il cuisine le dîner pour sa famille", "He cooks dinner for his family"),
    ("Ils étudient la grammaire française", "They study French grammar"),
    ("La pluie tombe doucement", "The rain falls gently"),
    ("Elle chante une chanson", "She sings a song"),
    ("Nous regardons un film ensemble", "We watch a movie together"),
    ("Il dort profondément", "He sleeps deeply"),
    ("Ils voyagent à Paris", "They travel to Paris"),
    ("Les enfants jouent dans le parc", "The children play in the park"),
    ("Elle se promène le long de la plage", "She walks along the beach"),
    ("Nous parlons au téléphone", "We talk on the phone"),
    ("Il attend le bus", "He waits for the bus"),
    ("Ils visitent la tour Eiffel", "They visit the Eiffel Tower"),
    ("Les étoiles scintillent la nuit", "The stars twinkle at night"),
    ("Elle rêve de voler", "She dreams of flying"),
    ("Nous travaillons au bureau", "We work in the office"),
    ("Il étudie l'histoire", "He studies history"),
    ("Ils écoutent la radio", "They listen to the radio"),
    ("Le vent souffle doucement", "The wind blows gently"),
    ("Elle nage dans l'océan", "She swims in the ocean"),
    ("Nous dansons au mariage", "We dance at the wedding"),
    ("Il gravit la montagne", "He climbs the mountain"),
    ("Ils font de la randonnée dans la forêt", "They hike in the forest"),
    ("Le chat miaule bruyamment", "The cat meows loudly"),
    ("Elle peint un tableau", "She paints a picture"),
    ("Nous construisons un château de sable", "We build a sandcastle"),
    ("Il chante dans le chœur", "He sings in the choir")
]

def tokenize(text):
    return text.lower().split()

def build_vocab(sentences):
    tokens = [token for sentence in sentences for token in sentence]
    vocab = {'<PAD>': 0, '<SOS>': 1, '<EOS>': 2}
    vocab.update({token: i+3 for i, token in enumerate(set(tokens))})
    return vocab

tokenized_fr = [tokenize(fr) for fr, en in french_to_english]
tokenized_en = [tokenize(en) for fr, en in french_to_english]
fr_vocab = build_vocab(tokenized_fr)
en_vocab = build_vocab(tokenized_en)
rev_en_vocab = {v: k for k, v in en_vocab.items()}

class TranslationDataset(Dataset):
    def __init__(self, src_sentences, tgt_sentences, src_vocab, tgt_vocab):
        self.src_sentences = [[src_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in src_sentences]
        self.tgt_sentences = [[tgt_vocab[token] for token in ['<SOS>'] + sentence + ['<EOS>']] for sentence in tgt_sentences]

    def __len__(self):
        return len(self.src_sentences)

    def __getitem__(self, idx):
        src_sentence = self.src_sentences[idx]
        tgt_sentence = self.tgt_sentences[idx]
        return torch.tensor(src_sentence, dtype=torch.long), torch.tensor(tgt_sentence, dtype=torch.long)

def pad_collate(batch):
    src_batch, tgt_batch = zip(*batch)
    src_lens = [len(seq) for seq in src_batch]
    tgt_lens = [len(seq) for seq in tgt_batch]
    src_max = max(src_lens)
    tgt_max = max(tgt_lens)
    src_padded = torch.nn.utils.rnn.pad_sequence(src_batch, batch_first=True, padding_value=fr_vocab['<PAD>'])
    tgt_padded = torch.nn.utils.rnn.pad_sequence(tgt_batch, batch_first=True, padding_value=en_vocab['<PAD>'])
    return src_padded, tgt_padded, torch.tensor(src_lens), torch.tensor(tgt_lens)

train_dataset = TranslationDataset(tokenized_fr, tokenized_en, fr_vocab, en_vocab)
train_dataloader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=pad_collate)

class EncoderGRU(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

class AttnDecoderGRU(nn.Module):
    def __init__(self, hidden_size, output_size, max_length):
        super(AttnDecoderGRU, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, -1, self.hidden_size)
        attn_weights = torch.softmax(self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs.transpose(0, 1))

        output = torch.cat((embedded[0], attn_applied.squeeze(1)), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = torch.relu(output)
        output, hidden = self.gru(output, hidden)

        output = torch.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length):
    encoder_hidden = encoder.initHidden(input_tensor.size(0))

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(1)
    target_length = target_tensor.size(1)

    encoder_outputs = torch.zeros(max_length, input_tensor.size(0), encoder.hidden_size, device=input_tensor.device)

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0]

    decoder_input = torch.tensor([[en_vocab['<SOS>']] for _ in range(input_tensor.size(0))], device=input_tensor.device)
    decoder_hidden = encoder_hidden

    for di in range(target_length):
        decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
        topv, topi = decoder_output.topk(1)
        decoder_input = topi.squeeze().detach()

        loss += criterion(decoder_output, target_tensor[:, di])

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

def evaluate(encoder, decoder, dataloader, criterion, max_length):
    encoder.eval()
    decoder.eval()

    total_loss = 0

    with torch.no_grad():
        for input_tensor, target_tensor, _, _ in dataloader:
            encoder_hidden = encoder.initHidden(input_tensor.size(0))

            input_length = input_tensor.size(1)
            target_length = target_tensor.size(1)

            encoder_outputs = torch.zeros(max_length, input_tensor.size(0), encoder.hidden_size, device=input_tensor.device)

            loss = 0

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)
                encoder_outputs[ei] = encoder_output[0]

            decoder_input = torch.tensor([[en_vocab['<SOS>']] for _ in range(input_tensor.size(0))], device=input_tensor.device)
            decoder_hidden = encoder_hidden

            for di in range(target_length):
                decoder_output, decoder_hidden, _ = decoder(decoder_input, decoder_hidden, encoder_outputs)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                loss += criterion(decoder_output, target_tensor[:, di])

            total_loss += loss.item() / target_length

    return total_loss / len(dataloader)

n_epochs = 250
learning_rate = 0.03
max_length = 25

encoder = EncoderGRU(len(fr_vocab), 164)
decoder = AttnDecoderGRU(164, len(en_vocab), max_length)
encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss(ignore_index=en_vocab['<PAD>'])

for epoch in range(n_epochs):
    encoder.train()
    decoder.train()

    total_loss = 0
    for input_tensor, target_tensor, _, _ in train_dataloader:
        loss = train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length)
        total_loss += loss
    if (epoch % 20 ) ==0:
        validation_loss = evaluate(encoder, decoder, train_dataloader, criterion, max_length)
        print(f'Epoch {epoch+1}, Training Loss: {total_loss / len(train_dataloader)}, Validation Loss: {validation_loss}')

def translate_with_attention(encoder, decoder, sentence, fr_vocab, en_vocab, rev_en_vocab, max_length):
    with torch.no_grad():
        input_tensor = torch.tensor([[fr_vocab[word] for word in sentence.lower().split()]], dtype=torch.long)
        encoder_hidden = encoder.initHidden(1)

        encoder_outputs = torch.zeros(max_length, 1, encoder.hidden_size, device=input_tensor.device)

        for ei in range(input_tensor.size(1)):
            encoder_output, encoder_hidden = encoder(input_tensor[:, ei], encoder_hidden)
            encoder_outputs[ei] = encoder_output[0]

        decoder_input = torch.tensor([[en_vocab['<SOS>']]], device=input_tensor.device)
        decoder_hidden = encoder_hidden

        translated_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden, attn_weights = decoder(decoder_input, decoder_hidden, encoder_outputs)
            topv, topi = decoder_output.topk(1)
            if topi.item() == en_vocab['<EOS>']:
                break
            else:
                translated_words.append(rev_en_vocab[topi.item()])

            decoder_input = topi.squeeze().detach()

        return ' '.join(translated_words)

example_sentences = ["J'ai froid", "Tu es fatigué", "Il a faim"]
for sentence in example_sentences:
    translation = translate_with_attention(encoder, decoder, sentence, fr_vocab, en_vocab, rev_en_vocab, max_length)
    print(f'{sentence} -> {translation}')


Epoch 1, Training Loss: 4.23929743312654, Validation Loss: 3.563293671645815
Epoch 21, Training Loss: 2.320236312150955, Validation Loss: 2.605328211708674
Epoch 41, Training Loss: 0.9618941412085578, Validation Loss: 0.8379886672610329
Epoch 61, Training Loss: 0.16432976705569122, Validation Loss: 0.15104681035828968
Epoch 81, Training Loss: 0.10278267372340437, Validation Loss: 0.1595668133401445
Epoch 101, Training Loss: 0.0337471158066321, Validation Loss: 0.028998730362703402
Epoch 121, Training Loss: 0.07798305818485836, Validation Loss: 0.027602618320712025
Epoch 141, Training Loss: 0.016358968084086732, Validation Loss: 0.014961484856607893
Epoch 161, Training Loss: 0.012502744897372192, Validation Loss: 0.01162970983645036
Epoch 181, Training Loss: 0.011268529025394292, Validation Loss: 0.010423154022455925
Epoch 201, Training Loss: 0.008039396035573668, Validation Loss: 0.008021185949577816
Epoch 221, Training Loss: 0.007743354765920056, Validation Loss: 0.007563026141493567
