In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.nn.utils.rnn import pad_sequence
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [11]:
english_to_french = [
    ("I am cold", "J'ai froid"),
    ("You are tired", "Tu es fatigué"),
    ("He is hungry", "Il a faim"),
    ("She is happy", "Elle est heureuse"),
    ("We are friends", "Nous sommes amis"),
    ("They are students", "Ils sont étudiants"),
    ("The cat is sleeping", "Le chat dort"),
    ("The sun is shining", "Le soleil brille"),
    ("We love music", "Nous aimons la musique"),
    ("She speaks French fluently", "Elle parle français couramment"),
    ("He enjoys reading books", "Il aime lire des livres"),
    ("They play soccer every weekend", "Ils jouent au football chaque week-end"),
    ("The movie starts at 7 PM", "Le film commence à 19 heures"),
    ("She wears a red dress", "Elle porte une robe rouge"),
    ("We cook dinner together", "Nous cuisinons le dîner ensemble"),
    ("He drives a blue car", "Il conduit une voiture bleue"),
    ("They visit museums often", "Ils visitent souvent des musées"),
    ("The restaurant serves delicious food", "Le restaurant sert une délicieuse cuisine"),
    ("She studies mathematics at university", "Elle étudie les mathématiques à l'université"),
    ("We watch movies on Fridays", "Nous regardons des films le vendredi"),
    ("He listens to music while jogging", "Il écoute de la musique en faisant du jogging"),
    ("They travel around the world", "Ils voyagent autour du monde"),
    ("The book is on the table", "Le livre est sur la table"),
    ("She dances gracefully", "Elle danse avec grâce"),
    ("We celebrate birthdays with cake", "Nous célébrons les anniversaires avec un gâteau"),
    ("He works hard every day", "Il travaille dur tous les jours"),
    ("They speak different languages", "Ils parlent différentes langues"),
    ("The flowers bloom in spring", "Les fleurs fleurissent au printemps"),
    ("She writes poetry in her free time", "Elle écrit de la poésie pendant son temps libre"),
    ("We learn something new every day", "Nous apprenons quelque chose de nouveau chaque jour"),
    ("The dog barks loudly", "Le chien aboie bruyamment"),
    ("He sings beautifully", "Il chante magnifiquement"),
    ("They swim in the pool", "Ils nagent dans la piscine"),
    ("The birds chirp in the morning", "Les oiseaux gazouillent le matin"),
    ("She teaches English at school", "Elle enseigne l'anglais à l'école"),
    ("We eat breakfast together", "Nous prenons le petit déjeuner ensemble"),
    ("He paints landscapes", "Il peint des paysages"),
    ("They laugh at the joke", "Ils rient de la blague"),
    ("The clock ticks loudly", "L'horloge tic-tac bruyamment"),
    ("She runs in the park", "Elle court dans le parc"),
    ("We travel by train", "Nous voyageons en train"),
    ("He writes a letter", "Il écrit une lettre"),
    ("They read books at the library", "Ils lisent des livres à la bibliothèque"),
    ("The baby cries", "Le bébé pleure"),
    ("She studies hard for exams", "Elle étudie dur pour les examens"),
    ("We plant flowers in the garden", "Nous plantons des fleurs dans le jardin"),
    ("He fixes the car", "Il répare la voiture"),
    ("They drink coffee in the morning", "Ils boivent du café le matin"),
    ("The sun sets in the evening", "Le soleil se couche le soir"),
    ("She dances at the party", "Elle danse à la fête"),
    ("We play music at the concert", "Nous jouons de la musique au concert"),
    ("He cooks dinner for his family", "Il cuisine le dîner pour sa famille"),
    ("They study French grammar", "Ils étudient la grammaire française"),
    ("The rain falls gently", "La pluie tombe doucement"),
    ("She sings a song", "Elle chante une chanson"),
    ("We watch a movie together", "Nous regardons un film ensemble"),
    ("He sleeps deeply", "Il dort profondément"),
    ("They travel to Paris", "Ils voyagent à Paris"),
    ("The children play in the park", "Les enfants jouent dans le parc"),
    ("She walks along the beach", "Elle se promène le long de la plage"),
    ("We talk on the phone", "Nous parlons au téléphone"),
    ("He waits for the bus", "Il attend le bus"),
    ("They visit the Eiffel Tower", "Ils visitent la tour Eiffel"),
    ("The stars twinkle at night", "Les étoiles scintillent la nuit"),
    ("She dreams of flying", "Elle rêve de voler"),
    ("We work in the office", "Nous travaillons au bureau"),
    ("He studies history", "Il étudie l'histoire"),
    ("They listen to the radio", "Ils écoutent la radio"),
    ("The wind blows gently", "Le vent souffle doucement"),
    ("She swims in the ocean", "Elle nage dans l'océan"),
    ("We dance at the wedding", "Nous dansons au mariage"),
    ("He climbs the mountain", "Il gravit la montagne"),
    ("They hike in the forest", "Ils font de la randonnée dans la forêt"),
    ("The cat meows loudly", "Le chat miaule bruyamment"),
    ("She paints a picture", "Elle peint un tableau"),
    ("We build a sandcastle", "Nous construisons un château de sable"),
    ("He sings in the choir", "Il chante dans le chœur")
]
     

In [14]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
import torch.optim as optim

# Constants for special tokens
SOS_token = 0  # Start Of Sequence Token
EOS_token = 1  # End Of Sequence Token

# Assume english_to_french is defined elsewhere
french_to_english = [(french, english) for english, french in english_to_french]

# Word to index mapping for reversed dataset
word_to_index = {"SOS": SOS_token, "EOS": EOS_token}
for pair in french_to_english:
    for word in pair[0].split() + pair[1].split():
        if word not in word_to_index:
            word_to_index[word] = len(word_to_index)

# Dataset class for handling translation data
class TranslationDataset(Dataset):
    def __init__(self, dataset, word_to_index):
        self.dataset = dataset
        self.word_to_index = word_to_index

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        target_sentence, input_sentence = self.dataset[idx]  # Notice target and input are reversed
        input_indices = [self.word_to_index[word] for word in input_sentence.split()] + [EOS_token]
        target_indices = [self.word_to_index[word] for word in target_sentence.split()] + [EOS_token]
        return torch.tensor(input_indices, dtype=torch.long), torch.tensor(target_indices, dtype=torch.long)

# Custom collate function to handle padding
def collate_batch(batch):
    input_tensors, target_tensors = zip(*batch)
    input_tensors_padded = pad_sequence(input_tensors, batch_first=True, padding_value=EOS_token)
    target_tensors_padded = pad_sequence(target_tensors, batch_first=True, padding_value=EOS_token)
    return input_tensors_padded, target_tensors_padded

# Transformer Model
class TranslationModel(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_layers=4, num_heads=8, dropout=0.1):
        super(TranslationModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.transformer = nn.Transformer(
            d_model=hidden_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            dim_feedforward=hidden_size * 4,
            dropout=dropout,
            batch_first=True
        )
        self.fc_out = nn.Linear(hidden_size, vocab_size)

    def forward(self, input, target):
        embedded_input = self.embedding(input)
        embedded_target = self.embedding(target)
        tgt_mask = self.transformer.generate_square_subsequent_mask(target.size(1)).to(target.device)
        transformer_output = self.transformer(embedded_input, embedded_target, tgt_mask=tgt_mask)
        output = self.fc_out(transformer_output)
        return output

# Train and evaluate function
def train_and_evaluate(model, dataloader, optimizer, criterion, epochs, device):
    for epoch in range(epochs):
        model.train()
        train_loss, train_correct, train_total = 0, 0, 0
        for input_tensor, target_tensor in dataloader:
            input_tensor, target_tensor = input_tensor.to(device), target_tensor.to(device)

            optimizer.zero_grad()
            output = model(input_tensor, target_tensor[:, :-1])
            output_flat = output.view(-1, output.size(-1))
            target_flat = target_tensor[:, 1:].contiguous().view(-1)

            # Exclude EOS from loss calculation
            non_eos_mask = target_flat != EOS_token
            loss = criterion(output_flat[non_eos_mask], target_flat[non_eos_mask])
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = torch.max(output_flat, 1)
            correct_mask = (predicted == target_flat) & non_eos_mask
            train_correct += correct_mask.sum().item()
            train_total += non_eos_mask.sum().item()

        avg_train_loss = train_loss / len(dataloader)
        train_accuracy = train_correct / train_total if train_total > 0 else 0

        # Evaluate on the same dataset (for demonstration, typically use a separate validation set)
        model.eval()
        eval_loss, eval_correct, eval_total = 0, 0, 0
        with torch.no_grad():
            for input_tensor, target_tensor in dataloader:
                input_tensor, target_tensor = input_tensor.to(device), target_tensor.to(device)
                output = model(input_tensor, target_tensor[:, :-1])
                output_flat = output.view(-1, output.size(-1))
                target_flat = target_tensor[:, 1:].contiguous().view(-1)

                non_eos_mask = target_flat != EOS_token
                loss = criterion(output_flat[non_eos_mask], target_flat[non_eos_mask])
                eval_loss += loss.item()
                _, predicted = torch.max(output_flat, 1)
                correct_mask = (predicted == target_flat) & non_eos_mask
                eval_correct += correct_mask.sum().item()
                eval_total += non_eos_mask.sum().item()

        avg_eval_loss = eval_loss / len(dataloader)
        eval_accuracy = eval_correct / eval_total if eval_total > 0 else 0
        if epoch % 20 == 0:
            print(f'Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Train Accuracy = {train_accuracy:.4f}, '
                  f'Eval Loss = {avg_eval_loss:.4f}, Eval Accuracy = {eval_accuracy:.4f}')


In [16]:
# Setup
vocab_size = len(word_to_index)
hidden_size = 64
model = TranslationModel(vocab_size, hidden_size).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
criterion = nn.CrossEntropyLoss(ignore_index=word_to_index["EOS"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# DataLoader
dataset = TranslationDataset(english_to_french, word_to_index)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_batch)

# Train and evaluate
train_and_evaluate(model, dataloader, optimizer, criterion, 100, device)


Epoch 1: Train Loss = 5.9175, Train Accuracy = 0.0147, Eval Loss = 5.6866, Eval Accuracy = 0.0916
Epoch 21: Train Loss = 3.9396, Train Accuracy = 0.3004, Eval Loss = 3.5779, Eval Accuracy = 0.3516
Epoch 41: Train Loss = 2.6037, Train Accuracy = 0.6081, Eval Loss = 2.1614, Eval Accuracy = 0.7802
Epoch 61: Train Loss = 1.6682, Train Accuracy = 0.8718, Eval Loss = 1.2199, Eval Accuracy = 0.9231
Epoch 81: Train Loss = 1.0594, Train Accuracy = 0.9670, Eval Loss = 0.6327, Eval Accuracy = 0.9927


In [17]:
# Setup
vocab_size = len(word_to_index)
hidden_size = 64
model = TranslationModel(vocab_size, hidden_size).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
criterion = nn.CrossEntropyLoss(ignore_index=word_to_index["EOS"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = TranslationDataset(french_to_english, word_to_index)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True, collate_fn=collate_batch)

# Train and evaluate
train_and_evaluate(model, dataloader, optimizer, criterion, 100, device)

Epoch 1: Train Loss = 6.0273, Train Accuracy = 0.0000, Eval Loss = 5.7802, Eval Accuracy = 0.0604
Epoch 21: Train Loss = 4.1146, Train Accuracy = 0.2584, Eval Loss = 3.8052, Eval Accuracy = 0.3557
Epoch 41: Train Loss = 2.8085, Train Accuracy = 0.5906, Eval Loss = 2.3113, Eval Accuracy = 0.7450
Epoch 61: Train Loss = 1.8466, Train Accuracy = 0.8154, Eval Loss = 1.3261, Eval Accuracy = 0.9329
Epoch 81: Train Loss = 1.1360, Train Accuracy = 0.9732, Eval Loss = 0.7022, Eval Accuracy = 1.0000
