In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [None]:
# Define the Encoder
class Encoder(nn.Module):
    """Embed source token indices and run them through a (stacked) GRU.

    Args:
        input_size: Number of rows in the embedding table, i.e. the size of
            the source vocabulary.
        hidden_size: Width of both the embedding vectors and the GRU state.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size: int, hidden_size: int, num_layers: int = 1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Embedding width equals the GRU input width, so no projection is needed.
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)

    def forward(self, x: torch.Tensor):
        """Encode a batch of index sequences.

        Args:
            x: Integer tensor of shape (batch_size, seq_length).

        Returns:
            The ``(outputs, hidden)`` pair produced by ``nn.GRU``. With
            ``batch_first=True`` ``outputs`` is
            (batch_size, seq_length, hidden_size) and ``hidden`` is
            (num_layers, batch_size, hidden_size).
        """
        # (batch_size, seq_length) -> (batch_size, seq_length, hidden_size)
        embedded = self.embedding(x)
        outputs, hidden = self.rnn(embedded)
        return outputs, hidden

# Define the Decoder
class Decoder(nn.Module):
    """Single-step GRU decoder producing vocabulary logits for one time step.

    Args:
        hidden_size: Width of the embedding vectors and of the GRU state.
        output_size: Size of the target vocabulary; used for both the
            embedding table and the output projection.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, hidden_size: int, output_size: int, num_layers: int = 1):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(output_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, num_layers, batch_first=True)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor, hidden: torch.Tensor):
        """Decode one time step.

        Args:
            x: Integer tensor holding one token index per batch element,
                shaped either (batch_size,) or (batch_size, 1).
            hidden: GRU state of shape (num_layers, batch_size, hidden_size).

        Returns:
            ``(prediction, hidden)`` where ``prediction`` holds unnormalised
            scores of shape (batch_size, output_size) and ``hidden`` is the
            updated GRU state.
        """
        # Only add the length-1 time axis when it is missing. Unsqueezing an
        # input that is already (batch_size, 1) would give the GRU a 4-D
        # tensor after embedding, which it rejects.
        if x.dim() == 1:
            x = x.unsqueeze(1)

        # (batch_size, 1) -> (batch_size, 1, hidden_size)
        embedded = self.embedding(x)

        output, hidden = self.rnn(embedded, hidden)
        # Drop the time axis before projecting to vocabulary scores.
        prediction = self.out(output.squeeze(1))
        return prediction, hidden

# Define the Seq2Seq model
class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with teacher forcing.

    Args:
        encoder: Module whose ``forward(source)`` returns ``(outputs, hidden)``.
        decoder: Module whose ``forward(token, hidden)`` returns
            ``(logits, hidden)`` and which exposes ``out.out_features`` as the
            target vocabulary size.
        device: Device on which the output buffer and the first decoder input
            are allocated.
        sos_token: Index fed to the decoder at the first step. When ``None``
            (the default) the module-level ``SOS_token`` is looked up at
            forward time.
    """

    def __init__(self, encoder, decoder, device, sos_token=None):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        self.sos_token = sos_token

    def forward(self, source, target, teacher_forcing_ratio=0.5):
        """Run the encoder once, then decode ``target.shape[1] - 1`` steps.

        Args:
            source: Integer tensor of shape (batch_size, source_len).
            target: Integer tensor of shape (batch_size, target_len).
            teacher_forcing_ratio: Probability, drawn independently per time
                step, of feeding the ground-truth token instead of the
                decoder's own arg-max prediction.

        Returns:
            Tensor of shape (batch_size, target_len, target_vocab_size).
            Index 0 along the time axis is never written and stays all zeros.
        """
        batch_size = source.shape[0]
        target_len = target.shape[1]
        target_vocab_size = self.decoder.out.out_features

        outputs = torch.zeros(
            batch_size, target_len, target_vocab_size, device=self.device
        )

        # Only the final encoder state is used to seed the decoder.
        _, hidden = self.encoder(source)

        sos = SOS_token if self.sos_token is None else self.sos_token
        # Shape (batch_size,), matching target[:, t] and the arg-max
        # predictions that are fed on every later step.
        decoder_input = torch.full(
            (batch_size,), sos, dtype=torch.long, device=self.device
        )

        for t in range(1, target_len):
            output, hidden = self.decoder(decoder_input, hidden)
            outputs[:, t] = output
            teacher_force = torch.rand(1).item() < teacher_forcing_ratio
            decoder_input = target[:, t] if teacher_force else output.argmax(1)

        return outputs

# Custom Dataset class
class TranslationDataset(Dataset):
    """Parallel-sentence dataset yielding (english, french) index tensors.

    Args:
        english_sentences: Sequence of whitespace-tokenisable source strings.
        french_sentences: Sequence of target strings, indexed in parallel
            with ``english_sentences``.
        eng_vocab: Mapping from source word to integer index.
        fr_vocab: Mapping from target word to integer index.
    """

    def __init__(self, english_sentences, french_sentences, eng_vocab, fr_vocab):
        self.english_sentences = english_sentences
        self.french_sentences = french_sentences
        self.eng_vocab = eng_vocab
        self.fr_vocab = fr_vocab

    def __len__(self):
        """Return the number of sentence pairs (taken from the English side)."""
        return len(self.english_sentences)

    def __getitem__(self, idx):
        """Return the ``idx``-th pair as two 1-D ``torch.long`` tensors.

        Raises:
            KeyError: If a word is missing from the corresponding vocabulary.
        """
        eng_sent = self.english_sentences[idx]
        fr_sent = self.french_sentences[idx]

        # Convert sentences to indices
        eng_indices = [self.eng_vocab[word] for word in eng_sent.split()]
        fr_indices = [self.fr_vocab[word] for word in fr_sent.split()]

        # Force an integer dtype: torch.tensor([]) would otherwise default to
        # float32 for an empty sentence, which nn.Embedding cannot consume.
        return (
            torch.tensor(eng_indices, dtype=torch.long),
            torch.tensor(fr_indices, dtype=torch.long),
        )

# Training function
def train(model, train_loader, optimizer, criterion, device):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: Module called as ``model(eng, fr)`` that returns scores of
            shape (batch_size, target_len, vocab_size).
        train_loader: Iterable of ``(eng, fr)`` tensor pairs.
        optimizer: Optimizer over ``model.parameters()``.
        criterion: Loss taking ``(scores, targets)`` with scores of shape
            (N, vocab_size) and targets of shape (N,).
        device: Device the batches are moved to before the forward pass.

    Returns:
        The average of the per-batch losses as a float, or ``0.0`` when the
        loader yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    for eng, fr in train_loader:
        eng, fr = eng.to(device), fr.to(device)

        optimizer.zero_grad()
        output = model(eng, fr)

        # Seq2Seq.forward starts decoding at t=1, so time step 0 of `output`
        # is an untouched all-zero row. Scoring it would only add a constant
        # to the loss, so it is dropped together with the matching target.
        # reshape (not view) because the slices are non-contiguous.
        scores = output[:, 1:].reshape(-1, output.shape[-1])
        targets = fr[:, 1:].reshape(-1)

        loss = criterion(scores, targets)
        loss.backward()

        # Clip the global gradient norm to 1 to keep RNN updates bounded.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    return total_loss / num_batches if num_batches else 0.0

# 

In [None]:
# Hyperparameters
HIDDEN_SIZE = 256
NUM_LAYERS = 2
BATCH_SIZE = 32
LEARNING_RATE = 0.001
NUM_EPOCHS = 20

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE: english_sentences, french_sentences, eng_vocab, fr_vocab, SOS_token
# and PAD_token are not defined in this notebook excerpt; they must exist
# before this cell runs.
# Create vocabulary (you'll need to implement this based on your data)
# eng_vocab = create_vocabulary(english_sentences)
# fr_vocab = create_vocabulary(french_sentences)

# Create model. Encoder and decoder share NUM_LAYERS because the encoder's
# final hidden state is passed straight in as the decoder's initial state.
encoder = Encoder(len(eng_vocab), HIDDEN_SIZE, NUM_LAYERS).to(device)
decoder = Decoder(HIDDEN_SIZE, len(fr_vocab), NUM_LAYERS).to(device)
model = Seq2Seq(encoder, decoder, device).to(device)


def pad_collate(batch):
    """Pad the (english, french) tensors of one batch to a common length.

    The default DataLoader collation stacks tensors and fails when the
    sentences in a batch differ in length, so each side is right-padded with
    PAD_token instead.
    """
    eng_batch, fr_batch = zip(*batch)
    # assumes PAD_token is the padding index in BOTH vocabularies - TODO confirm
    eng_padded = torch.nn.utils.rnn.pad_sequence(
        list(eng_batch), batch_first=True, padding_value=PAD_token
    )
    fr_padded = torch.nn.utils.rnn.pad_sequence(
        list(fr_batch), batch_first=True, padding_value=PAD_token
    )
    return eng_padded, fr_padded


# Create dataset and dataloader
dataset = TranslationDataset(english_sentences, french_sentences, eng_vocab, fr_vocab)
train_loader = DataLoader(
    dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=pad_collate
)

# Define optimizer and loss. Padded target positions are excluded from the loss.
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_token)

# Training loop
for epoch in range(NUM_EPOCHS):
    loss = train(model, train_loader, optimizer, criterion, device)
    print(f'Epoch: {epoch+1:02} | Loss: {loss:.3f}')