In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Basic Sequence-to-Sequence Model
class SimpleSeq2Seq(nn.Module):
    """Embed token-id sequences, run them through an LSTM, and score the last step.

    Despite the name, the model produces one vector of ``output_size`` scores
    per input sequence (computed from the LSTM output at the final time step),
    not an output sequence.

    Args:
        input_size: Number of entries in the embedding table (vocabulary size).
        hidden_size: Embedding dimension, also used as the LSTM hidden size.
        output_size: Number of scores emitted per input sequence.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.embedding = nn.Embedding(input_size, hidden_size)
        # batch_first=True is required: forward() is fed (batch, seq_len) ids
        # and reads the final time step with output[:, -1, :]. With the default
        # (seq_len, batch, feature) layout the LSTM would recur across the
        # *batch* dimension and the slice would pick the last token column,
        # leaking information between unrelated sequences.
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return per-sequence scores for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids with shape ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, output_size)``.
        """
        embedded = self.embedding(x)        # (batch, seq_len, hidden_size)
        output, _ = self.rnn(embedded)      # (batch, seq_len, hidden_size)
        # Only the last time step of each sequence feeds the classifier.
        return self.fc(output[:, -1, :])

# Training Function
def train_simple_seq2seq(model, source, target, criterion, optimizer, epochs=10):
    """Run ``epochs`` full-batch optimisation steps and print the loss of each.

    Every epoch performs exactly one forward pass over the whole ``source``
    tensor, one backward pass, and one optimizer step.

    Args:
        model: Callable module mapping ``source`` to predictions.
        source: Input passed to ``model`` unchanged on every epoch.
        target: Reference values handed to ``criterion`` with the predictions.
        criterion: Loss callable invoked as ``criterion(predictions, target)``.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        epochs: Number of update steps to perform.

    Returns:
        None. Progress is reported on stdout, one line per epoch.
    """
    epoch = 0
    while epoch < epochs:
        # Forward pass and loss on the full batch.
        loss = criterion(model(source), target)

        # Drop gradients left over from the previous step, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss.item()}')
        epoch += 1

# Example usage: train the model on randomly generated token ids.
vocabulary_size = 1000
num_sentences, sentence_length = 100, 10

# One row of token ids per source sentence.
source_sentences = torch.randint(
    low=0, high=vocabulary_size, size=(num_sentences, sentence_length)
)
# One target class index per source sentence (a single id, not a sequence).
target_sentences = torch.randint(
    low=0, high=vocabulary_size, size=(num_sentences,)
)

model = SimpleSeq2Seq(
    input_size=vocabulary_size,
    hidden_size=256,
    output_size=vocabulary_size,
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

train_simple_seq2seq(
    model,
    source_sentences,
    target_sentences,
    criterion,
    optimizer,
    epochs=10,
)


Epoch 1/10, Loss: 6.920022010803223
Epoch 2/10, Loss: 5.800600528717041
Epoch 3/10, Loss: 4.181109428405762
Epoch 4/10, Loss: 2.7193994522094727
Epoch 5/10, Loss: 1.0923782587051392
Epoch 6/10, Loss: 0.4692320227622986
Epoch 7/10, Loss: 0.1878213733434677
Epoch 8/10, Loss: 0.08234763145446777
Epoch 9/10, Loss: 0.04030423238873482
Epoch 10/10, Loss: 0.021812839433550835
