In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

# Toy task: given the sequence [0, 1, 2, 3], the model should predict 4.
# Every token is represented as a one-hot vector of length `vocab_size`.
vocab_size = 10

# A single training example, laid out as (batch, seq_len).
input_seq = torch.tensor([[0, 1, 2, 3]])
# Class index of the token that follows the sequence.
target = torch.tensor([4])

# Expand each token id into a float32 one-hot row: (batch, seq_len, vocab_size).
input_onehot = nn.functional.one_hot(input_seq, vocab_size).to(torch.float32)

# Define simple RNN model
class TinyRNN(nn.Module):
    """An ``nn.RNN`` followed by a linear read-out of the final time step.

    Args:
        input_size: Number of features in each time step of the input.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of values produced by the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, seq_len, features).
        self.rnn = nn.RNN(
            input_size=input_size,
            hidden_size=hidden_size,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Return the linear projection of the RNN output at the last step.

        Args:
            x: Batched input of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        # per_step_states: (batch, seq_len, hidden); the final hidden state
        # returned alongside it is not needed here.
        per_step_states, _final_state = self.rnn(x)
        # Keep only the representation produced at the last time step.
        closing_state = per_step_states[:, -1, :]
        logits = self.fc(closing_state)
        return logits

# Build the network: one-hot width in, one score per vocabulary entry out.
model = TinyRNN(vocab_size, hidden_size=16, output_size=vocab_size)

# Cross-entropy objective, minimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Fit the single training example, reporting progress periodically.
for epoch in range(200):
    optimizer.zero_grad()  # drop gradients left over from the previous step
    output = model(input_onehot)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()

    if epoch % 40:
        continue  # only report on every 40th epoch

    # The reported loss and prediction come from the forward pass made
    # before this epoch's parameter update.
    pred = output.argmax(dim=1).item()
    print(f"Epoch {epoch} | Loss: {loss.item():.4f} | Predicted: {pred}")


Epoch 0 | Loss: 2.4934 | Predicted: 0
Epoch 40 | Loss: 0.0034 | Predicted: 4
Epoch 80 | Loss: 0.0017 | Predicted: 4
Epoch 120 | Loss: 0.0013 | Predicted: 4
Epoch 160 | Loss: 0.0010 | Predicted: 4
