In [11]:
import torch

# Our training data: "hello"
text = "hello"

# Create a vocabulary (unique characters)
chars = ['h', 'e', 'l', 'o']
vocab_size = len(chars)  # 4 characters

# Create a dictionary to map characters to numbers
char_to_idx = {char: i for i, char in enumerate(chars)}
print(char_to_idx)  # {'h':0, 'e':1, 'l':2, 'o':3}

# Convert the entire "hello" to numbers
encoded_text = [char_to_idx[c] for c in text]
print(encoded_text)  # [0, 1, 2, 2, 3]

{'h': 0, 'e': 1, 'l': 2, 'o': 3}
[0, 1, 2, 2, 3]


In [6]:
# Define the input sequence length (context window)
seq_length = 3  # Model sees 3 characters to predict the 4th

# Create input sequences (X) and target characters (y)
X = []  # Inputs (lists of 3 numbers)
y = []  # Targets (single number)

for i in range(len(encoded_text) - seq_length):
    X.append(encoded_text[i:i+seq_length])  # First 3 chars
    y.append(encoded_text[i+seq_length])    # 4th char

# Convert to PyTorch tensors (arrays)
X = torch.tensor(X)
y = torch.tensor(y)

print("Inputs (X):\n", X)
print("Targets (y):\n", y)

Inputs (X):
 tensor([[0, 1, 2],
        [1, 2, 2]])
Targets (y):
 tensor([2, 3])


In [7]:
class TinyModel(torch.nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.embedding = torch.nn.Embedding(vocab_size, 8)  # 8-dimensional embeddings
        self.rnn = torch.nn.RNN(8, 16, batch_first=True)    # 16 hidden units
        self.fc = torch.nn.Linear(16, vocab_size)           # Final prediction layer

    def forward(self, x):
        # Step 1: Embed the input (turn numbers into vectors)
        x = self.embedding(x)  # Shape: (batch_size, seq_length, 8)

        # Step 2: Pass through RNN
        out, _ = self.rnn(x)   # Shape: (batch_size, seq_length, 16)

        # Step 3: Take the last output of the RNN
        out = out[:, -1, :]    # Shape: (batch_size, 16)

        # Step 4: Predict the next character
        logits = self.fc(out)  # Shape: (batch_size, vocab_size)
        return logits

model = TinyModel(vocab_size)
print(model)

TinyModel(
  (embedding): Embedding(4, 8)
  (rnn): RNN(8, 16, batch_first=True)
  (fc): Linear(in_features=16, out_features=4, bias=True)
)


In [8]:
# Loss function: Measures how wrong the model is
criterion = torch.nn.CrossEntropyLoss()

# Optimizer: Adjusts the model's weights to reduce loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

# Training loop (teach the model 100 times)
for epoch in range(100):
    # Forward pass
    outputs = model(X)  # Model makes predictions
    loss = criterion(outputs, y)  # Compare predictions to targets

    # Backward pass (learn from mistakes)
    optimizer.zero_grad()  # Reset gradients
    loss.backward()        # Compute gradients
    optimizer.step()       # Update weights

    # Print progress
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 1.2616
Epoch 10, Loss: 0.0011
Epoch 20, Loss: 0.0001
Epoch 30, Loss: 0.0000
Epoch 40, Loss: 0.0000
Epoch 50, Loss: 0.0000
Epoch 60, Loss: 0.0000
Epoch 70, Loss: 0.0000
Epoch 80, Loss: 0.0000
Epoch 90, Loss: 0.0000


In [9]:
def predict_next_char(model, start_str):
    model.eval()  # Switch to evaluation mode
    chars = [char_to_idx[c] for c in start_str]  # Convert "hel" to [0,1,2]

    # Predict next character
    x = torch.tensor(chars).unsqueeze(0)  # Add batch dimension
    pred = model(x)                       # Shape: (1, 4)
    next_char_idx = torch.argmax(pred).item()  # Get most likely index

    # Convert back to character
    next_char = idx_to_char[next_char_idx]
    return next_char

# Test the model
start_str = "hel"
predicted_char = predict_next_char(model, start_str)
print(f"After '{start_str}', the model predicts: '{predicted_char}'")

After 'hel', the model predicts: 'e'
