# Cell Descriptions
1. **Install Dependencies**: Installs the required Python packages, including PyTorch and NumPy.
2. **Data Preparation**: Prepares the text data for training by encoding characters into integers and creating input-output pairs.
3. **Training Data Preparation**: Converts the input-output pairs into PyTorch tensors for model training.
4. **Model Definition**: Defines the TinyGPT model, including its embedding and linear layers.
5. **Training Loop**: Trains the TinyGPT model using the prepared data and prints the loss every 100 steps.
6. **Text Generation Function**: Implements a function to generate text using the trained TinyGPT model.

In [2]:
%pip install torch
%pip install numpy

Note: you may need to restart the kernel to use updated packages.
Collecting numpy
  Downloading numpy-2.2.4-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Downloading numpy-2.2.4-cp313-cp313-macosx_14_0_arm64.whl (5.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.1/5.1 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: numpy
Successfully installed numpy-2.2.4
Note: you may need to restart the kernel to use updated packages.


In [3]:
# Preparation of data
import torch
import torch.nn as nn
import torch.nn.functional as F


# Tiny training corpus: a single sentence
text = """The quick brown fox jumps over the lazy dog"""

# Character-level vocabulary: every distinct character, in sorted order
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables: token id --> char, then the inverse char --> token id
itos = dict(enumerate(chars))
stoi = {ch: i for i, ch in itos.items()}


# Encoding: string to list of integers (tokens)
def encode(s):
    """Return the list of token ids for the characters of `s`, in order.

    Each character is looked up in the module-level `stoi` table; a
    character that is not a key of `stoi` raises KeyError.
    """
    return list(map(stoi.__getitem__, s))

# Decoding: list of integers (tokens) to string
def decode(l):
    """Return the string spelled by the token ids in `l`, in order.

    Each id is looked up in the module-level `itos` table; an id that is
    not a key of `itos` raises KeyError.
    """
    return ''.join(map(itos.__getitem__, l))


In [4]:
# 2. Prepare Training Data


block_size = 8  # Number of input characters the model sees at a time

# Slide a window of `block_size` characters over the text. Each window is one
# input example; its target is the same window shifted right by one
# character, so position t of the target is the character that follows
# position t of the input.
#
# Both lists are converted to PyTorch tensors straight away, since the model
# consumes tensors rather than Python lists.

# Input sequences
X = torch.tensor([
    encode(text[start:start + block_size])
    for start in range(len(text) - block_size)
])

# Target sequences (next character for each input position)
Y = torch.tensor([
    encode(text[start + 1:start + block_size + 1])
    for start in range(len(text) - block_size)
])

In [5]:
# Build My Tiny GPT Model

# NN
class TinyGPT(nn.Module):
    """Minimal character-level language model: an embedding table plus a linear head.

    Each position's logits are computed from the token at that position only
    (embedding lookup followed by a linear projection); positions do not
    interact with each other.

    Args:
        vocab_size: Number of distinct token ids.
        n_embed: Size of each token embedding vector.
    """

    def __init__(self, vocab_size, n_embed=32):
        super().__init__()

        # Learnable embedding table that maps token ids to vectors
        self.token_embedding_table = nn.Embedding(vocab_size, n_embed)

        # Linear layer that projects each embedding to vocab-size logits
        self.lm_head = nn.Linear(n_embed, vocab_size)

    def forward(self, idx, targets=None):
        """Compute next-token logits and, when targets are given, the loss.

        Args:
            idx: Integer token ids of shape (batch_size, block_size).
            targets: Optional integer token ids with the same shape as `idx`.

        Returns:
            A `(logits, loss)` tuple.
            * Without targets: logits has shape
              (batch_size, block_size, vocab_size) and loss is None.
            * With targets: logits is flattened to
              (batch_size * block_size, vocab_size) and loss is the mean
              cross-entropy between logits and targets.
        """
        # Look up embeddings for the input tokens
        embeddings = self.token_embedding_table(idx)  # (B, T, n_embed)

        # Raw (unnormalized) predictions for each position
        logits = self.lm_head(embeddings)  # (B, T, vocab_size)

        if targets is None:
            return logits, None  # For inference/generation only

        B, T, C = logits.shape  # B: batch size, T: block size, C: vocab size

        # F.cross_entropy expects (N, C) logits and (N,) class indices, so
        # batch and time are merged into one axis. reshape is used instead of
        # view so that non-contiguous targets (e.g. a transposed or sliced
        # tensor) are accepted rather than raising a RuntimeError.
        logits = logits.reshape(B * T, C)
        targets = targets.reshape(B * T)

        # Compute the loss between predictions and targets
        loss = F.cross_entropy(logits, targets)
        return logits, loss


In [7]:
# 4. Train the Model

# Instantiate the model
# Build the model and an Adam optimizer over all of its parameters
model = TinyGPT(vocab_size)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Full-batch training: every step uses the whole (X, Y) dataset
for i in range(500):
    # Start each step from clean gradients
    optimizer.zero_grad()

    # Forward pass: predictions and loss against the targets
    logits, loss = model(X, targets=Y)

    # Backward pass, then one optimizer update
    loss.backward()
    optimizer.step()

    # Report progress on steps 0, 100, 200, ...
    if not i % 100:
        print(f"Step {i}, Loss: {loss.item():.4f}")

Step 0, Loss: 3.5435
Step 100, Loss: 1.8371
Step 200, Loss: 1.0007
Step 300, Loss: 0.7554
Step 400, Loss: 0.6895


# Text Generation Function
This cell defines a function `generate` that generates text using the trained TinyGPT model. The function takes a starting text and generates a sequence of tokens by sampling from the model's predictions. It uses the softmax function to convert logits into probabilities and samples the next token based on these probabilities. The generated tokens are then decoded back into text.

In [11]:
# 5. Text Generation Function

def generate(model, start_text="Th ", max_new_tokens=100):
    """Generate text by repeatedly sampling the next character from `model`.

    Args:
        model: Module called as `model(idx)` that returns `(logits, _)` with
            logits of shape (batch, time, vocab).
        start_text: Non-empty prompt. Every character must be encodable by
            `encode`.
        max_new_tokens: Number of characters to sample and append.

    Returns:
        The prompt followed by the sampled characters, as a string.

    Raises:
        ValueError: If `start_text` is empty (there would be no last
            position to predict from).
        KeyError: If `start_text` contains a character `encode` cannot map.

    Note:
        The model is switched to evaluation mode and left in that mode.
    """
    if not start_text:
        raise ValueError("start_text must contain at least one character")

    # Set model to evaluation mode
    model.eval()

    # Convert start text to token ids, shape (1, len(start_text))
    context = torch.tensor(
        [encode(start_text)], dtype=torch.long
    )

    # Sampling needs no gradients; skip building the autograd graph
    with torch.no_grad():
        for _ in range(max_new_tokens):

            # Only use the last `block_size` tokens for prediction
            logits, _ = model(context[:, -block_size:])

            # Focus on the last time step's logits
            probs = F.softmax(logits[:, -1, :], dim=-1)

            # Sample the next token from the probability distribution
            next_id = torch.multinomial(probs, num_samples=1)

            # Append the sampled token to the context
            context = torch.cat((context, next_id), dim=1)

    # Convert the token ids back to a string
    return decode(context[0].tolist())


# Sample 100 new characters after the prompt "abc" (all three characters
# occur in `text`, so `encode` can map them). The next token is drawn with
# torch.multinomial, so the printed text can differ from run to run.
print(generate(model, start_text="abc"))


abck ox jumps over juick fox doverox jumps own quickr fove own qumpazy quick broverove lazy tazy the oz
