# Assignment 2

In this assignment, you will continue working with the Bigram Language Model from the lecture. Implement the training loop and the inference procedure for the model.

## Importing Libraries

In [None]:
import os
import math
from dataclasses import dataclass
import torch
from torch.nn import functional as F
from utils import load_text, set_seed

## Configuration

In [None]:
@dataclass
class BigramConfig:
    """Settings shared by the cells of the bigram language model notebook."""

    # Dataset location. The root directory is computed from the working
    # directory once, when the class is defined; the dataset path is
    # concatenated onto it when the dataset is loaded.
    root_dir: str = f"{os.getcwd()}/../../"
    dataset_path: str = "data/names.txt"

    # Tokenizer
    vocab_size: int = 0  # Placeholder, overwritten once the vocabulary exists

    # Seed handed to set_seed()
    seed: int = 101


config = BigramConfig()

## Reproducibility

In [None]:
# Pass the configured seed to the project's set_seed helper
# (presumably seeds the random number generators used below - confirm in utils).
set_seed(config.seed)

## Dataset

In [None]:
# Load the text found at root_dir + dataset_path and split it into a list with
# one entry per line.
names = load_text(config.root_dir + config.dataset_path).splitlines()

## Preprocessing

In [None]:
# Surround every name with the "." special token so that each one has an
# explicit boundary marker on both sides.
names = [f".{raw_name}." for raw_name in names]

## Tokenizer

In [None]:
# Vocabulary: the "." special token at index 0, then the lowercase letters a-z
chars = ["."] + [chr(code) for code in range(ord("a"), ord("z") + 1)]
config.vocab_size = len(chars)

# Lookup tables: character -> index for encoding, index -> character for decoding
str2idx = dict(zip(chars, range(len(chars))))
idx2str = dict(enumerate(chars))

## Model

In [None]:
# Trainable parameters, both drawn from a standard normal distribution:
#   W - square matrix with vocab_size rows and vocab_size columns
#   b - vector with vocab_size entries
# W is created before b so the order of random draws stays fixed.
W = torch.randn((config.vocab_size, config.vocab_size), requires_grad=True)
b = torch.randn((config.vocab_size,), requires_grad=True)
params = [W, b]

## Training

#### Task 1: Train Bigram Language Model (Neural Network Approach)

Make the training loop for the Bigram Language Model.

In [None]:
# Set of Input, Target pairs
# Every pair of adjacent characters in a name yields one training example:
# the index of the first character is the input and the index of the
# character that follows it is the target.
inputs, targets = [], []
for name in names:
    for char1, char2 in zip(name, name[1:]):
        # Append the indices directly; a module-level variable called `input`
        # would shadow the builtin input() for every later cell.
        inputs.append(str2idx[char1])
        targets.append(str2idx[char2])

# Convert to tensor
inputs = torch.tensor(inputs, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

In [None]:
# Sanity check: report how many pairs were built, the shapes of both tensors,
# and the first two (input, target) index pairs.
print(f"Number of Input, Target pairs: {len(inputs)}")
print(f"Input shape: {inputs.shape}")
print(f"Target shape: {targets.shape}")
print(f"First (Input, Target): ({inputs[0]}, {targets[0]})")
print(f"Second (Input, Target): ({inputs[1]}, {targets[1]})")

In [None]:
################################################################################
# TODO:                                                                        #
# One-hot encode the input tensor.                                             #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# Convert data type to float
# NOTE: `inputs_encoded` is expected to be defined by the TODO section above;
# until that section is filled in, this line raises NameError.
inputs_encoded = inputs_encoded.float()

In [None]:
# Training Loop
# Runs `steps` iterations; the loss in each iteration covers every entry of
# `targets`. `lr` is not used by the template code itself (presumably the
# learning rate for the weight-update TODO below).
steps = 100
lr = 10

for step in range(1, steps + 1):
    # Forward pass
    ################################################################################
    # TODO:                                                                        #
    # Implement the forward pass.                                                  #
    ################################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    
    # loss
    # `probs` must be produced by the forward-pass section above
    # (assumes one row of probabilities per pair - TODO confirm).
    log_probs = torch.log(probs + 1e-9)  # Add small value to prevent log(0)
    # Mean negative log-likelihood: take each row's log-probability at its
    # target index, average over all pairs, and negate.
    loss = -log_probs[torch.arange(len(targets)), targets].mean()
    
    # Backward pass
    ################################################################################
    # TODO:                                                                        #
    # Implement the backward pass.                                                 #
    ################################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    
    # Update weights
    ################################################################################
    # TODO:                                                                        #
    # Update the weights of the model using the gradients.                         #
    ################################################################################
    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    
    # Report progress on every 10th step
    if step % 10 == 0:
        print(f"Step {step}, Loss {loss.item():.4f}")

## Inference

#### Task 2: Generate a Name

Create a function to generate a name using the trained Bigram Language Model.

In [None]:
# Create a function to generate a name
def generate_name():
    """Build one name character by character and return it as a string.

    Generation starts from the index of the "." special token. The loop body
    is left as a TODO; as given it contains only comments, so the function
    does not parse until the section is filled in.

    Returns:
        The entries of ``new_name`` joined into a single string.
    """
    new_name = []
    start_idx = str2idx["."]
    
    while True:
        ################################################################################
        # TODO:                                                                        #
        # 1. Forward pass                                                              #
        # 2. Sample the next token                                                     #
        # 3. Decode the token                                                          #
        # 4. Update the start_idx                                                      #
        # 5. Break if the next character is "."                                        #
        ################################################################################
        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
        # Forward pass

        
        # Sample

        
        # Decode

        
        # Update

        
        # Break if "."

        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
    return ''.join(new_name)

# Generate 5 names
for _ in range(5):
    print(generate_name())

## Extra Credit

We have already made our own custom auto-grad Tensor class. Let's use it!

Train the Bigram Language Model using our custom auto-grad Tensor class.

**Do not use any built-in PyTorch functions.** (Other deep learning libraries are also prohibited.)

In [None]:
class Tensor:
    """Scalar value that records the operations applied to it for reverse-mode autodiff.

    Each arithmetic operation returns a new ``Tensor`` that remembers its
    operands and a closure that pushes its gradient back onto them.
    Operands of ``+``, ``*`` and ``-`` must themselves be ``Tensor`` objects.

    Attributes:
        data: The wrapped numeric value.
        gradient: Derivative, with respect to this tensor, of the tensor that
            ``backward()`` was most recently called on; 0 before any backward
            pass has reached this tensor.
    """

    def __init__(self, data, _children=(), _operation=''):
        """Wrap ``data``; ``_children`` and ``_operation`` describe how it was produced."""
        self.data = data
        self._prev = set(_children)
        # Keep the label of the producing operation so a graph can be inspected.
        self._operation = _operation
        self.gradient = 0
        self._backward = lambda: None

    def __repr__(self):
        return f"tensor=({self.data})"

    def __add__(self, other):  # self + other
        """Return ``self + other`` as a new Tensor."""
        output = Tensor(self.data + other.data, (self, other), '+')

        def _backward():
            # Accumulate rather than assign: a tensor feeding several
            # operations (or both operands, as in x + x) receives the sum of
            # all contributions.
            self.gradient += 1 * output.gradient
            other.gradient += 1 * output.gradient
        output._backward = _backward
        return output

    def __mul__(self, other):  # self * other
        """Return ``self * other`` as a new Tensor."""
        output = Tensor(self.data * other.data, (self, other), '*')

        def _backward():
            # With accumulation, x * x correctly yields 2 * x.
            self.gradient += other.data * output.gradient
            other.gradient += self.data * output.gradient
        output._backward = _backward
        return output

    def tanh(self):  # tanh(self)
        """Return ``tanh(self)`` as a new Tensor."""
        output = Tensor(math.tanh(self.data), (self,), 'tanh')

        def _backward():
            # d/dx tanh(x) = 1 - tanh(x)^2; output.data already holds tanh(x).
            self.gradient += (1.0 - output.data ** 2) * output.gradient
        output._backward = _backward
        return output

    def __pow__(self, power):  # self ** power
        """Return ``self ** power`` for a plain int or float exponent."""
        assert isinstance(power, (int, float)), "Power must be an int or a float"
        output = Tensor(self.data ** power, (self,), f'**{power}')

        def _backward():
            self.gradient += power * (self.data ** (power - 1)) * output.gradient
        output._backward = _backward
        return output

    def backward(self):
        """Compute the gradient of this tensor with respect to every tensor it depends on.

        Gradients of all tensors in this tensor's graph are reset before the
        pass, so calling ``backward()`` repeatedly gives the same result each
        time instead of summing across calls.
        """
        # Iterative post-order traversal: a recursive one exceeds Python's
        # recursion limit on long chains such as a sum over many terms.
        topo = []
        visited = set()
        stack = [(self, False)]
        while stack:
            node, children_done = stack.pop()
            if children_done:
                topo.append(node)
            elif node not in visited:
                visited.add(node)
                # Revisit the node once everything it depends on is emitted.
                stack.append((node, True))
                stack.extend(
                    (child, False) for child in node._prev if child not in visited
                )

        # Clear gradients left by an earlier pass; the closures accumulate.
        for node in topo:
            node.gradient = 0
        self.gradient = 1
        for node in reversed(topo):
            node._backward()

    def __neg__(self):  # -self
        """Return ``-self`` as a new Tensor."""
        return self * Tensor(-1.0)

    def __sub__(self, other):  # self - other
        """Return ``self - other`` as a new Tensor."""
        return self + (-other)

In [None]:
################################################################################
# TODO:                                                                        #
# Train the Bigram Language Model using the custom auto-grad Tensor class.     #
################################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****

# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****