<a href="https://colab.research.google.com/github/saparbayev-azizbek-12/bi-and-ai-talents-dl/blob/main/lesson-11/Bengio_MLP_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Homework

In [2]:
%%capture
!wget https://raw.githubusercontent.com/karpathy/makemore/refs/heads/master/names.txt

In [3]:
import torch
import torch.nn.functional as F

# Load the training names, one per line. The context manager closes the file
# handle deterministically, and the explicit encoding avoids depending on the
# platform default.
with open('names.txt', encoding='utf-8') as f:
    names = f.read().splitlines()

# Vocabulary: every distinct character found in the names plus '.', which the
# dataset-building code appends to each name as its terminator.
vocab = sorted(set(''.join(names) + '.'))
# stoi: character -> integer id (position in the sorted vocabulary).
stoi = {v: i for i, v in enumerate(vocab)}
# itos: integer id -> character (inverse of stoi).
itos = {i: v for v, i in stoi.items()}

def encode(name: str) -> list[int]:
    """Return the integer id of every character in *name*, in order.

    Ids come from the module-level ``stoi`` table; a character that is not
    in the table raises ``KeyError``.
    """
    return list(map(stoi.__getitem__, name))

def decode(seq: list[int]) -> str:
    """Return the string spelled by the integer ids in *seq*.

    Characters come from the module-level ``itos`` table; an id that is not
    in the table raises ``KeyError``.
    """
    chars = (itos[idx] for idx in seq)
    return ''.join(chars)

In [4]:
# Number of preceding characters the model sees when predicting the next one.
block_size = 3
X, Y = [], []

# Slide a window of `block_size` ids over each name (terminated by '.').
# The window is left-padded with id 0, so the first target of every name is
# predicted from an all-zero context.
for name in names:
    targets = [stoi[ch] for ch in name + '.']
    padded = [0] * block_size + targets
    for pos, ix in enumerate(targets):
        # padded[pos:pos + block_size] holds the ids that precede target `pos`.
        X.append(padded[pos:pos + block_size])
        Y.append(ix)

X = torch.tensor(X)
Y = torch.tensor(Y)

In [5]:
class BengioMLP(torch.nn.Module):
    """Character-level MLP language model with a direct input-to-output path.

    Each of the ``block_size`` context ids is looked up in the embedding
    table ``C``; the embeddings are concatenated and fed through a tanh
    hidden layer (``W1``, ``b1``) to an output layer (``W2``, ``b2``). A
    second linear map (``W3``) connects the concatenated embeddings straight
    to the logits, bypassing the hidden layer.

    Args:
        vocab_size: Number of distinct token ids (rows of ``C`` and width of
            the logits).
        n_embd: Embedding dimension per context position.
        n_hidden: Width of the tanh hidden layer.
        block_size: Number of context ids per example.
        scale: Multiplier applied to the standard-normal initial weights.
    """

    def __init__(
        self,
        vocab_size: int,
        n_embd: int = 10,
        n_hidden: int = 100,
        block_size: int = 3,
        scale: float = 0.1,
    ) -> None:
        super().__init__()
        # NOTE: this reseeds torch's *global* RNG, so every instance starts
        # from identical weights and any sampling done afterwards continues
        # from this seeded state.
        torch.manual_seed(42)
        self.block_size = block_size
        self.C = torch.nn.Parameter(torch.randn(vocab_size, n_embd) * scale)
        self.W1 = torch.nn.Parameter(torch.randn(block_size * n_embd, n_hidden) * scale)
        self.b1 = torch.nn.Parameter(torch.zeros(n_hidden))
        self.W2 = torch.nn.Parameter(torch.randn(n_hidden, vocab_size) * scale)
        self.b2 = torch.nn.Parameter(torch.zeros(vocab_size))
        # Direct embeddings -> logits connection (no bias of its own).
        self.W3 = torch.nn.Parameter(torch.randn(block_size * n_embd, vocab_size) * scale)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """Return unnormalized logits for a batch of context windows.

        Args:
            X: Integer tensor of token ids whose first dimension is the
                batch; the remaining dimensions are flattened after the
                embedding lookup and must total ``block_size`` ids.

        Returns:
            Tensor of shape ``(batch, vocab_size)``.
        """
        xenc = self.C[X]
        # Concatenate the per-position embeddings into one row per example.
        x_emb = xenc.view(xenc.size(0), -1)
        h = torch.tanh(x_emb @ self.W1 + self.b1)
        logits = h @ self.W2 + self.b2 + x_emb @ self.W3
        return logits

    def loss(self, X: torch.Tensor, Y: torch.Tensor) -> torch.Tensor:
        """Return the mean cross-entropy between ``forward(X)`` and targets ``Y``."""
        logits = self.forward(X)
        return F.cross_entropy(logits, Y)

    @torch.no_grad()  # sampling needs no gradients; skip building autograd graphs
    def generate(self, num_samples: int = 10) -> None:
        """Sample ``num_samples`` names from the model and print one per line.

        Sampling starts from an all-zero context and continues until the
        sampled id maps to ``'.'``, which is not included in the output.
        Relies on the module-level ``itos`` table to turn ids into
        characters.
        """
        for _ in range(num_samples):
            out = []
            context = [0] * self.block_size
            while True:
                x = torch.tensor([context])
                logits = self.forward(x)
                probs = F.softmax(logits, dim=1)
                ix = torch.multinomial(probs, num_samples=1).item()
                if itos[ix] == '.':
                    break
                out.append(itos[ix])
                # Slide the window: drop the oldest id, append the new one.
                context = context[1:] + [ix]
            print(''.join(out))

In [None]:
vocab_size = len(vocab)
model = BengioMLP(vocab_size)
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
lossi = []  # per-step training loss, stored as plain Python floats

# Full-batch training: every step computes the loss over all of X and Y.
for step in range(201):
    optimizer.zero_grad()
    loss = model.loss(X, Y)
    loss.backward()
    optimizer.step()
    # Record a float rather than the tensor itself: the tensor keeps its
    # autograd graph alive for as long as it sits in the list, and a list of
    # grad-requiring tensors cannot be converted to NumPy for plotting.
    loss_value = loss.item()
    lossi.append(loss_value)
    if step % 20 == 0:
        print(f"Step {step:3d} | Loss: {loss_value:.4f}")

Step   0 | Loss: 3.3012
Step  20 | Loss: 2.3816
Step  40 | Loss: 2.3148
Step  60 | Loss: 2.2875
Step  80 | Loss: 2.2535
Step 100 | Loss: 2.2200
Step 120 | Loss: 2.1874
Step 140 | Loss: 2.1630


In [None]:
import matplotlib.pyplot as plt
# Plot the recorded training loss per step. Each entry is converted with
# float() first so the plot also works when `lossi` holds 0-d tensors that
# still require grad (those cannot be converted to NumPy directly).
plt.plot([float(value) for value in lossi])
plt.show()

In [None]:
# Print ten names sampled from the model.
model.generate(num_samples=10)