<a href="https://colab.research.google.com/github/sashalex007/makemore-deeplearning/blob/main/makemore_mlp2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Run on the first CUDA GPU when one is visible to PyTorch; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
import os

# The corpus is read from a local checkout when present; otherwise Google
# Drive is mounted (Colab) and the copy stored there is used instead.
LOCAL_NAMES_PATH = 'Documents/deep_learning/names.txt'
DRIVE_NAMES_PATH = '/content/drive/MyDrive/colab_files/names.txt'

if os.path.isfile(LOCAL_NAMES_PATH):
    names_path = LOCAL_NAMES_PATH
else:
    from google.colab import drive
    drive.mount('/content/drive')
    names_path = DRIVE_NAMES_PATH

# `with` closes the file handle deterministically instead of leaking it.
with open(names_path, 'r') as names_file:
    words = names_file.read().splitlines()  # one entry per line of the file
words[:8]

In [None]:
# Number of lines read from names.txt (one entry of `words` per line).
len(words)

In [None]:
# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set(''.join(words)))
# Characters are numbered from 1 in sorted order ...
stoi = dict(zip(chars, range(1, len(chars) + 1)))
# ... and index 0 is given to '.', the marker appended to the end of each word.
stoi['.'] = 0
# Reverse lookup (index -> character), used to decode sampled indices.
itos = {index: symbol for symbol, index in stoi.items()}
print(itos)

In [None]:
def build_dataset(words, block_size=4):
    """Turn a list of words into (context, next-character) training pairs.

    Each word has '.' appended and is scanned left to right. For every
    character one sample is emitted: the input is the `block_size` preceding
    character indices (left-padded with 0, the index of '.'), and the target
    is the index of the character itself.

    Args:
        words: iterable of strings whose characters are all keys of the
            module-level `stoi` mapping.
        block_size: number of preceding characters used as context. Defaults
            to 4, the value previously hard-coded here.

    Returns:
        (X, Y): int64 tensors on the module-level `device`. X has shape
        (num_samples, block_size) and Y has shape (num_samples,).

    Raises:
        ValueError: if `block_size` is smaller than 1.
        KeyError: if a word contains a character missing from `stoi`.
    """
    if block_size < 1:
        raise ValueError(f"block_size must be >= 1, got {block_size}")

    X, Y = [], []
    for w in words:
        context = [0] * block_size
        for ch in w + '.':
            ix = stoi[ch]
            X.append(context)
            Y.append(ix)
            # Slide the window: drop the oldest index, append the newest.
            context = context[1:] + [ix]

    # Explicit dtype and shape keep the result well-formed even when `words`
    # is empty (a bare torch.tensor([]) would be float32 with shape (0,)).
    X = torch.tensor(X, dtype=torch.long, device=device).reshape(len(Y), block_size)
    Y = torch.tensor(Y, dtype=torch.long, device=device)
    print(X.shape, Y.shape)
    return X, Y

import random

# Shuffle a copy rather than `words` itself. Shuffling in place made this cell
# non-idempotent: every re-run permuted the already-shuffled list again and
# silently produced a different train/dev/test split.
shuffled_words = list(words)
random.seed(42)
random.shuffle(shuffled_words)

# Split by word: first 80% for training, next 10% for dev, final 10% for test.
n1 = int(0.8*len(shuffled_words))
n2 = int(0.9*len(shuffled_words))

Xtr, Ytr = build_dataset(shuffled_words[:n1])
Xdev, Ydev = build_dataset(shuffled_words[n1:n2])
Xte, Yte = build_dataset(shuffled_words[n2:])


In [None]:
# Shapes of the training inputs (context windows) and of their target indices.
Xtr.shape, Ytr.shape

In [None]:
# Model parameters, drawn from a seeded generator so that a fresh
# "Restart & Run All" always starts from the same initial weights.
g = torch.Generator().manual_seed(2147483647)

C = torch.randn(27, 20, generator=g).to(device)    # embedding table: 27 rows of 20-dim vectors
W1 = torch.randn(80, 120, generator=g).to(device)  # hidden layer: 80 inputs (4 context slots x 20 dims) -> 120 units
b1 = torch.randn(120, generator=g).to(device)      # hidden-layer bias
W2 = torch.randn(120, 27, generator=g).to(device)  # output layer: 120 hidden units -> 27 logits
b2 = torch.randn(27, generator=g).to(device)       # output bias
parameters = [C, W1, b1, W2, b2]

In [None]:
# Switch on gradient tracking for each tensor that training will update.
for tensor in parameters:
    tensor.requires_grad = True

In [None]:
# Mini-batch SGD. Each step samples a random batch, runs the embedding ->
# tanh hidden layer -> logits forward pass, back-propagates the cross-entropy
# loss, and applies one plain gradient-descent update.
# NOTE(review): 1e-5 is a very small step size for plain SGD - confirm it is
# intentional (e.g. a late-stage decay value) and not a leftover.
lr = 0.00001
batch_size = 32

for i in range(100000):

    # Draw the batch indices on the same device as the data so indexing does
    # not need a host-to-device copy on every step.
    ix = torch.randint(0, Xtr.shape[0], (batch_size,), device=Xtr.device)

    # Forward pass; the context embeddings are flattened into one row per
    # sample (width inferred from the tensor instead of a hard-coded 80).
    emb = C[Xtr[ix]]
    h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)

    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytr[ix])

    # Backward pass: clear stale gradients, then back-propagate.
    for p in parameters:
        p.grad = None
    loss.backward()

    # Update in place under no_grad so autograd does not record the step
    # (replaces writing through the discouraged `.data` attribute).
    with torch.no_grad():
        for p in parameters:
            p -= lr * p.grad

# Loss over the whole training set; no graph is needed for a pure evaluation.
with torch.no_grad():
    emb = C[Xtr]
    h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytr)
loss.item()

In [None]:
# Cross-entropy loss on the dev split. Wrapped in no_grad because this is a
# pure evaluation: building an autograd graph over the whole split only costs
# memory.
with torch.no_grad():
    emb = C[Xdev]
    # One flattened row of context embeddings per sample.
    h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ydev)
loss.item()

In [None]:
# Cross-entropy loss on the held-out test split. Wrapped in no_grad because
# this is a pure evaluation: building an autograd graph over the whole split
# only costs memory.
with torch.no_grad():
    emb = C[Xte]
    # One flattened row of context embeddings per sample.
    h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Yte)
loss.item()

In [None]:
# Sample 20 words from the trained model, one character at a time.
# The context length is derived from the parameter shapes (rows of W1 divided
# by the embedding width) so it cannot drift from what the model expects.
block_size = W1.shape[0] // C.shape[1]

# Sampling is inference only, so no autograd graph is recorded.
with torch.no_grad():
    for _ in range(20):
        out = []
        context = [0] * block_size  # start from an all-'.' (index 0) context
        while True:
            emb = C[torch.tensor([context], device=C.device)]
            h = torch.tanh(emb.view(1, -1) @ W1 + b1)
            logits = h @ W2 + b2
            probs = F.softmax(logits, dim=1)
            # Draw the next character index from the predicted distribution.
            ix = torch.multinomial(probs, num_samples=1).item()
            context = context[1:] + [ix]
            out.append(ix)
            if ix == 0:  # index 0 is '.', which ends the word
                break
        print(''.join(itos[i] for i in out))