In [65]:
import torch
import random
import torch.nn.functional as F
from tqdm import trange

In [66]:
# Read one name per line. The context manager closes the file handle as soon
# as the read finishes instead of leaving it to the garbage collector.
with open('../names.txt', 'r') as f:
    names = f.read().splitlines()

# Vocabulary: every distinct character that occurs in the names, plus the '.'
# used here as the join separator; sorted so the ordering is deterministic.
chars = sorted(set('.'.join(names)))

# Lookup tables: index -> character and character -> index.
itos = {i:c for i, c in enumerate(chars)}
stoi = {c:i for i, c in enumerate(chars)}

In [67]:
# Model hyperparameters.
context_len, feature_count, w_size = (
    6,    # number of preceding character indices fed to the model per example
    25,   # width of each character's embedding vector (columns of C)
    300,  # number of units in the hidden layer
)

In [68]:
def build_dataset(names):
    """Turn names into (context window, next character) training pairs.

    Each name is terminated with '.', and a window of `context_len` character
    indices (initially all zeros) slides over it. Every position yields one
    example: the current window as the input and the index of the character
    that follows it as the target.

    Args:
        names: iterable of strings; every character (and '.') must be a key
            of the module-level `stoi` table.

    Returns:
        (X, Y): `X` holds one context window per row, `Y` holds the matching
        target character indices, both as tensors.
    """
    inputs, targets = [], []

    for name in names:
        window = [0] * context_len
        for ch in name + '.':
            target = stoi[ch]
            inputs.append(window)
            targets.append(target)
            # Slide by building a new list, so rows already stored are not mutated.
            window = window[1:] + [target]

    return torch.tensor(inputs), torch.tensor(targets)

In [69]:
# Fixed seed so the train/validation partition is identical on every run.
SPLIT_SEED = 42

# Shuffle a copy with a dedicated RNG. Shuffling `names` in place would
# reorder it again each time this cell is re-run, moving names between the
# two splits (and into the training set after the model has been trained).
shuffled_names = list(names)
random.Random(SPLIT_SEED).shuffle(shuffled_names)

# First 80% of the shuffled names for training, the remainder for validation.
n = int(len(shuffled_names) * 0.8)

Xtrain, Ytrain = build_dataset(shuffled_names[:n])
Xval, Yval = build_dataset(shuffled_names[n:])

In [70]:
# Size every vocabulary-dependent tensor from the lookup table instead of a
# hard-coded 27, so the model always matches the characters actually present.
vocab_size = len(stoi)

# Weights are scaled by 0.01 and biases start at exactly zero, so the initial
# logits are close to zero.
C = torch.randn(vocab_size, feature_count) * 0.01              # embedding table, one row per character
W1 = torch.randn(feature_count*context_len, w_size) * 0.01     # hidden-layer weights
b1 = torch.zeros(w_size)                                       # hidden-layer bias
W2 = torch.randn(w_size, vocab_size) * 0.01                    # output-layer weights
b2 = torch.zeros(vocab_size)                                   # output-layer bias

parameters = [C, W1, b1, W2, b2]

In [71]:
# Turn on gradient tracking for every model tensor so that backward() can
# populate their .grad fields.
for tensor in parameters:
    tensor.requires_grad = True

In [87]:
MAX_STEPS = 10000   # number of optimisation steps
BATCH_SIZE = 50     # training examples sampled per step

optim = torch.optim.Adam(parameters)
for _ in (t:=trange(MAX_STEPS)):
    # Draw a random mini-batch of row indices (sampled with replacement).
    ix = torch.randint(0, Xtrain.shape[0], (BATCH_SIZE,))

    # Forward pass: look up the embedding of each context character, flatten
    # them into one vector per example, then hidden tanh layer and output layer.
    emb = C[Xtrain[ix]].flatten(1)
    hpreact = emb @ W1 + b1
    h = torch.tanh(hpreact)
    logits = h @ W2 + b2
    # Cross-entropy exponentiates the logits, normalises them into
    # probabilities, picks the probability assigned to the correct next
    # character and takes its negative log. A low probability for the right
    # answer therefore gives a high loss, while a probability close to 1
    # gives a loss close to 0.
    loss = F.cross_entropy(logits, Ytrain[ix])

    # Backward pass and parameter update.
    optim.zero_grad()
    loss.backward()
    optim.step()

    # .item() extracts the Python float; preferred over the legacy .data attribute.
    t.set_description(f'loss: {loss.item()}')

loss: 1.824375867843628: 100%|██████████| 10000/10000 [00:11<00:00, 895.77it/s]


In [93]:
# Loss over the entire training set. Nothing is back-propagated from here, so
# disable autograd to avoid building a graph for this large forward pass.
# NOTE(review): Xval/Yval are built earlier but never evaluated; the same
# computation on them would give the held-out loss.
with torch.no_grad():
    emb = C[Xtrain].flatten(1)
    hpreact = emb @ W1 + b1
    h = torch.tanh(hpreact)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Ytrain)

print(loss)

tensor(1.8970)


In [91]:
# Sample 10 names from the trained model, one character at a time.
res = []
# Sampling is inference only, so no autograd graph is needed.
with torch.no_grad():
    for _ in range(10):
        name = ''
        # Start from an all-zero context, the same padding build_dataset uses.
        context = [0] * context_len
        while True:
            emb = C[torch.tensor([context])].flatten(1)
            hpreact = emb @ W1 + b1
            h = torch.tanh(hpreact)
            logits = h @ W2 + b2
            # logits has one row per example; normalise across the vocabulary
            # axis explicitly (omitting `dim` is deprecated and emits a warning).
            probs = F.softmax(logits, dim=1)

            ix = torch.multinomial(probs, num_samples=1).item()

            name += itos[ix]
            context = context[1:] + [ix]

            # Index 0 ends the name — assumes '.' maps to index 0 in stoi;
            # verify against the vocabulary.
            if ix == 0:
                res.append(name)
                break

res

  probs = F.softmax(logits)


['sadhya.',
 'oluwatomi.',
 'emrest.',
 'sephi.',
 'karie.',
 'draeth.',
 'lillyse.',
 'ezelle.',
 'jalei.',
 'kenli.']