In [1]:
import torch

words = open('names.txt', 'r').read().splitlines()

chars = sorted(list(set(''.join(words))))
chars = ['.'] + chars
stoi = {s:i for i, s in enumerate(chars)}
itos = {i:s for s, i in stoi.items()}

# create index table for two characters
two_chars = [ch1 + ch2 for ch1 in chars for ch2 in chars]
two_stoi = {s:i for i, s in enumerate(two_chars)}
two_itos = {i:s for s, i in two_stoi.items()}

## Trigram model using count table

In [2]:
# create count table
N = torch.zeros(27*27, 27)

# fill in count table
for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1 = two_stoi[ch1 + ch2]
        ix2 = stoi[ch3]
        N[ix1, ix2] += 1

In [3]:
# sample names with count table
g = torch.Generator().manual_seed(4832878)

# create probability table
P = (N+0.0001).float()
P /= P.sum(1, keepdims=True)

for _ in range(5):
    out = ['.', '.']
    while True:
        last_two_str = out[-2] + out[-1]
        ix = two_stoi[last_two_str]
        p = P[ix]
        next_ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
        out.append(itos[next_ix])
        if next_ix == 0:
            break
    print(''.join(out[2:-1]))

istmato
ferrellius
treya
caylynna
yotan


In [4]:
# calculate loss
log_likelihood = 0.0
n = 0

for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1 = two_stoi[ch1 + ch2]
        ix2 = stoi[ch3]
        prob = P[ix1, ix2]
        log_prob = torch.log(prob)
        log_likelihood += log_prob
        n += 1

nll = -log_likelihood
nll_mean = nll / n
nll_mean

tensor(2.0620)

## Trigram model using neural network

In [5]:
# create trainig set (x, y)
xs = []
ys = []

for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2, ch3 in zip(chs, chs[1:], chs[2:]):
        ix1 = two_stoi[ch1 + ch2]
        ix2 = stoi[ch3]
        xs.append(ix1)
        ys.append(ix2)

xs = torch.tensor(xs)
ys = torch.tensor(ys)

W = torch.zeros((27 * 27, 27), requires_grad=True)

In [6]:
import torch.nn.functional as F

for k in range(1000):

    # forward pass
    xenc = F.one_hot(xs, num_classes=27*27).float()
    logits = xenc @ W
    count = logits.exp()
    probs = count / count.sum(1, keepdims=True)
    loss = -probs[torch.arange(ys.nelement()), ys].log().mean()

    # backward pass
    W.grad = None
    loss.backward()

    # update
    W.data += -200 * W.grad

print(loss.item())

2.075978994369507


In [8]:
g = torch.Generator().manual_seed(4832878)

for i in range(5):
    out = ['.', '.']
    while True:
        last_two_str = out[-2] + out[-1]
        ix = two_stoi[last_two_str]
        
        xenc = F.one_hot(torch.tensor([ix]), num_classes=27*27).float()
        logits = xenc @ W
        counts = logits.exp()
        probs = counts / counts.sum(1, keepdims=True)

        next_ix = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
        next_char = itos[next_ix]
        out.append(next_char)
        
        if next_ix == 0:
          break
    print(''.join(out[2:-1]))

istonto
ferrellius
treya
caylynna
yotan
