In [59]:
# !uv pip install torch torchvision
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F

In [2]:
with open("names.txt", "r") as f:
    names = f.read().split("\n")

In [3]:
cnt = {}
for name in names[:1]:
    name = "." + name + "."
    for c1, c2 in zip(name, name[1:]):
        cnt[(c1, c2)] = cnt.get((c1, c2), 0) + 1
        # print(c1, c2)



In [8]:
stoi = {c: idx for c, idx in zip(sorted(list(set("".join(names)))), range(1, 27))}
stoi["."] = 0
itos = {c: i for i, c in stoi.items()}

In [20]:
N = torch.zeros((27, 27), dtype=torch.int32)
for name in names:
    name = ["."] + list(name) + ["."]
    for c1, c2 in zip(name, name[1:]):
        N[stoi[c1]][stoi[c2]] += 1

In [49]:
P = (N + 1).float()
P = P / P.sum(1, keepdim=True)

In [51]:
g = torch.Generator().manual_seed(2147483647)

out = []
ix = 0
while True:
    p = P[ix]
    ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
    out.append(itos[ix])
    if ix == 0:
        # end token: break
        break



In [54]:
loglike = 0.0
n = 0
for name in names:
    n += 1
    name = ["."] + list(name) + ["."]
    for c1, c2 in zip(name, name[1:]):
        id1 = stoi[c1]
        id2 = stoi[c2]

        prob = P[id1, id2]
        logprob = torch.log(prob)
        loglike += logprob
        print(f"{c1, c2}: prob: {prob:.4f}")
negloglike = -loglike
print(f"loss: {negloglike / n}")

        

('.', 'v'): prob: 0.0118
('v', 'i'): prob: 0.3508
('i', 'n'): prob: 0.1200
('n', 'c'): prob: 0.0117
('c', 'e'): prob: 0.1551
('e', 'n'): prob: 0.1309
('n', 't'): prob: 0.0242
('t', '.'): prob: 0.0865
loss: 22.129831314086914


In [81]:
trainX, trainY = [], []

for name in names[:3]:
    name = ["."] + list(name) + ["."]
    for c1, c2 in zip(name, name[1:]):
        id1 = stoi[c1]
        id2 = stoi[c2]
        
        trainX.append(id1)
        trainY.append(id2)

trainX = torch.tensor(trainX)
trainY = torch.tensor(trainY)

In [82]:
trainX = F.one_hot(trainX, num_classes=27).float()
trainX.shape

torch.Size([16, 27])

In [57]:
trainY

tensor([ 5, 13, 13,  ..., 26, 24,  0])

In [95]:
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g, requires_grad=True)

In [115]:
iters = 100
for k in range(iters):

    logits = trainX @ W
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdim=True) 
    loss = -probs[torch.arange(probs.shape[0]), trainY].log().mean()

    W.grad = None
    loss.backward()

    W.data += W.grad * -1
    print(loss)