In [29]:
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [5]:
# Read the dataset: one entry per line of names.txt.
# The context manager closes the file handle as soon as the lines are read,
# instead of leaving it open for the rest of the kernel session.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [6]:
# 27x27 table of int32 zeros; later cells fill it with bigram counts.
N = torch.zeros(27, 27, dtype=torch.int32)

In [7]:
# Vocabulary: the distinct characters that occur in the words, in sorted order.
chars = sorted(set(''.join(words)))
# Character -> index, numbering the vocabulary from 1 so that index 0 stays
# free for the '.' token.
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0
# Index -> character, the inverse of stoi.
itos = dict((idx, ch) for ch, idx in stoi.items())

In [8]:
# Count how often each character is followed by another across all words.
# After the loop, N[i, j] holds the number of times the character with index j
# directly follows the character with index i; '.' pads both ends of each word.
#
# N is created in a separate cell, so reset it in place first: otherwise
# re-running this cell would add the same bigrams on top of the old counts.
N.zero_()
for w in words:
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        N[ix1, ix2] += 1

In [9]:
# %matplotlib inline
# plt.figure(figsize=(16,16))
# plt.imshow(N, cmap='Blues')
# for i in range(27):
#     for j in range(27):
#         chstr = itos[i] + itos[j]
#         plt.text(j, i , chstr, ha="center", va="bottom", color="gray")
#         plt.text(j,i, N[i, j].item(), ha="center", va="top", color="gray")
# plt.axis('off')

In [10]:
# Turn the count table into row-wise probabilities.
# Every count is incremented by 1 before normalising, then each row is divided
# by its own total (keepdim=True keeps the sums as a column so the division
# broadcasts across each row).
smoothed_counts = (N + 1).float()
P = smoothed_counts / smoothed_counts.sum(dim=1, keepdim=True)

In [11]:
# Sample five strings from the probability table P with a fixed seed.
g = torch.Generator().manual_seed(2147483647)
for i in range(5):
    # Start at index 0 and repeatedly draw the next index from row P[ix];
    # drawing index 0 again ends the current string.
    ix, out = 0, []
    while (
        ix := torch.multinomial(
            P[ix], num_samples=1, replacement=True, generator=g
        ).item()
    ) != 0:
        out.append(itos[ix])
    print(''.join(out))

cexze
momasurailezitynn
konimittain
llayn
ka


In [12]:
# Score the table P against the dataset: add up log P[ix1, ix2] for every
# consecutive character pair of every word (each word padded with '.' on both
# sides), counting the pairs in n.
log_likelihood = 0
n = 0
for w in words:
    padded = '.' + w + '.'
    for ch1, ch2 in zip(padded, padded[1:]):
        log_likelihood += torch.log(P[stoi[ch1], stoi[ch2]])
        n += 1

# Report the per-pair average log-likelihood, the negated total, and the
# negated per-pair average.
nll = -log_likelihood
print(f"{log_likelihood/n=}")
print(f"{nll=}")
print(f"{nll/n}")

log_likelihood/n=tensor(-2.4544)
nll=tensor(559951.5625)
2.4543561935424805


In [35]:
# Build (input index, target index) pairs for the bigram training set.
# Only the first word is used here (words[:1]); each pair is printed as it is
# collected so the encoding can be inspected.
xs, ys = [], []

for w in words[:1]:
    chs = ['.', *w, '.']
    for ch1, ch2 in zip(chs[:-1], chs[1:]):
        ix1, ix2 = stoi[ch1], stoi[ch2]
        print(f"{ch1}={ix1} {ch2}={ix2}")
        xs.append(ix1)
        ys.append(ix2)

# Convert the collected index lists to tensors.
xs, ys = torch.tensor(xs), torch.tensor(ys)
print(f"{xs=}")
print(f"{ys=}")

.=0 e=5
e=5 m=13
m=13 m=13
m=13 a=1
a=1 .=0
xs=tensor([ 0,  5, 13, 13,  1])
ys=tensor([ 5, 13, 13,  1,  0])


In [38]:
# One-hot encode the input indices as 27-wide rows, cast to float32 so they
# can be used in floating-point matrix products.
xenc = F.one_hot(xs, num_classes=27).to(torch.float32)
# plt.imshow(xenc)

In [41]:
# Display the shape of the one-hot encoded inputs.
xenc.shape

torch.Size([5, 27])

In [49]:
# Random 27x27 weight matrix with entries drawn uniformly from [0, 1).
# A dedicated seeded generator makes W (and therefore the displayed product)
# identical on every run; without it each execution produced different values.
w_gen = torch.Generator().manual_seed(2147483647)
W = torch.rand((27, 27), generator=w_gen)
# xenc holds one-hot rows, so each row of the product is the row of W picked
# out by the corresponding input index.
ans = xenc @ W
ans

tensor([[0.7457, 0.4106, 0.1386, 0.4667, 0.1175, 0.0680, 0.3931, 0.4294, 0.1806,
         0.7127, 0.8871, 0.5444, 0.4972, 0.8266, 0.4876, 0.7417, 0.9823, 0.6557,
         0.1149, 0.6657, 0.3040, 0.8743, 0.5876, 0.0090, 0.5713, 0.4209, 0.2981],
        [0.9490, 0.3318, 0.0325, 0.9076, 0.1185, 0.4342, 0.9365, 0.7486, 0.7972,
         0.4068, 0.1183, 0.3115, 0.3506, 0.7485, 0.1552, 0.6267, 0.1149, 0.2278,
         0.3477, 0.4116, 0.9146, 0.5002, 0.5523, 0.6706, 0.0615, 0.8107, 0.7669],
        [0.9054, 0.3974, 0.1260, 0.4503, 0.0471, 0.6786, 0.9308, 0.0930, 0.0619,
         0.5489, 0.2762, 0.1948, 0.5526, 0.1963, 0.1837, 0.9075, 0.6450, 0.2490,
         0.7418, 0.1487, 0.1018, 0.9133, 0.0133, 0.3043, 0.4224, 0.5949, 0.8299],
        [0.9054, 0.3974, 0.1260, 0.4503, 0.0471, 0.6786, 0.9308, 0.0930, 0.0619,
         0.5489, 0.2762, 0.1948, 0.5526, 0.1963, 0.1837, 0.9075, 0.6450, 0.2490,
         0.7418, 0.1487, 0.1018, 0.9133, 0.0133, 0.3043, 0.4224, 0.5949, 0.8299],
        [0.6293, 0.8476,

In [50]:
# Display the shape of the matrix product xenc @ W.
ans.shape

torch.Size([5, 27])