In [82]:
import torch
import torch.nn.functional as F
from typing import List, Dict, Tuple
from matplotlib import pyplot as plt

In [2]:
# Read file
words: List[str] = []
with open('names.txt', 'r') as file:
    words = file.read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [41]:
# Create lookups
chars: List[str] = sorted(list(set(''.join(words))))
chars.extend(['.'])

n_chars = len(chars)

stoi: Dict[str, int] = {s:i for i,s in enumerate(chars)}
itos: Dict[int, str] = {i:s for s, i in stoi.items()}

In [4]:
# Create bigram with dictionary
bigrams: Dict[tuple, int] = {}
for word in words:
    chs = ['.'] + list(word) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        bigram = (ch1, ch2)
        bigrams[bigram] = bigrams.get(bigram, 0) + 1

In [5]:
# Create bigram with matrix
N = torch.zeros((n_chars, n_chars), dtype=torch.int32)

for word in words:
    chs = ['.'] + list(word) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        inx1 = stoi[ch1]
        inx2 = stoi[ch2]
        N[inx1, inx2] += 1

In [60]:
# Efficiency
N = N + 1 # Model smoothing
P = N.float()
P = P / P.sum(dim=1, keepdim=True)
P[0].sum().item()

1.0

In [61]:
gen = torch.Generator().manual_seed(2147483647)

for _ in range(5):
    inx: int = 0
    out: List[str] = []

    while True:
        p = P[inx]
        inx = torch.multinomial(p, num_samples=1, replacement=True, generator=gen).item()
        char = itos[inx]
        out.append(char)

        if char == '.':
            break

    print(''.join(out))

na.
kenen.
lyri.
iva.
rri.


In [65]:
# Loss function
log_likelihood: float = 0.0
count: int = 0

for word in words:
# for word in ["keivan"]:
    chs = ['.'] + list(word) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        inx1 = stoi[ch1]
        inx2 = stoi[ch2]
        prob = P[inx1, inx2]         # Bigram probability
        log_pob = torch.log(prob)    # Bigram log probability
        log_likelihood += log_pob    # Log likelihood
        count += 1

avg_log_likelihood = log_likelihood / count
negative_avg_log_likelihood = -avg_log_likelihood
print(f"{negative_avg_log_likelihood=:.4}")

negative_avg_log_likelihood=2.454


In [70]:
# Create bigram dataset (x, y)
xs, ys = [], []

for word in words:
    chs = ['.'] + list(word) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        inx1 = stoi[ch1]
        inx2 = stoi[ch2]
        xs.append(inx1)
        ys.append(inx2)

xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [87]:
xenc = F.one_hot(xs, num_classes=n_chars).float()
yenc = F.one_hot(ys, num_classes=n_chars).float()

In [96]:
W = torch.randn((27, 27))

In [104]:
logits = xenc @ W       # log counts
counts = logits.exp()   # equavalent N
probs = counts / counts.sum(dim=1, keepdim=True) # softmax

In [103]:
probs[0].sum()

tensor(1.0000)