In [257]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [258]:
words = open('names.txt', 'r').read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [259]:
chars = sorted(list(set(''.join(words))))
stoi = {s: i+1 for i, s in enumerate(chars)}
stoi['.'] = 0
itos = {i: s for s, i in stoi.items()}

In [459]:
block_size = 3
x, y = [], []
for w in words:
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        x.append(context)
        y.append(ix)
        # print(''.join(itos[i] for i in context), '--->', itos[ix])
        context = context[1:] + [ix]

X = torch.tensor(x)
y = torch.tensor(y)

In [468]:
g = torch.Generator().manual_seed(2147483647) # for reproducibility
C = torch.randn((27, 2), generator=g)
W1 = torch.randn((6, 100), generator=g)
b1 = torch.randn(100, generator=g)
W2 = torch.randn((100, 27), generator=g)
b2 = torch.randn(27, generator=g)
parameters = [C, W1, b1, W2, b2]


In [461]:
sum(p.nelement() for p in parameters)

3481

In [470]:
for _ in range(10):
    emb = C[X]                  # (32, 3, 2)
    h = torch.tanh(emb.view(-1, 6) @ W1 + b1) 
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, y)
    # print(loss.item())
    for p in parameters:
        p.requires_grad = True
    loss.backward()
    for p in parameters:
        p.data += -0.1 * p.grad
print(loss.item())

8.103053092956543


In [471]:
torch.randint(0, X.shape[0], (32,))

tensor([127185, 178125,  89198,  23869, 103451,    869, 212750, 195372,  19814,
        175561,  51633,  51046,  51961,  61126, 177957, 213182, 210303,  35111,
         68761,  96671, 178855,  98784, 160878, 132134,  67152, 164345,  94577,
        225311,  43448, 221284,  39772, 198307])

In [453]:
logits.max(1)

torch.return_types.max(
values=tensor([ 68.7686,  70.7097,  76.2929,  44.4930,  74.8306,  68.7686,  90.0052,
         92.6099,  52.3242,  80.7221,  89.1354,  66.3278,  68.7686,  67.2999,
         72.3591,  66.5531,  68.7686,  79.5168,  65.0536,  75.0781,  64.0188,
         46.9917,  87.6652, 124.3027,  99.5297,  68.7686,  78.5693,  91.3336,
         81.6630,  44.7182,  63.7966,  85.2898], grad_fn=<MaxBackward0>),
indices=tensor([ 9, 13, 13,  1,  0,  9, 12,  9, 22,  9,  1,  0,  9, 22,  1,  0,  9, 19,
         1,  2,  5, 12, 12,  1,  0,  9, 15, 16,  8,  9,  1,  0]))

In [None]:
F.cross_entropy(logits, y)

tensor(16.9465)

In [None]:
logits1 = torch.tensor([-100, -2, 14, 1])
logits2 = torch.tensor([-100, -2, 14, 100]) +100
counts1 = logits1.exp()
counts2 = logits2.exp()
print(counts1)
print(counts2)
prob = counts / counts.sum()
prob

tensor([3.7835e-44, 1.3534e-01, 1.2026e+06, 2.7183e+00])
tensor([1., inf, inf, inf])


tensor([0.0000e+00, 1.1254e-07, 1.0000e+00, 8.3153e-07])

In [None]:
counts

tensor([3.7835e-44, 1.3534e-01, 1.2026e+06, 1.0000e+00])

In [None]:
t1 = torch.arange(30)
t1 = t1.view(6, 5)
arange = torch.arange(6)
nrange = torch.randint(0, 5, (6, ))
t1[arange, nrange]


tensor([ 3,  8, 13, 18, 23, 28])

In [None]:
loss = -prob[torch.arange(32), y].log().mean()
loss

tensor(16.7798)