In [10]:
import torch
import matplotlib.pyplot as plt
# Load the dataset: one entry per line of names.txt.
# The context manager closes the file handle as soon as the text has been read
# (a bare open(...).read() leaves the handle open until garbage collection).
with open('names.txt', 'r') as f:
    words = f.read().splitlines()

In [7]:
# Vocabulary: every distinct character occurring in the dataset, in sorted order.
chars = sorted(set(''.join(words)))
# Character -> integer index. Letters are numbered from 1 so that index 0
# stays free for the special '.' token.
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0
# Integer index -> character (inverse mapping of stoi).
itos = {idx: ch for ch, idx in stoi.items()}

In [13]:
# creating the training set of bigrams (x,y)
# Every word is wrapped in '.' markers and each adjacent character pair
# (ch1, ch2) yields one example: input index in xs, target index in ys.
xs, ys = [], []

for w in words:
    # Named `chs` (not `chars`) so the vocabulary list `chars` is not
    # overwritten by the characters of the last word processed.
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])

# Convert the Python lists of indices into integer tensors.
xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [41]:
# one hot encoding
import torch.nn.functional as F

In [37]:
# Dedicated, explicitly seeded generator so the random weights are reproducible.
g = torch.Generator()
g.manual_seed(2147483647)
# 27x27 weight matrix sampled from a standard normal distribution.
W = torch.randn(27, 27, generator=g)

In [42]:
# One-hot encode the input indices (27 classes) and cast to float so the
# result can be matrix-multiplied with W.
xenc = F.one_hot(xs, num_classes=27).float()
# One row of 27 scores per example.
logits = xenc @ W
# Softmax by hand: exponentiate the scores, then normalise every row so it
# sums to 1.
counts = torch.exp(logits)
prob = counts / counts.sum(dim=1, keepdim=True)
prob.shape

torch.Size([228146, 27])

In [44]:
# Walk through the first five bigrams and report, for each one, the
# probability the model gives to the true next character and the resulting
# negative log likelihood; the mean of those five values is printed last.
nlls = torch.zeros(5)
for i in range(5):
    # Input and target character indices as plain Python ints.
    x, y = xs[i].item(), ys[i].item()
    # Probability assigned to the correct next character, and its (negative) log.
    p = prob[i, y]
    logp = p.log()
    nll = -logp
    nlls[i] = nll

    print('--------------------------------------------------------------------------')
    print(f'bigram example {i+1}: {itos[x]}{itos[y]} (indices {x}, {y})')
    print('input to the neural net : ', x)
    print('output prob from neural net : ', prob[i])
    print('label (actual next character):', y)
    print('probability assigned by the net to the correct character:', p.item())
    print('log likelihood:', logp.item())
    print('negative log likelihood:', nll.item())

print("================================================================================")
print('average negative log likelihood i.e., loss : ', nlls.mean().item())

--------------------------------------------------------------------------
bigram example 1: .e (indices 0, 5)
input to the neural net :  0
output prob from neural net :  tensor([0.0607, 0.0100, 0.0123, 0.0042, 0.0168, 0.0123, 0.0027, 0.0232, 0.0137,
        0.0313, 0.0079, 0.0278, 0.0091, 0.0082, 0.0500, 0.2378, 0.0603, 0.0025,
        0.0249, 0.0055, 0.0339, 0.0109, 0.0029, 0.0198, 0.0118, 0.1537, 0.1459])
label (actual next character): 5
probability assigned by the net to the correct character: 0.012286253273487091
log likelihood: -4.3992743492126465
negative log likelihood: 4.3992743492126465
--------------------------------------------------------------------------
bigram example 2: em (indices 5, 13)
input to the neural net :  5
output prob from neural net :  tensor([0.0290, 0.0796, 0.0248, 0.0521, 0.1989, 0.0289, 0.0094, 0.0335, 0.0097,
        0.0301, 0.0702, 0.0228, 0.0115, 0.0181, 0.0108, 0.0315, 0.0291, 0.0045,
        0.0916, 0.0215, 0.0486, 0.0300, 0.0501, 0.0027, 0.0118, 

In [53]:
# Re-create the weights from the same seed, this time tracking gradients so
# that a loss computed from W can be back-propagated into W.grad.
g = torch.Generator()
g.manual_seed(2147483647)
W = torch.randn(27, 27, generator=g, requires_grad=True)

In [63]:
# forward pass
# One-hot inputs -> scores -> row-wise softmax probabilities.
xenc = F.one_hot(xs, num_classes=27).float()
logits = xenc @ W
counts = torch.exp(logits)
probs = counts / counts.sum(dim=1, keepdim=True)
# Loss: mean negative log likelihood of the correct next character,
# evaluated on the first five examples only.
loss = -torch.log(probs[torch.arange(5), ys[:5]]).mean()

In [61]:
# backward pass
# Clear any gradient left over from a previous run of this cell (setting it to
# None rather than filling with zeros), then back-propagate the loss so that
# W.grad holds d(loss)/dW for the update step.
W.grad = None #zero the gradients
loss.backward()

In [62]:
# Gradient-descent step with learning rate 0.1.
# The in-place update is done under torch.no_grad() instead of through W.data:
# it is likewise not recorded by autograd, but it does not bypass autograd's
# bookkeeping for in-place modifications the way .data does.
with torch.no_grad():
    W -= 0.1 * W.grad

In [65]:
# Show the current scalar loss as a plain Python number.
print(loss.item())

3.7291626930236816


**Complete flow:**

In [71]:
#creating the dataset
# Every word is wrapped in '.' markers and each adjacent character pair
# (ch1, ch2) yields one example: input index in xs, target index in ys.
xs, ys = [], []
for w in words:
    # Named `chs` (not `chars`) so the vocabulary list `chars` is not
    # overwritten by the characters of the last word processed.
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])
xs = torch.tensor(xs)
ys = torch.tensor(ys)
# Total number of bigram examples.
num = xs.nelement()
print('number of examples: ', num)

# initialize the simple neural net
# Seeded generator for reproducible weights; requires_grad=True so the loss
# can be back-propagated into W.grad.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27,27), generator = g, requires_grad=True)

number of examples:  228146


In [79]:
# Gradient-descent training: 100 full-batch steps.

# The one-hot inputs and the row indices do not depend on W, so they are built
# once here instead of being re-allocated on every iteration.
xenc = F.one_hot(xs, num_classes=27).float()
example_idx = torch.arange(num)

for k in range(100):
    # forward pass: scores -> row-wise softmax probabilities
    logits = (xenc @ W)
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdims = True)
    # Mean negative log likelihood of the correct next character, plus an
    # L2 penalty on W (regularization, weight 0.01).
    loss = -probs[example_idx, ys].log().mean() + 0.01*(W**2).mean()
    print(loss.item())

    # backward pass
    W.grad = None  # drop the previous gradient before back-propagating
    loss.backward()

    # update (learning rate 50); no_grad() keeps the in-place step out of the
    # autograd graph without going through W.data
    with torch.no_grad():
        W += -50 * W.grad

2.4813597202301025
2.481353998184204
2.4813482761383057
2.481342315673828
2.4813365936279297
2.4813311100006104
2.481325387954712
2.4813199043273926
2.481314182281494
2.481308698654175
2.4813032150268555
2.481297492980957
2.481292486190796
2.4812870025634766
2.4812819957733154
2.481276750564575
2.481271266937256
2.4812662601470947
2.4812610149383545
2.4812560081481934
2.4812510013580322
2.481245994567871
2.48124098777771
2.481236219406128
2.4812307357788086
2.4812262058258057
2.4812211990356445
2.4812166690826416
2.4812119007110596
2.4812071323394775
2.4812026023864746
2.481198310852051
2.4811933040618896
2.4811887741088867
2.481184244155884
2.481179714202881
2.481175184249878
2.481170892715454
2.4811666011810303
2.4811618328094482
2.4811577796936035
2.481153726577759
2.481149435043335
2.481145143508911
2.4811408519744873
2.4811365604400635
2.481132745742798
2.481128215789795
2.48112416267395
2.4811203479766846
2.48111629486084
2.481112241744995
2.4811084270477295
2.4811043739318848
2.

In [81]:
# sample from Neural Net Model
# Generates 20 strings: start from index 0, repeatedly sample the next index
# from the model's output distribution, and stop when index 0 is drawn again.
g = torch.Generator().manual_seed(2147483647)

# Sampling is inference only. When W tracks gradients, every forward step
# would otherwise record an autograd graph that is never used, so gradient
# tracking is switched off for the whole loop.
with torch.no_grad():
    for i in range(20):
        out = []
        ix = 0
        while True:
            # p = P[ix] --> before
            # Now the distribution over the next character comes from the
            # network: one-hot input -> scores -> softmax.
            xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()
            logits = (xenc @ W)
            counts = logits.exp()
            p = counts / counts.sum(1, keepdims = True)

            ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
            if ix == 0:
                break
            out.append(itos[ix])
        print(''.join(out))

cexze
momasurailezityha
konimittain
llayn
ka
da
staiyaubrtthrigotai
moliellavo
ke
teda
ka
emimmsade
enkaviyny
ftlsp
hinivenvtahlasu
dsor
br
jol
pen
aisan
