In [689]:
import torch
from torch import nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [690]:
# Load the corpus: one entry per line of names.txt.
# The context manager closes the file as soon as it has been read instead of
# leaving the handle open until the garbage collector gets to it.
with open("names.txt") as names_file:
    words = names_file.read().splitlines()

In [691]:
# Vocabulary: the '.' boundary marker is placed first, followed by every
# distinct character found in the corpus, in sorted order.
chars = ['.', *sorted(set(''.join(words)))]
# Character -> integer index, and the inverse lookup.
stoi = {symbol: index for index, symbol in enumerate(chars)}
itos = {index: symbol for symbol, index in stoi.items()}

## Count Based

In [692]:
# Trigram count table, indexed as N[first, second, third] by character index.
# The size 27 is hard-coded; presumably 26 letters plus the '.' marker —
# verify against len(chars) if the corpus changes.
N = torch.zeros(27, 27, 27, dtype=torch.int32)

In [693]:
# Count every trigram in the corpus into N.
# Each word is wrapped in '.' markers so that its first and last characters
# also take part in a trigram.
# N is zeroed first so that re-running this cell on its own does not add the
# same counts a second time.
N.zero_()
for w in words:
    padded = ['.'] + list(w) + ['.']
    # Slide a window of three consecutive symbols over the padded word.
    for a, b, c in zip(padded, padded[1:], padded[2:]):
        N[stoi[a], stoi[b], stoi[c]] += 1

In [694]:
# Probabilities from counts: add one to every count (so no trigram has zero
# probability), then normalise over the last axis so that P[a, b, :] sums to 1.
P = (N + 1).float()
P = P / P.sum(dim=2, keepdim=True)

In [695]:
# Bigram probabilities derived from the trigram counts: summing N over its
# last axis gives how often each (first, second) pair opens a trigram, and
# each row is then normalised to sum to 1.
# The sampling cells read only row 0, i.e. the distribution of the character
# that follows the '.' start marker.
B = N.sum(dim=2).float()
B = B / B.sum(dim=1, keepdim=True)

In [696]:
# Generate 10 names from the count-based trigram model.
# A dedicated, seeded generator makes the printed samples reproducible from
# run to run without touching torch's global RNG state.
sample_rng = torch.Generator().manual_seed(2147483647)
for i in range(10):
    # Start from the '.' marker (index 0). The first real character is drawn
    # from the bigram row B[0].
    ix1 = 0
    ix2 = torch.multinomial(B[ix1], num_samples=1, generator=sample_rng).item()
    outputs = [itos[ix2]]
    while True:
        # Distribution over the next character given the last two.
        p = P[ix1, ix2]
        x = torch.multinomial(p, num_samples=1, generator=sample_rng).item()
        outputs.append(itos[x])
        # Shift the two-character context forward by one position.
        ix1, ix2 = ix2, x
        # Index 0 is the '.' marker: the name is complete.
        if x == 0:
            break
    print(''.join(outputs))

elysser.
brynnah.
moudszfkina.
arach.
nova.
sama.
en.
ro.
af.
emelineshmeigae.


In [697]:
# Evaluate the count-based model: average negative log-likelihood that P
# assigns to every trigram of the corpus.
nll = 0.0
cnt = 0.0
for w in words:
    padded = ['.', *w, '.']
    for a, b, c in zip(padded, padded[1:], padded[2:]):
        # Accumulate log p(c | a, b) one trigram at a time.
        nll += torch.log(P[stoi[a], stoi[b], stoi[c]])
        cnt += 1
# Flip the sign and average over the number of trigrams seen.
nll = -nll / cnt
print(nll.item())

2.092747449874878


## Neural Network

In [698]:
# Build the training set for the network: each example pairs the indices of
# two consecutive characters (input) with the index of the character that
# follows them (target). Words are wrapped in '.' markers first.
xs, ys = [], []

for w in words:
    ids = [stoi[ch] for ch in ['.', *w, '.']]
    for first, second, target in zip(ids, ids[1:], ids[2:]):
        xs.append([first, second])
        ys.append(target)

xs = torch.tensor(xs)
ys = torch.tensor(ys)

In [699]:
# Encode each input pair as two 27-wide one-hot vectors placed side by side,
# giving one 54-wide float row per example.
xenc = torch.hstack(
    [F.one_hot(xs[:, col], num_classes=27) for col in range(2)]
).float()
# Number of training examples (rows of xenc).
nums = len(xenc)

In [700]:
# Single linear layer: 54 inputs (two stacked 27-wide one-hot vectors) mapped
# to 27 outputs, one score per possible next character.
model = nn.Linear(in_features=54, out_features=27)

In [701]:
# Cross-entropy loss on the model's raw output scores, and an Adam optimizer
# that updates the model's parameters from their gradients.
lossfn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.1)

In [702]:
# Train on the whole encoded dataset at once for a fixed number of steps,
# printing the loss every 50th step (starting with step 0).
epochs = 500
for step in range(epochs):
    # Clear gradients left over from the previous step before the new pass.
    optimizer.zero_grad()

    y = model(xenc)
    loss = lossfn(y, ys)

    loss.backward()
    optimizer.step()

    if step % 50 == 0:
        print(f'{loss.item():.4f}')

3.3074
2.2413
2.2375
2.2369
2.2367
2.2366
2.2366
2.2365
2.2365
2.2365


In [703]:
# Generate 10 names from the trained network.
# A dedicated, seeded generator fixes the sampling draws. No seed is set for
# the model's initialisation in the cells shown here, so the names can still
# differ between training runs.
sample_rng = torch.Generator().manual_seed(2147483647)
# Sampling is inference only: disabling autograd avoids building a gradient
# graph for every generated character.
with torch.no_grad():
    for i in range(10):
        # Start from the '.' marker (index 0); the first real character is
        # drawn from the count-based bigram row B[0].
        ix1 = 0
        ix2 = torch.multinomial(B[ix1], num_samples=1, generator=sample_rng).item()
        outputs = [itos[ix2]]
        while True:
            # Encode the two-character context as two stacked 27-wide
            # one-hot vectors, the same layout the model was trained on.
            xenct = torch.hstack(
                [F.one_hot(torch.tensor(ix), num_classes=27) for ix in (ix1, ix2)]
            ).float()
            y = model(xenct)
            # Turn the 27 output scores into a probability distribution.
            p = torch.softmax(y, dim=0)
            x = torch.multinomial(p, num_samples=1, generator=sample_rng).item()
            outputs.append(itos[x])
            # Shift the context forward by one character.
            ix1, ix2 = ix2, x
            # Index 0 is the '.' marker: the name is complete.
            if x == 0:
                break
        print(''.join(outputs))

adrahrais.
eva.
semi.
almire.
ze.
panahamur.
selarih.
oca.
kyn.
carmeroa.
