In [162]:
import torch
from torch import nn
import torch.nn.functional as F
import random
import matplotlib.pyplot as plt
%matplotlib inline

In [163]:
# Read the dataset, one name per line. The context manager closes the file
# handle deterministically instead of leaving it open until garbage collection.
with open("names.txt") as names_file:
    words = names_file.read().splitlines()

In [164]:
# Vocabulary: '.' comes first (index 0), followed by every distinct character
# found in the words, in sorted order.
chars = ['.', *sorted(set(''.join(words)))]
# Character -> integer id.
stoi = {ch: idx for idx, ch in enumerate(chars)}
# Integer id -> character, obtained by inverting stoi.
itos = dict(zip(stoi.values(), stoi.keys()))

## Count Based

In [165]:
# Trigram count table indexed as N[first, second, third]; all counts start at zero.
N = torch.zeros(27, 27, 27, dtype=torch.int32)

In [166]:
# Count how often each character trigram occurs, padding every word with the
# '.' boundary token on both sides.
# Reset the table first so that re-running this cell recomputes the counts
# from scratch instead of adding them on top of the previous run.
N.zero_()
for w in words:
    w = ['.', *w, '.']
    # Slide a window of three consecutive characters over the padded word.
    for a, b, c in zip(w, w[1:], w[2:]):
        N[stoi[a], stoi[b], stoi[c]] += 1

In [167]:
# Probabilities from counts with add-one smoothing: every count is incremented
# by 1, then each P[a, b, :] slice is divided by its own sum so that it forms a
# distribution over the third character.
P = (N + 1).float()
P = P / P.sum(dim=2, keepdim=True)

In [168]:
# Bigram probabilities derived from the trigram counts: sum out the third
# character, then divide each row by its total. Row 0 (the '.' row) is used
# when sampling to draw the first character of a name.
B = N.sum(dim=2).float()
B = B / B.sum(dim=1, keepdim=True)

In [169]:
# Sample ten names from the count-based trigram model.
for _ in range(10):
    # Context starts as ('.', first character); the first character is drawn
    # from the bigram row of the '.' token.
    prev = 0
    cur = torch.multinomial(B[prev], num_samples=1, replacement=True).item()
    name = [itos[cur]]
    nxt = None
    # Keep drawing the next character until the '.' token (index 0) is sampled.
    while nxt != 0:
        nxt = torch.multinomial(P[prev, cur], num_samples=1, replacement=True).item()
        name.append(itos[nxt])
        prev, cur = cur, nxt
    print(''.join(name))

jisone.
andeolla.
aurpmcvtson.
ma.
trius.
ruingolden.
mell.
vion.
jaxfiq.
kaygvyon.


In [170]:
# Average negative log-likelihood of all trigrams in the dataset under P.
total_logprob = 0.0
cnt = 0.0
for word in words:
    padded = ['.', *word, '.']
    for a, b, c in zip(padded, padded[1:], padded[2:]):
        # Accumulate the log-probability the model assigns to this trigram.
        total_logprob += torch.log(P[stoi[a], stoi[b], stoi[c]])
        cnt += 1
nll = -total_logprob / cnt
print(nll.item())

2.092747449874878


## Neural Network

In [171]:
# Multiplier used to flatten a pair of indices into one.
ctx = 27
def get_idx(a, b):
    """Flatten the index pair (a, b) into the single index a * ctx + b."""
    return a * ctx + b

In [172]:
# Build the training set: for every trigram (a, b, c) of a '.'-padded word, the
# input is the flattened index of (a, b) and the target is the id of c.
examples = [
    (get_idx(stoi[a], stoi[b]), stoi[c])
    for padded in (['.', *word, '.'] for word in words)
    for a, b, c in zip(padded, padded[1:], padded[2:])
]

xs = torch.tensor([context for context, _ in examples])
ys = torch.tensor([target for _, target in examples])


In [173]:
# One-hot encode each flattened context index into a float vector of 27*27 entries.
xenc = F.one_hot(xs, num_classes=729).to(torch.float32)
# Number of rows in xenc; displayed as the cell output.
nums = xenc.size(0)
nums

196113

In [174]:
# Single linear layer: 729 (27*27) input features -> 27 outputs.
model = nn.Linear(in_features=729, out_features=27)

In [175]:
# Cross-entropy loss for scoring the model outputs, and an Adam optimizer
# (learning rate 0.05) that updates the model's parameters.
lossfn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.05)

In [176]:
# Full-batch training: every epoch runs the whole one-hot matrix through the model.
epochs = 100
# Integer logging interval (roughly ten reports per run, never less than 1).
# An integer keeps `i % epfact == 0` exact; a float interval such as epochs/10
# silently drops reports when epochs is not a multiple of 10.
epfact = max(1, epochs // 10)
for i in range(epochs):
    y = model(xenc)

    loss = lossfn(y, ys)

    # Clear gradients from the previous step before computing new ones.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % epfact == 0:
        print(f'train: {loss.item():.4f}')

train: 3.2908
train: 2.5307
train: 2.2431
train: 2.1438
train: 2.1074
train: 2.0916
train: 2.0832
train: 2.0785
train: 2.0755
train: 2.0734


In [249]:
# Sample ten names from the trained network.
# Gradients are not needed while sampling, so autograd tracking is disabled.
with torch.no_grad():
    for i in range(10):
        # Context starts as ('.', first character); the first character is
        # drawn from the bigram row of the '.' token.
        ix1 = 0
        ix2 = torch.multinomial(B[ix1], num_samples=1, replacement=True).item()
        outputs = [itos[ix2]]
        while True:
            # Flatten the two-character context with get_idx, the same helper
            # used to build the training inputs (the original call to
            # `get_index` referred to a name that is not defined).
            d = torch.tensor(get_idx(ix1, ix2))
            xen = F.one_hot(d, num_classes=27*27).float()
            y = model(xen)
            # Turn the 27 output scores into a probability distribution.
            p = torch.softmax(y, 0)
            x = torch.multinomial(p, num_samples=1, replacement=True).item()
            outputs.append(itos[x])
            # Shift the context window by one character.
            ix1 = ix2
            ix2 = x
            # Index 0 is the '.' token, which ends the name.
            if x == 0:
                break
        print(''.join(outputs))

zykubia.
jamyahrykieleni.
jian.
zir.
dhvian.
adarzah.
zajamelleah.
hayvie.
sama.
masser.
