In [779]:
import torch
from torch import nn
import torch.nn.functional as F
import random
import matplotlib.pyplot as plt
%matplotlib inline

In [780]:
words = open("names.txt").read().splitlines()
random.seed(42)
random.shuffle(words)

In [781]:
chars = ['.'] + sorted(list(set(''.join(words))))
stoi = {s: i for i, s in enumerate(chars)}
itos = {i: s for s, i in stoi.items()}

In [782]:
nsize = len(words)
tsize = int(0.8*nsize)
trainset = words[:tsize]
testset = words[tsize:]

## Count Based

N = torch.zeros((27, 27, 27), dtype=torch.int)

In [783]:
# calculating counts of trigrams
for w in trainset[:]:
    w = ['.'] + list(w) + ['.']
    for i in range(len(w)-2):
        a, b, c = w[i], w[i+1], w[i+2]
        N[stoi[a], stoi[b], stoi[c]] += 1

In [784]:
# probabilites from counts
P = (N+1).float()
P /= P.sum(2,keepdim=True)

In [785]:
# bigram probabilites, helpful in generating the first two tokens for sampling
B = N.sum(2, keepdim=True).float()
B = B/B.sum(1, keepdim=True)
B = B.reshape((27,27))

In [790]:
# generating from the trigram model
for i in range(10):
    ix1 = 0
    ix2 = torch.multinomial(B[ix1], num_samples=1, replacement=True).item()
    outputs = [itos[ix2]]
    while True:
        p = P[ix1, ix2]
        x = torch.multinomial(p, num_samples=1, replacement=True).item()
        outputs.append(itos[x])
        ix1 = ix2
        ix2 = x
        if x == 0:
            break
    print(''.join(outputs))

kayahmxgmtre.
ay.
marnwara.
osidel.
alas.
elah.
akini.
milamicksiavi.
briccolilleeniceigh.
tron.


In [795]:
# evaluating loss on trainset
nll = 0.0
cnt = 0.0
for w in trainset[:]:
    w = ['.'] + list(w) + ['.']
    for i in range(len(w)-2):
        a, b, c = w[i], w[i+1], w[i+2]
        prob = P[stoi[a], stoi[b], stoi[c]]
        cnt+=1
        logprob = torch.log(prob)
        nll += logprob
        # print(f'{a} {b} {c} : {prob.item():.4f} {logprob.item():.4f}')
nll = -nll/cnt
print(nll.item())

2.0737550258636475


In [796]:
# evaluating loss on testset
nll = 0.0
cnt = 0.0
for w in testset[:]:
    w = ['.'] + list(w) + ['.']
    for i in range(len(w)-2):
        a, b, c = w[i], w[i+1], w[i+2]
        prob = P[stoi[a], stoi[b], stoi[c]]
        cnt+=1
        logprob = torch.log(prob)
        nll += logprob
        # print(f'{a} {b} {c} : {prob.item():.4f} {logprob.item():.4f}')
nll = -nll/cnt
print(nll.item())

2.0904829502105713


## Neural Network

In [809]:
xs = []
ys = []
xst = []
yst = []

for w in trainset[:]:
    w = ['.'] + list(w) + ['.']
    for i in range(len(w)-2):
        a, b, c = w[i], w[i+1], w[i+2]
        xs.append([stoi[a], stoi[b]])
        ys.append(stoi[c])

for w in testset[:]:
    w = ['.'] + list(w) + ['.']
    for i in range(len(w)-2):
        a, b, c = w[i], w[i+1], w[i+2]
        xst.append([stoi[a], stoi[b]])
        yst.append(stoi[c])

xs = torch.tensor(xs)
ys = torch.tensor(ys)
xst = torch.tensor(xst)
yst = torch.tensor(yst)

In [819]:
# stacking one hot encoded vectors train
a1 = F.one_hot(xs[:,0], num_classes=27)
a2 = F.one_hot(xs[:,1], num_classes=27)
xenc = torch.hstack((a1,a2)).float()
nums = xenc.shape[0]

# stacking one hot encoded vectors test 
b1 = F.one_hot(xst[:,0], num_classes=27)
b2 = F.one_hot(xst[:,1], num_classes=27)
xenct = torch.hstack((b1,b2)).float()
nums = xenc.shape[0]

In [820]:
# one layer model
model = nn.Linear(54, 27)

In [821]:
# lossfn for calculating loss and optimizer for upadting gradient
lossfn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.05)

In [822]:
# train
epochs = 500
epfact = epochs/10
for i in range(epochs):
    # train
    y = model(xenc)
    
    loss = lossfn(y, ys)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    # test
    yt = model(xenct)
    losst = lossfn(yt, yst)
    
    if i % epfact == 0:
        print(f'train: {loss.item():.4f} || test: {losst.item():.4f}')

train: 3.3053 || test: 3.1599
train: 2.2530 || test: 2.2540
train: 2.2408 || test: 2.2428
train: 2.2386 || test: 2.2410
train: 2.2377 || test: 2.2405
train: 2.2373 || test: 2.2403
train: 2.2371 || test: 2.2402
train: 2.2369 || test: 2.2402
train: 2.2368 || test: 2.2402
train: 2.2367 || test: 2.2402


In [848]:
# generate
for i in range(10):
    ix1 = 0
    ix2 = torch.multinomial(B[ix1], num_samples=1, replacement=True).item()
    outputs = [itos[ix2]]
    while True:
        x1 = F.one_hot(torch.tensor(ix1), num_classes=27)
        x2 = F.one_hot(torch.tensor(ix2), num_classes=27)
        xen = torch.hstack((x1,x2)).float()
        y = model(xen)
        p = torch.softmax(y,0)
        x = torch.multinomial(p, num_samples=1, replacement=True).item()
        outputs.append(itos[x])
        ix1 = ix2
        ix2 = x
        if x == 0:
            break
    print(''.join(outputs))

dwahia.
ah.
jenth.
brnel.
ailinyland.
chylen.
mari.
keigubira.
ben.
kariclia.
