In [73]:
import torch
import torch.nn.functional as F

In [16]:
# Load the dataset: one entry per line of names.txt, newlines stripped.
# The `with` block guarantees the file handle is closed as soon as the
# contents have been read, even if reading raises.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [17]:
# Sanity check: number of lines that were read into `words`.
len(words)

32033

## Bigram Model

In [55]:
# Vocabulary: every distinct character that appears in the dataset, sorted.
chars = sorted(set(''.join(words)))
# Character -> integer index. Index 0 is reserved for '.', the marker that
# stands for both "start of word" and "end of word".
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0
# Integer index -> character (inverse of stoi).
itos = {idx: ch for ch, idx in stoi.items()}

# Bigram count table held in a PyTorch tensor:
# N[i, j] = number of times character j immediately follows character i.
N = torch.zeros((27, 27), dtype=torch.int32)
for w in words:
    chs = ['.', *w, '.']  # wrap the word in boundary markers
    for ch1, ch2 in zip(chs[:-1], chs[1:]):  # consecutive character pairs
        N[stoi[ch1], stoi[ch2]] += 1

In [60]:
# Convert the raw bigram counts into probabilities: after normalisation,
# row i of P is the distribution over the character that follows character i.
P = N.float()
# Broadcasting gotcha: keepdim=True keeps the row sums as a (27, 1) column,
# so the division stretches across the columns and each row is divided by
# its own total. Dropping keepdim would give a (27,) vector that broadcasts
# as a row instead and silently divides by the wrong sums.
P /= P.sum(dim=1, keepdim=True)

In [61]:
# Generate 20 names by walking the bigram table: start at the boundary
# marker (index 0), repeatedly draw the next character from the current
# character's row of P, and stop when the boundary marker is drawn again.
g = torch.Generator().manual_seed(2147483647)  # fixed seed -> reproducible samples
for _ in range(20):
    ix = 0  # every name starts from the '.' marker
    out = []
    finished = False
    while not finished:
        ix = torch.multinomial(P[ix], num_samples=1, replacement=True, generator=g).item()
        out.append(itos[ix])
        finished = ix == 0  # index 0 is the end-of-word marker
    print(''.join(out))

mor.
axx.
minaymoryles.
kondlaisah.
anchshizarie.
odaren.
iaddash.
h.
jhinatien.
egushl.
h.
br.
a.
jayn.
ilemannariaenien.
be.
f.
akiinela.
trttanakeroruceyaaxatona.
lamoynayrkiedengin.


In [70]:
# Measure how good the bigram model is: sum log P(next | current) over every
# bigram of the evaluation words, then report the average negative
# log-likelihood as the loss (lower is better).
# This cell only reads P. It must not touch the count matrix N: evaluating
# a word is not training on it, and mutating N here would make the cell
# non-idempotent and skew any later re-normalisation of the counts.
log_likelihood = 0
n = 0  # number of bigrams scored
for w in ['deez']:  # swap in e.g. words[:3] to score names from the dataset
    chs = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(chs, chs[1:]):
        ix1 = stoi[ch1]
        ix2 = stoi[ch2]
        logprob = torch.log(P[ix1, ix2])  # -inf when P[ix1, ix2] is 0
        log_likelihood += logprob
        print(f'{ch1}{ch2}: {logprob:0.4f}')
        n += 1
print(log_likelihood)
print(-log_likelihood/n)  # average negative log-likelihood

.d: -2.9420
de: -1.4548
ee: -2.7769
ez: -4.7259
z.: -2.7072
tensor(-14.6069)
tensor(2.9214)


## Incorporating Neural Networks

In [134]:
# Build the training set of bigrams: for every adjacent character pair,
# xs gets the index of the current character (input) and ys the index of
# the character that follows it (target).
xs, ys = [], []
for w in words:
    padded = '.' + w + '.'  # boundary markers around the word
    xs.extend(stoi[ch] for ch in padded[:-1])
    ys.extend(stoi[ch] for ch in padded[1:])

xs = torch.tensor(xs)
ys = torch.tensor(ys)

# Weights of a single linear layer mapping a 27-wide one-hot input to 27
# outputs, drawn from a seeded generator so the initialisation is reproducible.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((27, 27), generator=g, requires_grad=True)

In [None]:
# Gradient descent on the single-layer bigram network.

# One-hot encode the integer inputs as vectors. The encoding does not depend
# on W, so it is computed once instead of on every iteration. The cast to
# float is required because one_hot returns integers and W is float.
xenc = F.one_hot(xs, num_classes=27).float()
# Row indices used to pick, for each example, the probability assigned to
# its target character (also loop-invariant).
example_idx = torch.arange(xs.nelement())

for _ in range(100):
    # forward pass: softmax turns the layer outputs into a distribution
    logits = xenc @ W
    counts = logits.exp()  # exponentiate to keep everything positive
    prob = counts / counts.sum(1, keepdim=True)

    # loss: average negative log-likelihood of the correct next character
    loss = -prob[example_idx, ys].log().mean()
    print(loss.item())

    # backward pass: clear the old gradient, then backpropagate
    W.grad = None
    loss.backward()

    # parameter update (learning rate 50). torch.no_grad() keeps the in-place
    # update out of the autograd graph without going through `.data`, which
    # bypasses autograd's safety checks.
    with torch.no_grad():
        W -= 50 * W.grad

In [144]:
# Generate 20 names from the neural net: same sampling walk as for a lookup
# table, except the next-character distribution comes from a forward pass
# through W rather than from a precomputed row.
g = torch.Generator().manual_seed(2147483647)  # fixed seed -> reproducible samples
for _ in range(20):
    ix = 0  # start from the '.' boundary marker
    out = []
    finished = False
    while not finished:
        # forward pass for a single character: one-hot -> logits -> softmax
        xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()
        counts = (xenc @ W).exp()  # logits are treated as log-counts
        p = counts / counts.sum(dim=1, keepdim=True)

        ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
        out.append(itos[ix])
        finished = ix == 0  # index 0 is the end-of-word marker
    print(''.join(out))

mor.
axwaninaymoryles.
kondmaisah.
anchshizarie.
odaren.
iaddash.
h.
jionatien.
egvonn.
ga.
t.
a.
jayn.
ilemannariaenien.
ad.
f.
akiinela.
trttanakerorudayaaxbrima.
lamoyonutonadengin.
torrederahmokallovxjos.
