In [1]:
# Load the dataset: one name per line, with line terminators stripped.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until the garbage collector gets to it.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [2]:
# ----- Single layer network ------- #
import torch

In [3]:
# Character vocabulary: every distinct character that appears in `words`,
# in sorted order, is numbered from 1 upwards. Index 0 is reserved for '.',
# the marker used for both the start and the end of a word.
sorted_letters = sorted(set().union(*words))

# stoi: character -> integer index
stoi = {letter: position for position, letter in enumerate(sorted_letters, start=1)}
stoi['.'] = 0

# itos: integer index -> character (exact inverse of stoi)
itos = {position: letter for letter, position in stoi.items()}

In [33]:
# Build the bigram training set. Each word is padded with the '.' marker on
# both sides; for every pair of adjacent characters, xs receives the index of
# the first character and ys the index of the character that follows it.
xs = []
ys = []
for w in words:
    new_w = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(new_w, new_w[1:]):
        # Index stoi directly rather than using .get(): an unknown character
        # raises KeyError right here instead of silently appending None.
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])

xs = torch.tensor(xs)
ys = torch.tensor(ys)
ys

tensor([ 5, 13, 13,  ..., 26, 24,  0])

In [25]:
# We cannot feed these integer indices directly into the neural network; they must first be converted to vectors.
# We use one-hot encoding for this.
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline
# One-hot encode the input indices (one column per vocabulary entry) and cast
# the integer result to float32 so it can be multiplied with the weights.
xenc = F.one_hot(xs, len(itos)).to(torch.float32)
# To visualise the encoding, uncomment: plt.imshow(xenc)

In [44]:
# Randomly initialise the weights of a single linear layer: one neuron per
# vocabulary entry, each neuron receiving one input per vocabulary entry
# (27 x 27 when the vocabulary is '.' plus 26 letters).
# The shape is derived from the vocabulary rather than hard-coded so that it
# always matches the one-hot width used to build xenc.
g = torch.Generator().manual_seed(2147483647)  # fixed seed for reproducibility
W = torch.randn((len(itos), len(itos)), generator=g, requires_grad=True)

In [45]:
# Forward pass with the softmax written out step by step.
logits = torch.matmul(xenc, W)  # network outputs, interpreted as log-counts
counts = torch.exp(logits)  # softmax step 1: exponentiate -> positive "counts" (analogous to N)
probs = counts / counts.sum(dim=1, keepdim=True)  # softmax step 2: normalise each row to sum to 1



In [46]:
# Sanity check: each of the first five rows of probs should sum to 1.
[probs[row_idx].sum() for row_idx in range(5)]

[tensor(1.0000, grad_fn=<SumBackward0>),
 tensor(1., grad_fn=<SumBackward0>),
 tensor(1., grad_fn=<SumBackward0>),
 tensor(1., grad_fn=<SumBackward0>),
 tensor(1., grad_fn=<SumBackward0>)]

In [47]:
# Calculate the loss on the first five examples: the negative log-likelihood
# of the correct next character under the model, averaged over the examples.
nlls = torch.zeros(5)
for i in range(5):
    y = ys[i].item()  # index of the true next character for example i
    p = probs[i, y]  # probability the model assigns to that character
    log_likelihood = torch.log(p)
    negative_log_likelihood = -log_likelihood
    nlls[i] = negative_log_likelihood

nlls.mean().item()

3.7693049907684326

In [66]:
# Prepare the data: collect every (current character, next character) index
# pair from the '.'-padded words.
xs = []
ys = []
for w in words:
    new_w = ['.'] + list(w) + ['.']
    for ch1, ch2 in zip(new_w, new_w[1:]):
        # Direct indexing: an unknown character raises KeyError here instead of
        # silently appending None.
        xs.append(stoi[ch1])
        ys.append(stoi[ch2])

num = len(xs)  # number of training examples (bigrams)
xs = torch.tensor(xs)
# Convert the targets to a tensor once, here. Left as a Python list, ys would
# be re-converted every time it is used to index a tensor in the training loop.
ys = torch.tensor(ys)
xenc = F.one_hot(xs, num_classes=len(itos)).float()


In [67]:
# Train the single-layer model with full-batch gradient descent.
num_steps = 1000  # number of gradient-descent iterations
learning_rate = 50  # step size applied to the gradient
reg_strength = 0.01  # weight of the mean-squared-weight penalty in the loss

# Re-initialise the weights from a fixed seed; the shape follows the
# vocabulary size so it always matches the one-hot width of xenc.
g = torch.Generator().manual_seed(2147483647)
W = torch.randn((len(itos), len(itos)), generator=g, requires_grad=True)

# Row indices used to pick each example's target probability; they do not
# change between iterations, so build them once.
example_idx = torch.arange(num)

for k in range(num_steps):
    # Forward pass
    logits = xenc @ W
    counts = logits.exp()
    probs = counts / counts.sum(1, keepdims=True)
    p = probs[example_idx, ys]  # probability assigned to each correct next character
    loss = -p.log().mean() + reg_strength * (W**2).mean()
    print(loss.item())

    # Backward pass
    W.grad = None  # reset the gradient before accumulating a new one
    loss.backward()

    # Update: modify the leaf tensor in place with autograd disabled, rather
    # than going through W.data.
    with torch.no_grad():
        W -= learning_rate * W.grad


3.7686190605163574
3.3787858486175537
3.1610772609710693
3.027181386947632
2.9344801902770996
2.8672285079956055
2.816653251647949
2.7771458625793457
2.745253562927246
2.7188305854797363
2.696505546569824
2.6773722171783447
2.6608054637908936
2.6463515758514404
2.633665084838867
2.622471332550049
2.6125471591949463
2.6037063598632812
2.595794439315796
2.5886809825897217
2.5822560787200928
2.5764293670654297
2.5711233615875244
2.566272497177124
2.5618226528167725
2.5577261447906494
2.5539441108703613
2.550442695617676
2.547192335128784
2.5441696643829346
2.5413525104522705
2.538722038269043
2.536262035369873
2.5339579582214355
2.531797409057617
2.529768228530884
2.527860164642334
2.5260636806488037
2.5243709087371826
2.522773265838623
2.52126407623291
2.519836664199829
2.5184857845306396
2.5172054767608643
2.515990972518921
2.5148372650146484
2.5137410163879395
2.51269793510437
2.511704921722412
2.5107579231262207
2.509854793548584
2.5089921951293945
2.5081679821014404
2.507380485534668

In [73]:
# Sample ten names from the trained model. Each name starts from index 0 (the
# '.' marker); the next index is drawn repeatedly from the model's predicted
# distribution until index 0 is drawn again.
# The intermediates use their own names so that the training-time globals
# (xenc, logits, counts, probs) are not overwritten with single-row tensors,
# which would make earlier cells fail if re-run afterwards.
g = torch.Generator().manual_seed(2147483647)
with torch.no_grad():  # sampling needs no gradients
    for i in range(10):
        out = []
        idx = 0
        while True:
            idx_onehot = F.one_hot(torch.tensor([idx]), num_classes=len(itos)).float()
            next_logits = idx_onehot @ W
            next_counts = next_logits.exp()
            next_probs = next_counts / next_counts.sum(1, keepdims=True)
            idx = torch.multinomial(next_probs, num_samples=1, replacement=True, generator=g).item()
            out.append(itos[idx])
            if idx == 0:
                break

        print(''.join(out))

cexze.
momasurailezityha.
konimittain.
llayn.
ka.
da.
staiyaubrtthrigotai.
moliellavo.
ke.
teda.


In [74]:
# finally, sample from the 'neural net' model
g = torch.Generator().manual_seed(2147483647)

with torch.no_grad():  # inference only: do not record an autograd graph
    for i in range(10):

        out = []
        ix = 0  # start from the '.' marker
        while True:

            # ----------
            # BEFORE:
            # p = P[ix]
            # ----------
            # NOW:
            # The intermediates use their own names so the training-time
            # globals (xenc, logits, counts) are not overwritten with
            # single-row tensors.
            ix_onehot = F.one_hot(torch.tensor([ix]), num_classes=len(itos)).float()
            ix_logits = ix_onehot @ W  # predict log-counts
            ix_counts = ix_logits.exp()  # counts, equivalent to N
            p = ix_counts / ix_counts.sum(1, keepdims=True)  # probabilities for next character
            # ----------

            ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
            out.append(itos[ix])
            if ix == 0:  # drew the '.' marker again: the name is complete
                break
        print(''.join(out))

cexze.
momasurailezityha.
konimittain.
llayn.
ka.
da.
staiyaubrtthrigotai.
moliellavo.
ke.
teda.
