In [None]:
!wget https://raw.githubusercontent.com/karpathy/makemore/master/names.txt

In [None]:
# Read the dataset into a list with one entry per line of names.txt.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until the garbage collector gets to it.
with open('names.txt', 'r') as f:
  words = f.read().splitlines()

In [None]:
# Number of entries (lines read from names.txt) in the dataset.
len(words)

In [None]:
# Length of the shortest entry in the dataset.
min(map(len, words))

In [None]:
# Length of the longest entry in the dataset.
max(map(len, words))

In [None]:
import torch

In [None]:
# Bigram count table: N[a, b] will hold how often character index b follows
# character index a. 27 rows/columns -- presumably 26 letters plus the '.'
# boundary token; confirm against the dataset's alphabet.
N = torch.zeros(27, 27, dtype=torch.int32)

In [None]:
# Vocabulary: every distinct character that appears in the dataset, sorted.
chars = sorted(set(''.join(words)))
# Character -> integer index. Real characters are numbered from 1 ...
stoi = dict(zip(chars, range(1, len(chars) + 1)))
# ... and index 0 is assigned to '.', the start/end-of-word marker.
stoi['.'] = 0
# Integer index -> character (inverse of stoi).
itos = {index: ch for ch, index in stoi.items()}

In [None]:
# Count every adjacent character pair (bigram) in the dataset.
# N is cleared in place first so the cell is idempotent: without this,
# re-running the cell would add a second copy of every count onto N.
N.zero_()
for w in words:
  # Wrap the word in '.' markers so "starts a word" and "ends a word"
  # are counted as ordinary bigrams.
  chs = ['.'] + list(w) + ['.']
  for ch1, ch2 in zip(chs, chs[1:]):
    id1 = stoi[ch1]
    id2 = stoi[ch2]
    N[id1, id2] += 1

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
# Draw the bigram count table as a heat map and annotate each cell with the
# bigram (upper half of the cell) and its count (lower half).
plt.figure(figsize=(16, 16))
plt.imshow(N, cmap='Blues')
for i in range(27):
  for j in range(27):
    chstr = itos[i] + itos[j]
    # imshow puts the column index on the x axis, hence (j, i).
    plt.text(j, i, chstr, ha="center", va="bottom", color='gray')
    plt.text(j, i, N[i, j].item(), ha="center", va="top", color='gray')
# Hide the axes once, after all annotations are placed (this used to run on
# every iteration of the inner loop). The trailing ';' suppresses the repr.
plt.axis('off');

In [None]:
# Turn counts into probabilities: divide each row of N by that row's total,
# so P[a] is a distribution over the character that follows index a.
# keepdim=True keeps the totals as a column so they broadcast across each row.
P = torch.div(N, N.sum(dim=1, keepdim=True))

In [None]:
# Sample five words from the count-based bigram model, using a fixed seed so
# the output is repeatable.
g = torch.Generator()
g.manual_seed(2147483647)

for _ in range(5):
  letters = []
  idx = 0  # every word starts from the '.' token
  finished = False
  while not finished:
    # Draw the next character index from the row of the current character.
    idx = torch.multinomial(P[idx], num_samples=1, replacement=True, generator=g).item()
    letters.append(itos[idx])
    # Drawing index 0 ('.') ends the word.
    finished = idx == 0
  print(''.join(letters))

In [None]:
# Build (input, target) index pairs for the bigrams of the first word only.
xs, ys = [], []
for w in words[:1]:
  # Pad with the '.' boundary token on both sides.
  padded = '.' + w + '.'
  for cur_ch, next_ch in zip(padded, padded[1:]):
    xs.append(stoi[cur_ch])
    ys.append(stoi[next_ch])
# Convert the index lists to tensors.
xs, ys = torch.tensor(xs), torch.tensor(ys)

In [None]:
import torch.nn.functional as F
# One-hot encode the input indices (27 classes) and cast to float32 so the
# result can be multiplied with a float weight matrix.
x_enc = F.one_hot(xs, 27).to(torch.float32)

In [None]:
# Show the one-hot matrix as an image: one row per example in xs, one column per class.
plt.imshow(x_enc)

In [None]:
# Randomly initialised 27x27 weight matrix, tracked by autograd. It draws from
# the generator `g` created in an earlier cell.
W = torch.randn(27, 27, generator=g, requires_grad=True)
# One row of raw scores per example: with one-hot inputs this selects rows of W.
x_enc.matmul(W)

In [None]:
# Per-example negative log-likelihood of the correct next character.
#
# The forward pass is computed here so this cell is self-contained; it
# previously read `probs` from a cell further down, which fails with a
# NameError when the notebook is run top to bottom on a fresh kernel.
logits = x_enc @ W  # log-counts
counts = logits.exp()
probs = counts / counts.sum(axis = 1, keepdims = True)

# One entry per example, sized from the data rather than a hard-coded 5.
nlls = torch.zeros(len(ys))
for i in range(len(ys)):
  y = ys[i].item()  # index of the character that actually came next

  p = probs[i, y]  # probability the model assigned to that character

  logp = torch.log(p)

  nll = -logp

  nlls[i] = nll


In [None]:
# forward pass
logits = x_enc @ W  # log-counts
counts = logits.exp()  # exponentiate to get positive "counts"
probs = counts / counts.sum(axis = 1, keepdims = True)  # normalise each row to sum to 1
# Mean negative log-likelihood of the correct next characters. The row index
# is sized from ys (not a hard-coded 5) so the cell keeps working if the
# dataset cell above is changed to use more than one word.
loss = -probs[torch.arange(ys.nelement()), ys].log().mean()


In [None]:
# backward pass
# Clear any stale gradient first: backward() accumulates into W.grad, so
# repeating the forward / backward / update cells would otherwise add the new
# gradient onto the ones from earlier iterations.
W.grad = None
loss.backward()

In [None]:
# Gradient-descent update: move W against its gradient with step size 0.1.
# Writing through .data keeps the update itself out of the autograd graph.
W.data += -0.1 * W.grad

In [None]:
# create the dataset: one (current index, next index) pair per bigram in every word
xs, ys = [], []
for w in words:
  # Pad with the '.' boundary token on both sides.
  padded = '.' + w + '.'
  for cur_ch, next_ch in zip(padded, padded[1:]):
    xs.append(stoi[cur_ch])
    ys.append(stoi[next_ch])
xs, ys = torch.tensor(xs), torch.tensor(ys)
num = xs.numel()
print('number of examples: ', num)

# initialize the 'network': a single 27x27 weight matrix, seeded for repeatability
g = torch.Generator()
g.manual_seed(2147483647)
W = torch.randn(27, 27, generator=g, requires_grad=True)

In [None]:
# gradient descent
# The one-hot inputs do not depend on W, so encode them once up front instead
# of rebuilding the same tensor on every iteration.
xenc = F.one_hot(xs, num_classes=27).float() # input to the network: one-hot encoding

for k in range(1):

  # forward pass
  logits = xenc @ W # predict log-counts
  counts = logits.exp() # counts, equivalent to N
  probs = counts / counts.sum(1, keepdims=True) # probabilities for next character
  # mean negative log-likelihood plus a small L2 penalty on the weights
  loss = -probs[torch.arange(num), ys].log().mean() + 0.01*(W**2).mean()
  print(loss.item())

  # backward pass
  W.grad = None # set to zero the gradient
  loss.backward()

  # update
  W.data += -50 * W.grad

In [None]:
# finally, sample from the 'neural net' model
g = torch.Generator()
g.manual_seed(2147483647)

for _ in range(5):

  ix = 0  # every word starts from the '.' token
  out = []
  finished = False
  while not finished:

    # The next-character distribution comes from a forward pass through W
    # here, where the count-based sampler looked up the row P[ix].
    xenc = F.one_hot(torch.tensor([ix]), num_classes=27).float()
    logits = xenc @ W # predict log-counts
    counts = logits.exp() # counts, equivalent to N
    p = counts / counts.sum(1, keepdims=True) # probabilities for next character

    ix = torch.multinomial(p, num_samples=1, replacement=True, generator=g).item()
    out.append(itos[ix])
    # Drawing index 0 ('.') ends the word.
    finished = ix == 0
  print(''.join(out))