# **makemore** character-level language model

In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read the training names, one per line. The context manager closes the file
# handle as soon as the contents are read instead of leaving it open.
with open('names.txt', 'r') as f:
    words = f.read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [3]:
# number of names read from names.txt
len(words)

32033

### (re-)building our training dataset

In [4]:
# vocabulary: every distinct character that occurs in the names, in sorted order
chars = sorted(set(''.join(words)))
# string -> integer: characters are numbered from 1 so that index 0 stays free for '.'
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
# integer -> string: the inverse of stoi
itos = {index: char for char, index in stoi.items()}

In [5]:
# build the dataset: each example pairs a window of `block_size` character
# indices with the index of the character that follows it

block_size = 3  # how many preceding characters make up one context window
X = []  # context windows (lists of `block_size` ints)
Y = []  # index of the next character for each window

for w in words[:3]:  # only the first three names are used here
    print(w)
    context = block_size * [0]  # initial window is all index 0, i.e. '.'
    for ch in w + '.':  # the appended '.' is the final target for each name
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        window_text = ''.join(itos[tok] for tok in context)
        print(window_text, '----->', itos[ix])
        # slide the window: drop the oldest index, append the newest.
        # A new list is built each time, so windows already stored in X are not mutated.
        context = [*context[1:], ix]

# convert the nested Python lists into integer tensors
X, Y = torch.tensor(X), torch.tensor(Y)

emma
... -----> e
..e -----> m
.em -----> m
emm -----> a
mma -----> .
olivia
... -----> o
..o -----> l
.ol -----> i
oli -----> v
liv -----> i
ivi -----> a
via -----> .
ava
... -----> a
..a -----> v
.av -----> a
ava -----> .


In [6]:
# sanity check: shapes and dtypes of the context tensor X and the target tensor Y
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([16, 3]), torch.int64, torch.Size([16]), torch.int64)

### implementing the embedding lookup table

In [7]:
# Embedding lookup table: 27 rows of 2 values each, one row per integer index 0..26.
# NOTE(review): 27 presumably equals the vocabulary size (len(stoi)) -- confirm if names.txt changes.
#
# Seed torch's global RNG before drawing any parameters so that this cell, and every
# later unseeded torch.randn call, yields the same values on each Restart & Run All.
torch.manual_seed(2147483647)
C = torch.randn((27, 2))
C

tensor([[ 4.2060e-01, -2.3366e+00],
        [ 4.1419e-01, -7.6774e-01],
        [-1.8118e-01, -6.0973e-01],
        [-2.1867e-03, -3.8699e-01],
        [-4.8188e-01,  9.0023e-01],
        [-1.4293e-01,  1.1119e+00],
        [ 2.0074e+00, -8.1291e-01],
        [ 4.6895e-02,  1.8592e+00],
        [-2.2277e-01, -8.2405e-01],
        [-1.0707e+00,  1.0243e-02],
        [ 1.9754e+00,  1.1096e+00],
        [-4.9029e-01,  1.7643e-01],
        [ 3.0319e-01, -1.4487e+00],
        [ 9.0043e-02, -4.4612e-03],
        [-2.5091e-01,  6.1851e-01],
        [ 2.5915e+00, -4.6602e-02],
        [ 1.6063e-01,  2.2177e-01],
        [ 7.9119e-01,  3.8214e-01],
        [-2.5124e-01,  2.7815e-01],
        [ 1.2324e+00,  6.7852e-01],
        [-9.8188e-01,  2.9362e-01],
        [ 8.1642e-01,  2.2237e+00],
        [ 3.5461e-01,  1.1289e+00],
        [-3.3452e-01, -6.2480e-02],
        [-1.4059e+00, -2.9124e+00],
        [ 6.0960e-03, -1.5552e+00],
        [-1.6683e+00, -1.4582e-01]])

In [8]:
# index C with the whole integer tensor X at once: every index in X is replaced
# by the corresponding row of C, so the result has X's shape plus C's trailing dimension
emb = C[X]
emb.shape

torch.Size([16, 3, 2])

### implementing the hidden layer

In [9]:
# hidden-layer parameters: 3*2 = 6 inputs (3 context positions x 2 embedding values) -> 100 units
# NOTE: the name `b` is assigned again by the output-layer cell further down; if that
# cell has already run, re-run this one before recomputing the hidden layer.
W1 = torch.randn(3 * 2, 100)
b = torch.randn((100,))

In [10]:
# Hidden layer: flatten each example's embeddings into a single row, apply the
# affine map, then squash with tanh.
# view(..., -1) lets torch infer the flattened width from emb itself rather than
# hard-coding 6, so the cell keeps working if the context length or embedding size changes.
# Expects `b` to be the hidden-layer bias (one value per column of W1).
h = emb.view(emb.shape[0], -1) @ W1 + b
h = torch.tanh(h)
h

tensor([[-1.0000, -0.9631,  0.9960,  ...,  1.0000,  0.9982,  1.0000],
        [-0.8924,  0.6418, -0.9995,  ...,  0.5443,  0.6438,  0.9999],
        [-0.9776,  0.9960, -0.9881,  ..., -0.9795, -0.6629,  0.9998],
        ...,
        [-0.9998, -0.7042,  0.4726,  ...,  0.9945,  0.9443,  1.0000],
        [-0.9176,  0.9830, -0.9970,  ..., -0.9805, -0.9300,  0.9998],
        [-0.9987,  0.9597,  0.8682,  ..., -0.9767, -0.9403,  0.9998]])

In [11]:
# shape of the hidden-layer activations
h.shape

torch.Size([16, 100])

### implementing the output layer

In [12]:
# output-layer parameters: 100 hidden units -> 27 output scores
# NOTE: this rebinds `b`; the 100-element hidden-layer bias is no longer reachable under that name
W2 = torch.randn(100, 27)
b = torch.randn((27,))

In [13]:
# output layer: affine map of the hidden activations; b is broadcast across the rows of h @ W2
logits = h @ W2 + b
logits

tensor([[ 4.3915e-01, -3.7228e+00, -1.2891e+01, -2.0526e+00,  7.3219e+00,
          8.1779e+00,  8.1899e+00,  5.9789e+00, -8.4125e+00,  4.7627e+00,
         -1.5608e+01,  1.0954e+01,  4.6216e+00, -3.3486e+00, -9.1435e+00,
          3.2498e+00,  7.2785e+00, -2.2141e+00,  2.9353e+00,  8.8912e+00,
          8.7133e+00,  4.5743e-01,  4.6461e+00,  3.2540e-01, -4.2130e+00,
         -1.0861e+01, -3.4935e+00],
        [ 8.9282e+00, -7.1626e+00, -2.4108e+01, -2.0617e+00, -1.6109e+00,
          1.0850e+00,  1.0771e+01, -1.4751e+01, -6.8082e+00, -1.6813e+00,
         -1.3958e+01,  9.7160e+00, -2.4784e+00, -2.7815e-01, -1.7351e+01,
          4.1677e+00,  1.1386e+01, -4.2163e+00,  4.6554e+00,  7.9534e+00,
         -7.7984e-01,  1.1584e+01,  4.2086e+00,  4.1268e+00, -4.3087e+00,
          4.8384e-01, -1.1753e+01],
        [ 1.4383e+01, -8.5114e+00,  3.0757e+00, -3.7653e+00, -9.4435e+00,
          1.5244e+01,  1.1683e+01, -7.9438e+00,  1.1821e+00,  2.0264e+00,
         -1.3665e+01,  2.9424e+00,  3.28

In [14]:
# shape of the output-layer scores
logits.shape

torch.Size([16, 27])