In [16]:
from tqdm import tqdm
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F

# Read data

In [17]:
# Load the dataset, one entry per line. The context manager closes the file
# handle as soon as its contents are read instead of leaving it open for the
# lifetime of the kernel.
with open('../data/names.txt', 'r') as f:
    words = f.read().splitlines()

In [18]:
# Number of lines read from names.txt.
len(words)

32033

In [19]:
# Peek at the first five entries.
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [20]:
# Vocabulary: every distinct character that occurs in any word, in sorted order.
chars = sorted({ch for w in words for ch in w})

# Utilities: character/index lookup tables

In [21]:
# Index -> character table. Index 0 maps to '.', and the characters of
# `chars` follow at indices 1..len(chars) in their sorted order.
itos = dict(enumerate(['.'] + chars))

In [22]:
# Character -> index table: the inverse of itos.
stoi = dict(zip(itos.values(), itos.keys()))

# Build a neural language model

## prepare data

In [23]:
block_size = 3  # context length: how many preceding characters feed each prediction
X, Y = [], []   # X collects context windows (lists of indices), Y the index that follows each window
for w in words[:2]:
    # Pad the front with block_size zeros (the index of '.') and terminate the word with '.'.
    ixs = [0] * block_size + [stoi[ch] for ch in w + '.']
    # Slide a block_size-wide window over ixs; the entry right after the window is the target.
    for start in range(len(ixs) - block_size):
        stop = start + block_size
        X.append(ixs[start:stop])
        Y.append(ixs[stop])

In [24]:
# Convert the Python lists into tensors: X has one row per example, Y one entry per example.
X, Y = torch.tensor(X), torch.tensor(Y)

In [25]:
# Check that inputs and targets contain the same number of examples.
X.shape, Y.shape

(torch.Size([12, 3]), torch.Size([12]))

## embedding matrix C

In [26]:
# Seed the global RNG before the first random draw so that a fresh
# Restart & Run All produces the same parameters (and therefore the same
# loss) every time. Outputs recorded before this seed was added will differ.
torch.manual_seed(2147483647)

# Embedding table: one 2-dimensional row per vocabulary entry.
C = torch.randn(len(itos), 2)
C.shape

torch.Size([27, 2])

In [27]:
# Index C with the integer tensor X: every index in X is replaced by the
# matching row of C, so the result has X's shape plus a trailing embedding axis.
emb = C[X]
emb.shape

torch.Size([12, 3, 2])

## hidden layer

In [28]:
# Hidden-layer parameters. The weight takes 6 inputs (3 context positions x
# 2 embedding dims once the embeddings are flattened) and produces 100
# hidden units; the bias has one entry per hidden unit.
w1 = torch.randn((6, 100))
b1 = torch.randn((100,))

In [29]:
# Flatten each example's context embeddings into one row, apply the affine
# map (w1, b1), then squash the result with tanh.
h = torch.tanh(emb.view(emb.size(0), -1) @ w1 + b1)
h.shape

torch.Size([12, 100])

## final layer

In [33]:
# Output-layer parameters: map the 100 hidden units to one score per
# vocabulary entry, with one bias entry per vocabulary entry.
w2 = torch.randn((100, len(itos)))
b2 = torch.randn((len(itos),))

In [34]:
# Unnormalised scores for every vocabulary entry, one row per example.
logits = torch.matmul(h, w2) + b2
logits.shape

torch.Size([12, 27])

In [36]:
# Cross-entropy of the logits against the target indices Y, shown as a single scalar.
loss = F.cross_entropy(input=logits, target=Y)
loss

tensor(17.1174)

# Train a neural language model