In [2]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
words = open('./names.txt').read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [22]:
chars = sorted(list(set(''.join(words)))) #lol
token_lookup = {c: i+1 for i, c in enumerate(chars)}
token_lookup['.'] = 0
char_lookup = {i:c for c, i in token_lookup.items()}
TOTAL_TOKENS = len(char_lookup.keys())

In [21]:
# build the dataset

BLOCK_SIZE = 3 # context size to give the model in order to predict the next character
X, Y = [], []

for word in words[:5]:
    
    start_padding = "." * BLOCK_SIZE
    padded_word = f"{start_padding}{word}."
    tokenized_word = [token_lookup[c] for c in padded_word]
    for i in range(len(tokenized_word)-BLOCK_SIZE):
        X.append(tokenized_word[i:i+BLOCK_SIZE])
        Y.append(tokenized_word[i+BLOCK_SIZE])
        
X = torch.tensor(X)
Y = torch.tensor(Y)
X.shape, Y.shape

(torch.Size([32, 3]), torch.Size([32]))

In [38]:
# embed 27 characters into two dimensional space
EMBEDDING_DIMS = 2
C = torch.randn((TOTAL_TOKENS, EMBEDDING_DIMS))
C

tensor([[-0.9871, -1.0724],
        [-0.8571,  1.6126],
        [ 0.2801,  0.4489],
        [-1.7662, -0.8762],
        [ 1.8949, -0.9842],
        [ 0.1778,  1.3857],
        [-1.2596,  0.1909],
        [ 0.5895,  0.2569],
        [ 2.1765, -1.2076],
        [ 0.1602, -0.8000],
        [-0.1853, -0.1956],
        [-0.5587, -0.5373],
        [-0.3823, -1.1243],
        [-0.1691,  1.6164],
        [-0.3926,  0.4609],
        [ 0.5397,  1.0564],
        [ 1.1892, -0.5385],
        [ 0.0031, -0.9613],
        [-0.0127,  0.7453],
        [-0.7636,  1.2136],
        [ 0.3422, -1.5489],
        [ 0.0591, -0.8433],
        [ 0.1192, -1.1831],
        [ 0.8894, -0.1981],
        [-0.5064,  1.0326],
        [-0.9917, -0.0787],
        [ 0.5777, -0.3089]])

In [47]:
embeddings = C[X]
TOTAL_NEURONS = 100
W1 = torch.randn((EMBEDDING_DIMS * BLOCK_SIZE, TOTAL_NEURONS))
b1 = torch.randn((TOTAL_NEURONS,))
W1.shape, b1.shape

(torch.Size([6, 100]), torch.Size([100]))

In [62]:
cat_embeddings = embeddings.view(-1, EMBEDDING_DIMS * BLOCK_SIZE)
hidden_states_01 = torch.tanh(cat_embeddings @ W1 + b1)
hidden_states_01.shape

torch.Size([32, 100])

In [69]:
W2 = torch.randn(TOTAL_NEURONS, TOTAL_TOKENS)
hidden_states_02 = hidden_states_01 @ W2
logits = hidden_states_02.exp()
logits = logits / logits.sum(0)
logits

tensor([[9.6389e-05, 7.7032e-08, 1.0936e-03, 5.5758e-06, 1.9664e-01, 1.3755e-09,
         2.7387e-05, 6.4887e-06, 2.0643e-10, 1.5950e-06, 3.3658e-02, 4.5548e-10,
         2.9393e-02, 1.2991e-11, 3.4195e-02, 6.7423e-06, 1.9119e-05, 4.4475e-11,
         2.0776e-07, 3.1824e-07, 1.4164e-09, 2.2206e-06, 4.2816e-12, 8.0264e-04,
         2.2207e-11, 2.3303e-03, 6.7624e-15],
        [6.6097e-02, 1.1638e-09, 3.4729e-10, 1.2330e-08, 7.8095e-07, 4.6742e-07,
         4.2017e-08, 2.8688e-07, 1.0338e-07, 3.4273e-03, 5.6219e-05, 2.2609e-05,
         1.9877e-04, 4.9575e-12, 2.2612e-04, 3.0047e-04, 1.0991e-04, 6.0704e-08,
         1.5237e-01, 1.5850e-02, 2.9657e-05, 2.6801e-06, 2.6907e-13, 1.1732e-07,
         2.3879e-07, 6.2146e-03, 5.0326e-10],
        [6.0226e-03, 8.7509e-06, 6.5075e-08, 7.3390e-01, 1.1866e-06, 3.5420e-10,
         8.0445e-01, 3.7558e-03, 2.0167e-01, 3.8683e-03, 1.7622e-05, 3.7105e-06,
         4.6019e-10, 3.4551e-09, 2.2274e-08, 5.7843e-08, 8.8595e-10, 1.9433e-06,
         1.0875e-