In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read one name per line. The context manager closes the file handle as soon
# as the contents are read instead of leaving it open for the kernel's lifetime.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
# Preview the first few entries.
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [3]:
# Number of lines (one name each) read from names.txt.
len(words)

32033

In [4]:
# Character vocabulary plus the two lookup tables (char -> index, index -> char).
chars = sorted(set(''.join(words)))
# Letters are numbered from 1 so that index 0 stays free for the '.' marker.
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}
stoi['.'] = 0
# Inverse table, built in the same insertion order as stoi.
itos = {idx: ch for ch, idx in stoi.items()}
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [5]:
# Turn the first few names into (context -> next character) training pairs.

block_size = 3  # number of preceding characters used to predict the next one
X, Y = [], []
word_subset = 5  # only the first `word_subset` names are used, to keep things small

for w in words[:word_subset]:
    print(w)
    # Every name starts from a context made entirely of index 0 ('.').
    context = [0 for _ in range(block_size)]
    for ch in w + '.':  # the trailing '.' marks the end of the name
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        print(''.join([itos[i] for i in context]), '----->', itos[ix])
        # Slide the window: drop the oldest index, append the one just seen.
        context = [*context[1:], ix]

X = torch.tensor(X)
Y = torch.tensor(Y)



emma
... -----> e
..e -----> m
.em -----> m
emm -----> a
mma -----> .
olivia
... -----> o
..o -----> l
.ol -----> i
oli -----> v
liv -----> i
ivi -----> a
via -----> .
ava
... -----> a
..a -----> v
.av -----> a
ava -----> .
isabella
... -----> i
..i -----> s
.is -----> a
isa -----> b
sab -----> e
abe -----> l
bel -----> l
ell -----> a
lla -----> .
sophia
... -----> s
..s -----> o
.so -----> p
sop -----> h
oph -----> i
phi -----> a
hia -----> .


In [6]:
# Inspect the shape and dtype of the context tensor X and the target tensor Y.
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

In [7]:
# Length of the vector that represents each character.
embedding_dimension = 2
# Embedding table with 27 rows (one per character index 0..26), drawn from a
# standard normal distribution.
C = torch.randn(27, embedding_dimension)

In [8]:
# Look up a row of C for every index stored in X (integer-tensor indexing).
emb = C[X]
# The result keeps X's shape with the embedding dimension appended.
emb.shape

torch.Size([32, 3, 2])

In [9]:
# Hidden-layer parameters, sampled uniformly from [0, 1):
# 6 input features -> 100 hidden units.
W1 = torch.rand(6, 100)
b1 = torch.rand((100,))

In [10]:
# Flatten each example's stack of embeddings into a single row before the
# linear layer. Letting view() infer the width (instead of hard-coding 6)
# keeps this cell working if the context length or embedding size changes.
h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)

In [11]:
# One row of hidden activations per example; the column count is W1's output width.
h.shape

torch.Size([32, 100])

In [12]:
# Display the hidden activations; tanh keeps every entry within [-1, 1].
h

tensor([[ 0.9097,  0.8623,  0.8844,  ...,  0.7231,  0.8687,  0.6856],
        [ 0.4161,  0.0192, -0.4967,  ...,  0.8055,  0.5773, -0.4538],
        [-0.3747,  0.6074,  0.0551,  ...,  0.3062, -0.7884,  0.2767],
        ...,
        [ 0.9786,  0.9991,  0.9989,  ...,  0.9947,  0.9818,  0.9147],
        [ 0.9992,  0.9932,  0.9842,  ...,  0.9537,  0.9983,  0.5296],
        [ 0.2267, -0.7468, -0.7764,  ..., -0.6215,  0.1358, -0.3052]])

In [13]:
# Output-layer parameters, sampled uniformly from [0, 1):
# 100 hidden units -> 27 output scores.
W2 = torch.rand((100, 27))
b2 = torch.rand((27,))

In [22]:
# Output layer: affine map of the hidden activations to unnormalised scores.
logits = torch.matmul(h, W2) + b2

In [23]:
# One row of scores per example, one column per column of W2.
logits.shape

torch.Size([32, 27])

In [24]:
# Exponentiate the logits to get positive, unnormalised "counts".
# Each row's maximum is subtracted first so the largest exponent is exp(0) = 1:
# float32 exp() overflows to inf for inputs above ~88, which would turn the
# normalised probabilities into nan. The shift rescales every entry of a row
# by the same factor, so it cancels out once the row is normalised.
counts = (logits - logits.max(1, keepdim=True).values).exp()

In [25]:
# Normalise every row of counts by its own total so each row sums to 1.
row_totals = counts.sum(dim=1, keepdim=True)
probs = counts / row_totals

In [19]:
# Row-wise normalisation only rescales counts, so probs has the same shape.
probs.shape

torch.Size([32, 27])

In [26]:
# Sanity check: a normalised row should sum to 1 (up to floating-point rounding).
probs[0].sum()

tensor(1.)

In [27]:
# Display the target character indices, one per training example.
Y

tensor([ 5, 13, 13,  1,  0, 15, 12,  9, 22,  9,  1,  0,  1, 22,  1,  0,  9, 19,
         1,  2,  5, 12, 12,  1,  0, 19, 15, 16,  8,  9,  1,  0])

In [30]:
# Probability the model assigns to index 5 for the first example.
probs[0, 5]

tensor(3.4445e-06)

In [31]:
# Integer tensor 0..31: one row index per example.
torch.arange(32)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])

In [34]:
# For every example, pick the probability assigned to its target index by
# pairing row i with column Y[i]. The row range is derived from Y rather than
# hard-coded to 32, so the lookup stays correct when the dataset size changes.
result = probs[torch.arange(Y.shape[0]), Y]

In [35]:
# Average negative log-likelihood of the target characters.
loss = -torch.log(result).mean()

In [36]:
# Display the scalar loss: the mean negative log-probability of the targets.
loss

tensor(5.4488)

### Summarized Forward Pass

In [43]:
# build the dataset
def build_dataset(block_size, word_subset, verbose=False,
                  corpus=None, char_to_ix=None, ix_to_char=None):
    """Build (context, next-character) training pairs from the first names.

    Each name is extended with a terminating '.' and scanned left to right;
    every character becomes a target whose input is the ``block_size``
    character indices that precede it (padded with the '.' index at the start).

    Args:
        block_size: Number of preceding characters in each context.
        word_subset: How many names to take from the start of the corpus
            (used as a slice bound, so ``None`` means all of them).
        verbose: When true, print every name and every context/target pair.
        corpus: Sequence of names. Defaults to the notebook-level ``words``.
        char_to_ix: Character -> index mapping. Defaults to the notebook-level
            ``stoi``. Must contain '.'.
        ix_to_char: Index -> character mapping, only needed when ``verbose``
            is true. Defaults to the notebook-level ``itos``.

    Returns:
        ``(X, Y)``: ``X`` is an int64 tensor of shape
        ``(num_examples, block_size)`` and ``Y`` an int64 tensor of shape
        ``(num_examples,)``. Both keep those shapes and dtypes when there are
        no examples at all.
    """
    # Fall back to the notebook globals only when no explicit value is given,
    # so existing calls keep working while the function can be used on its own.
    corpus = words if corpus is None else corpus
    char_to_ix = stoi if char_to_ix is None else char_to_ix
    if verbose:
        ix_to_char = itos if ix_to_char is None else ix_to_char

    boundary_ix = char_to_ix['.']  # index used both as padding and as end marker
    contexts, targets = [], []

    for w in corpus[:word_subset]:
        if verbose:
            print(w)
        context = [boundary_ix] * block_size
        for ch in w + '.':
            ix = char_to_ix[ch]
            contexts.append(context)
            targets.append(ix)
            if verbose:
                print(''.join(ix_to_char[i] for i in context), '----->', ix_to_char[ix])
            # A new list is created here, so rows already stored are not mutated.
            context = context[1:] + [ix]

    # An explicit dtype and shape keep the tensors usable as indices even when
    # the lists are empty (torch.tensor([]) would be float32 with shape (0,)).
    X = torch.tensor(contexts, dtype=torch.long).reshape(len(contexts), block_size)
    Y = torch.tensor(targets, dtype=torch.long)
    return X, Y

(torch.Size([32, 3]), torch.Size([32]))

In [52]:
# Hyperparameters for the summarized forward pass.
embedding_dimension = 2
neuron_second_layer = 100
block_size = 3
word_subset = 5
X, Y = build_dataset(block_size=block_size, word_subset=word_subset)
print("Input Shape -->", X.shape, Y.shape)
# Seeded generator. It has to be passed to every sampling call below;
# creating it alone does not seed anything, and the parameters would
# otherwise differ on every run.
g = torch.Generator().manual_seed(2147483647)
# Embedding table: 27 rows, one per character index (26 letters plus '.').
C = torch.randn((27, embedding_dimension), generator=g)
emb = C[X]
# Width of one flattened example: all context embeddings side by side.
input_dim = embedding_dimension * block_size
W1 = torch.rand((input_dim, neuron_second_layer), generator=g)
b1 = torch.rand(neuron_second_layer, generator=g)
h = torch.tanh(emb.view(-1, input_dim) @ W1 + b1)


Input Shape torch.Size([32, 3]) torch.Size([32])


In [50]:
# One row of hidden activations per example; the column count is W1's output width.
h.shape

torch.Size([32, 100])