In [639]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [642]:
# Read in words: the file holds one name per line.
# A context manager closes the file handle deterministically; the previous bare
# open(...).read() left the handle open until garbage collection.
with open(file='../data/names.txt', mode='r', encoding='utf-8') as names_file:
    words = names_file.read().splitlines()
words[:10]

['emma',
 'olivia',
 'ava',
 'isabella',
 'sophia',
 'charlotte',
 'mia',
 'amelia',
 'harper',
 'evelyn']

In [None]:
# Number of names read from the file.
len(words)

In [669]:
# Vocabulary: every distinct character that appears in the names, in sorted order.
chars = sorted({c for name in words for c in name})
# string -> integer; letters are numbered from 1 so that 0 stays free for '.'
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0  # '.' marks the start/end of a name
# integer -> string (inverse of stoi)
itos = dict((idx, ch) for ch, idx in stoi.items())
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


1

In [695]:
# Building the dataset
block_size = 3  # context length: how many preceding characters are used to predict the next one

X, Y = [], []  # X collects the context windows (inputs), Y the index of the character that follows (labels)
for word in words[:5]:
    print(word)
    context = [0] * block_size  # every name starts from a context padded with '.' (index 0)
    for ch in word + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)

        print(''.join([itos[i] for i in context]), '---->', itos[ix])
        context = [*context[1:], ix]  # slide the window: drop the oldest index, append the newest

X = torch.tensor(X)
Y = torch.tensor(Y)

emma
... ----> e
..e ----> m
.em ----> m
emm ----> a
mma ----> .
olivia
... ----> o
..o ----> l
.ol ----> i
oli ----> v
liv ----> i
ivi ----> a
via ----> .
ava
... ----> a
..a ----> v
.av ----> a
ava ----> .
isabella
... ----> i
..i ----> s
.is ----> a
isa ----> b
sab ----> e
abe ----> l
bel ----> l
ell ----> a
lla ----> .
sophia
... ----> s
..s ----> o
.so ----> p
sop ----> h
oph ----> i
phi ----> a
hia ----> .


tensor([ 5, 13, 13,  1,  0, 15, 12,  9, 22,  9,  1,  0,  1, 22,  1,  0,  9, 19,
         1,  2,  5, 12, 12,  1,  0, 19, 15, 16,  8,  9,  1,  0])

We are going to create a neural net similar to the one documented in the [Bengio et al. 2003 MLP language model paper](https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf).


![neural](./img/neural-net.png)

Above we created 32 examples, each with 3 inputs (i.e. a context of 3 characters per example) to feed into the model.

In [711]:
# Inspect the shapes and dtypes of the input contexts X and the labels Y.
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

Let's create embeddings in a two-dimensional space. We have 27 characters, each of which will get a 2-d embedding.

In [706]:
# Embedding lookup table: one row per character (27 rows), 2 values per embedding.
C = torch.randn(27, 2)
C

tensor([[ 0.3646, -0.1091],
        [ 1.8345, -0.3759],
        [ 1.0427,  0.0862],
        [-1.0084,  1.1625],
        [ 0.0908,  0.0155],
        [-1.6450,  0.0996],
        [ 0.1930, -0.9899],
        [-0.0953, -1.6928],
        [-2.4998,  0.0525],
        [ 0.3354,  0.4618],
        [-0.3805,  0.7080],
        [ 0.4160, -0.5902],
        [-0.1432, -0.7998],
        [ 2.1857, -1.1717],
        [-0.5928, -0.2904],
        [-0.0981, -0.1122],
        [-0.9736,  0.7044],
        [-0.8468, -1.1654],
        [-1.6480,  0.5245],
        [ 0.1453, -1.3135],
        [ 2.1102, -0.3519],
        [ 1.2241,  0.0788],
        [ 1.0755, -0.3262],
        [ 0.1876,  0.3373],
        [ 0.9418, -1.1469],
        [ 1.2171,  1.5643],
        [-0.5070, -1.3368]])

The cell below looks up the embedding for index 5 (the character `e`).

In [709]:
# Index the lookup table C with the integer 5 to select row 5 (one character's embedding vector).
C[5]

tensor([-1.6450,  0.0996])

We can also index with multi-dimensional tensors. In our case we can use `C[X]`, where `X` has shape `torch.Size([32, 3])` (32 examples with 3 inputs each).

This creates our embeddings.

In [715]:
# Embed every context at once: indexing C with the integer tensor X looks up
# one row of C for each entry of X.
emb = C[X]
emb.shape

torch.Size([32, 3, 2])

Now let's create layer 1

![layer](./img/neural-net-layer-1.png)

Looking at the image above, and given that `emb` has shape `torch.Size([32, 3, 2])`, each neuron requires 6 weights, as 3 * 2 = 6 (i.e. 3 inputs, each with a 2-dimensional embedding).

The number of neurons is a hyperparameter that we are free to choose; we use 100.

We also need 100 biases, one per neuron.

In [716]:
# Create weights and biases for the hidden layer: 6 inputs (3 characters x 2-d
# embedding) feeding 100 neurons.
# Use randn (standard normal) rather than rand (uniform on [0, 1)): rand produces
# only non-negative values, and randn matches how C, W2 and b2 are initialised.
W1 = torch.randn((6, 100))
b1 = torch.randn(100)

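Next we want to multiply by our weights and add the bias, but we can't do this directly, as `emb` has shape `torch.Size([32, 3, 2])` while `W1` expects 6 inputs per example.

We can use PyTorch's `view` to do this. It flattens dimensions 1 and 2 into one, which is equivalent to concatenating the embeddings of the 3 inputs (i.e. the block of chars) of each example.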

In [724]:
# Flatten each example's (3, 2) block of embeddings into a single row so it can be
# multiplied by W1. Dim 0 is kept fixed and -1 lets PyTorch infer the feature width.
# Hard-coding the width as 6 would silently regroup values across examples if
# block_size or the embedding size changed; this way such a mismatch with W1
# surfaces as a shape error in the matmul instead.
h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)  # (32, 6) @ (6, 100) + (100,) -> (32, 100)
h

tensor([[ 0.4741,  0.6222,  0.7488,  ...,  0.6165,  0.7452,  0.8291],
        [-0.5843,  0.2760,  0.2611,  ..., -0.7576, -0.6481,  0.2599],
        [-0.4344, -0.7489, -0.6527,  ..., -0.0093,  0.1127,  0.1406],
        ...,
        [-0.9528, -0.7165, -0.7817,  ..., -0.9847, -0.9916, -0.8927],
        [-0.8442, -0.8059, -0.7797,  ..., -0.6439, -0.7717, -0.8834],
        [ 0.7023, -0.0413,  0.3014,  ...,  0.9772,  0.7044, -0.5267]])

In [725]:
h.shape

torch.Size([32, 100])

Now let's create the final layer
![layer](./img/neural-net-layer-2.png)

In [728]:
# Output layer parameters: 100 hidden activations in, 27 scores out (one per possible character).
W2 = torch.randn((100, 27))
b2 = torch.randn((27,))

In [729]:
# Output layer: project the hidden activations onto one score per character, then add the bias.
logits = torch.matmul(h, W2) + b2

In [730]:
# Check the shape of the logits tensor.
logits.shape

torch.Size([32, 27])

Exponentiate logits and normalize

In [732]:
# Exponentiate the logits: every value becomes positive and differences between scores are amplified.
counts = torch.exp(logits)

In [733]:
# Normalise along dim 1 so each row sums to 1, giving one probability distribution per example.
# keepdim=True keeps the row sums as a column so the division broadcasts row by row.
prob = counts / counts.sum(dim=1, keepdim=True)

In [737]:
# Check the shape of the probability tensor.
prob.shape

torch.Size([32, 27])

Now we need to bring in our label sequence `Y`.

From each row of `prob`, we want to pluck out the probability assigned to the correct character.

In [739]:
# For each example i, pick prob[i, Y[i]]: the probability assigned to the correct
# next character. The row indices are derived from Y rather than hard-coded to 32,
# so the cell keeps working when the dataset is built from more than 5 words.
# NOTE(review): the recorded IndexError ("tensor of dimension 0") suggests `prob` had
# been rebound to a 0-dim tensor by an out-of-order cell when this last ran --
# re-run from a fresh kernel to confirm.
prob[torch.arange(Y.shape[0]), Y]

IndexError: too many indices for tensor of dimension 0