In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

In [2]:
# Reading words
# Every whitespace-separated token in names.txt becomes one entry of `words`.
# The context manager closes the file handle as soon as reading is done,
# instead of leaving it open until the garbage collector gets to it.
with open('names.txt', 'r') as names_file:
    words = [word for line in names_file for word in line.split()]

In [26]:
# Character vocabulary plus the two lookup tables between characters and ids.
# Ids 1..N follow the sorted order of the distinct characters found in `words`;
# id 0 is assigned to '.'.
chars = sorted(set(''.join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
# Inverse table: id -> character
itos = {idx: ch for ch, idx in stoi.items()}
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [37]:
# Build the (context -> next character) training pairs.
# Each word is encoded as block_size zero ids, followed by the ids of its
# characters, followed by the id of a terminating '.'. Every window of
# block_size consecutive ids is one input row, and the id immediately after
# the window is that row's target.
block_size = 3 # Context length: number of preceding character ids per example
X, Y = [], []
for w in words:
    encoded = [0] * block_size + [stoi[ch] for ch in w + '.']
    for start in range(len(encoded) - block_size):
        X.append(encoded[start:start + block_size])
        Y.append(encoded[start + block_size])

X = torch.tensor(X)
Y = torch.tensor(Y)

In [5]:
# Shapes of the input tensor X and the target tensor Y
(X.shape, Y.shape)

(torch.Size([228146, 3]), torch.Size([228146]))

In [38]:
# Model definition
# Trainable tensors: an embedding table C with one row per character id
# (27 rows), a hidden layer (W1, B1) and an output layer (W2, B2) that
# produces 27 values per example.
n_hiddenlayer_neurons = 100
g = torch.Generator().manual_seed(2147483647)
space_size = 2

# Every random draw goes through the seeded generator `g`, so re-running this
# cell always yields the same initial parameters. Without `generator=g` the
# seed above has no effect and each run starts from different weights.
C = torch.randn(27, space_size, generator=g) #Embed 27X2
# 6 input rows = 3 context ids x 2 embedding dimensions
W1 = torch.randn(6, n_hiddenlayer_neurons, generator=g) #6X100
B1 = torch.randn(n_hiddenlayer_neurons, generator=g) #100
W2 = torch.randn(n_hiddenlayer_neurons, 27, generator=g) #100X27
B2 = torch.randn(27, generator=g) #27

parameters = [C, W1, B1, W2, B2]

# Turn on gradient tracking so loss.backward() populates .grad on each tensor
for p in parameters:
    p.requires_grad = True

In [9]:
# Total number of scalar values across all parameter tensors
sum(tensor.nelement() for tensor in parameters)

3481

In [47]:
# Inspect one random mini-batch: the 32 sampled row indices, the matching
# context rows and targets, and the embeddings looked up for those contexts.
ix = torch.randint(0, X.shape[0], (32,))
emb = C[X[ix]]
print(ix, X[ix], Y[ix], emb, sep='\n')

tensor([62051, 54598])
tensor([[ 0,  0,  5],
        [ 1, 13,  9]])
tensor([13, 12])
tensor([[[ 0.5468, -0.4675],
         [ 0.5468, -0.4675],
         [ 0.1999,  0.0090]],

        [[ 0.2901, -0.1115],
         [-0.1330, -0.0022],
         [ 0.1024,  0.1278]]], grad_fn=<IndexBackward0>)


In [66]:
# Training loop: mini-batch gradient descent over (X, Y)

lr = 0.01 # Learning rate; constant for the whole run (no decay is applied)

# The counter is called `step` rather than `iter` so the builtin iter() is not
# shadowed for the rest of the kernel session.
for step in range(10000):
    # Mini batch construct: 32 random row indices, drawn from the seeded
    # generator `g` so the batch sequence is repeatable
    ix = torch.randint(0, X.shape[0], (32,), generator=g)

    # Forward pass
    emb = C[X[ix]] #32 x 3 x 2
    # Flatten each example's context embeddings into a single row; the width
    # is inferred from emb instead of being hard-coded
    h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + B1)
    logits = h @ W2 + B2
    loss = F.cross_entropy(logits, Y[ix])

    # Backward pass: clear stale gradients, then backpropagate
    for p in parameters:
        p.grad = None
    loss.backward()

    # Weight update: in-place SGD step, excluded from autograd tracking
    with torch.no_grad():
        for p in parameters:
            p -= lr * p.grad

# Loss of the last mini-batch only (not of the whole dataset)
print(loss.item())

1.9545001983642578


In [67]:
# Loss evaluation over the full dataset (the same X, Y used for training)

# Evaluation never calls backward(), so run the forward pass under no_grad to
# avoid building an autograd graph for every row of X.
with torch.no_grad():
    emb = C[X]
    # Flatten each example's context embeddings into a single row
    h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + B1)
    logits = h @ W2 + B2
    loss = F.cross_entropy(logits, Y)
print(loss.item())

2.3339734077453613
