In [2]:
# !pip install torch

In [1]:
# !pip install pandas 
# !pip install numpy
# !pip install seaborn
# !pip install matplotlib

## resources 
* https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf 
* http://blog.ezyang.com/2019/05/pytorch-internals/

In [53]:
import pandas as pd
import torch 
import torch.nn.functional as F 
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline 

In [2]:
#read all words in the names.txt data
words = open("names.txt", "r").read().splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [3]:
len(words)

32033

In [136]:
chars = sorted(list(set(''.join(words))))

In [137]:
stoi = {s: i+1 for i,s in enumerate(chars)}
stoi['.']= 0

In [138]:
itos = {i:s for s, i in stoi.items()}

In [139]:
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [140]:
#build the dataset

block_size = 3 
X, Y = [], []
for w in words[:5]:
    print(w)
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        print(''.join(itos[i] for i in context), '---->', itos[ix])
        context  = context[1:]+ [ix] #crop and append


X = torch.tensor(X)
Y = torch.tensor(Y) 

emma
... ----> e
..e ----> m
.em ----> m
emm ----> a
mma ----> .
olivia
... ----> o
..o ----> l
.ol ----> i
oli ----> v
liv ----> i
ivi ----> a
via ----> .
ava
... ----> a
..a ----> v
.av ----> a
ava ----> .
isabella
... ----> i
..i ----> s
.is ----> a
isa ----> b
sab ----> e
abe ----> l
bel ----> l
ell ----> a
lla ----> .
sophia
... ----> s
..s ----> o
.so ----> p
sop ----> h
oph ----> i
phi ----> a
hia ----> .


In [141]:
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

In [142]:
C = torch.randn((27,2))

In [143]:
C.shape

torch.Size([27, 2])

In [144]:
emb = C[X]
emb.shape

torch.Size([32, 3, 2])

In [145]:
W1 = torch.randn((6, 100))
b1 = torch.randn(100)

In [146]:
emb[:,0, :].shape

torch.Size([32, 2])

In [147]:
#emb @ W1 + b1 #this will not work due to the incorrect dimensions in matmul

In [148]:
torch.cat([emb[:,0,:], emb[:,1,:], emb[:,2,:]], 1).shape

torch.Size([32, 6])

In [149]:
torch.cat(torch.unbind(emb, 1), 1).shape

torch.Size([32, 6])

In [150]:
a = torch.arange(18)

In [151]:
a.view(3,3,2)

tensor([[[ 0,  1],
         [ 2,  3],
         [ 4,  5]],

        [[ 6,  7],
         [ 8,  9],
         [10, 11]],

        [[12, 13],
         [14, 15],
         [16, 17]]])

In [152]:
#instead we can use .view function in pytorch 

In [153]:
h = torch.tanh(emb.view(32, 6) @ W1 + b1)

In [154]:
h.shape

torch.Size([32, 100])

In [155]:
W2 = torch.randn((100, 27))
b2 = torch.randn(27)

In [156]:
logits = h @ W2 + b2

In [157]:
logits.shape

torch.Size([32, 27])

In [158]:
logits.shape

torch.Size([32, 27])

In [159]:
counts = logits.exp()

In [160]:
prob = counts / counts.sum(1, keepdims=True)

In [161]:
prob.shape

torch.Size([32, 27])

In [162]:
loss = -prob[torch.arange(32), Y].log().mean()
loss

tensor(17.8832)

In [163]:
Y

tensor([ 5, 13, 13,  1,  0, 15, 12,  9, 22,  9,  1,  0,  1, 22,  1,  0,  9, 19,
         1,  2,  5, 12, 12,  1,  0, 19, 15, 16,  8,  9,  1,  0])

In [164]:
#torch way

In [165]:
g = torch.Generator().manual_seed(2147483647) #for reproducibility

In [166]:
C = torch.randn((27,2), generator=g)

In [167]:
W1 = torch.randn((6,100), generator=g)

In [168]:
b1 = torch.randn(100, generator=g)

In [169]:
W2 = torch.randn((100, 27), generator=g)

In [170]:
parameters = [C, W1, b1, W2, b2]

In [171]:
sum(p.nelement() for p in parameters)

3481

In [186]:
print("\n\n Understanding indexing in matrices")
Z = torch.randn(1,2).int()
print('\n ********* Z shape', Z.shape)

print('\nZ', Z)
Y1 = np.random.rand(12,3)
print('\n*********** y shape', Y1.shape)

print('\ny', Y1)

print('\n********** Y[Z] shape', Y1[Z].shape)
print('\n ',Y1[Z])




 Understanding indexing in matrices

 ********* Z shape torch.Size([1, 2])

Z tensor([[0, 0]], dtype=torch.int32)

*********** y shape (12, 3)

y [[0.18657716 0.17969784 0.77450534]
 [0.12714417 0.4936401  0.14564047]
 [0.93147938 0.12514592 0.34229549]
 [0.07981861 0.42665697 0.80851583]
 [0.24386709 0.00914453 0.65556921]
 [0.1291377  0.01667377 0.48271729]
 [0.39331823 0.33886162 0.79075063]
 [0.46758983 0.22120557 0.0735999 ]
 [0.99817131 0.40942007 0.53411428]
 [0.61898681 0.05945207 0.55886653]
 [0.10039724 0.0128842  0.95646214]
 [0.4188815  0.97213056 0.39635461]]

********** Y[Z] shape (1, 2, 3)

  [[[0.18657716 0.17969784 0.77450534]
  [0.18657716 0.17969784 0.77450534]]]


In [177]:
emb = C[X] # (32, 3, 2)

In [174]:
import random
emb.shape

torch.Size([32, 3, 2])

In [175]:
emb.shape

torch.Size([32, 3, 2])