In [3]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Load the training corpus: names.txt holds one name per line.
# A context manager closes the file handle as soon as the read finishes,
# instead of leaving it open until the garbage collector gets to it.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [5]:
# Number of names in the corpus (one list entry per line of names.txt).
len(words)

32033

In [10]:
# Character vocabulary plus the two lookup tables between characters and
# integer ids. Ids 1..N go to the sorted distinct characters found in `words`;
# id 0 is reserved for '.', the start/end-of-word marker.
chars = sorted(set(''.join(words)))
stoi = dict(zip(chars, range(1, len(chars) + 1)))
stoi['.'] = 0
# Invert the mapping while keeping stoi's insertion order (letters first, '.' last).
itos = dict(zip(stoi.values(), stoi.keys()))
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [30]:
# Build the training set: every example pairs a sliding window of
# `block_size` character ids (the context) with the id of the next character.

block_size = 3   # context length: how many characters do we take to predict the next one?
word_subset = 5  # only the first few words are used, to keep the printout short

X, Y = [], []
for w in words[:word_subset]:
    print(w)
    # Each word starts from an all-'.' window (id 0 is the '.' marker).
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        X.append(context)
        Y.append(ix)
        context_str = ''.join(itos[i] for i in context)
        print(context_str, '----->', itos[ix])
        # Slide the window: drop the oldest id and append the one just seen.
        # This builds a new list, so the row already stored in X is untouched.
        context = context[1:] + [ix]

# Freeze the collected Python lists into integer tensors.
X = torch.tensor(X)
Y = torch.tensor(Y)



emma
... -----> e
..e -----> m
.em -----> m
emm -----> a
mma -----> .
olivia
... -----> o
..o -----> l
.ol -----> i
oli -----> v
liv -----> i
ivi -----> a
via -----> .
ava
... -----> a
..a -----> v
.av -----> a
ava -----> .
isabella
... -----> i
..i -----> s
.is -----> a
isa -----> b
sab -----> e
abe -----> l
bel -----> l
ell -----> a
lla -----> .
sophia
... -----> s
..s -----> o
.so -----> p
sop -----> h
oph -----> i
phi -----> a
hia -----> .


In [34]:
# Sanity-check the dataset tensors: X has one row of `block_size` context ids
# per example, and Y has the matching next-character id for each row.
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

In [36]:
# Embedding lookup table: one learnable row per vocabulary entry.
# Seed the global RNG before the first random draw so that the parameters
# (and every output derived from them) are reproducible on Restart & Run All.
SEED = 2147483647
torch.manual_seed(SEED)

embedding_dimension = 2  # size of the vector each character is mapped to
# 27 rows = 26 letters + the '.' marker (the size of the stoi/itos vocabulary).
C = torch.randn((27, embedding_dimension))

In [84]:
# Embed every context id in one shot: indexing the table C with the integer
# tensor X picks one row of C per entry of X, so the result has X's shape
# plus a trailing axis of size embedding_dimension.
emb = C[X]
emb.shape

torch.Size([32, 3, 2])

In [86]:
# Hidden-layer parameters: 6 inputs -> 100 hidden units.
# The input width 6 must equal block_size * embedding_dimension (3 * 2),
# i.e. the length of one flattened row of `emb`.
#
# Draw from a standard normal (torch.randn), matching how C is initialised.
# torch.rand samples uniform [0, 1), which makes every weight and bias
# positive, so all hidden units receive strongly correlated pre-activations
# before the tanh.
W1 = torch.randn((6, 100))
b1 = torch.randn(100)

In [89]:
# Hidden layer: flatten each example's context embeddings into one row,
# apply the affine map (W1, b1) and squash with tanh.
# Flatten with an explicit batch dimension instead of a hard-coded width:
# view(-1, 6) would silently regroup values across examples if block_size or
# embedding_dimension changed, whereas this keeps one row per example and
# lets the matmul fail loudly if the width no longer matches W1.
h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)

In [90]:
# One row of hidden activations per example, one column per hidden unit.
h.shape

torch.Size([32, 100])

In [91]:
# Inspect the hidden activations; tanh keeps every entry within [-1, 1].
h

tensor([[ 0.9883,  0.9836,  0.9995,  ...,  0.9991,  0.9693,  0.9812],
        [ 0.9122,  0.9776,  0.9972,  ...,  0.9986,  0.9366,  0.9444],
        [ 0.8088,  0.9484, -0.0454,  ...,  0.6665,  0.0942,  0.6720],
        ...,
        [ 0.9128,  0.9676, -0.8363,  ..., -0.0845,  0.5231,  0.4520],
        [-0.5044, -0.6790, -0.2614,  ..., -0.6106, -0.3342,  0.5153],
        [-0.1435,  0.3108,  0.1511,  ..., -0.3167, -0.7235,  0.2410]])