In [1]:
import torch
import torch.nn.functional as F
import random

In [2]:
words = open('names.txt','r').read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [4]:
chars = ['.'] + sorted(set(''.join(words))) # '.' represents start and end token for a word
vocab_size = len(chars)
print('vocabulary size is:', vocab_size)

vocabulary size is: 27


In [7]:
#encoder and decoder
stoi = {j:i for i,j in enumerate(chars)}
itos = {i:j for i,j in enumerate(chars)}

# A) Steps to Follow to create MLP NN which will Generate Names

1) Create Train, Dev & Test Split for the names dataset

2) Intialization of Neural Network Parameters

3) Train the Neural Network

4) Check the Dev Loss

5) Only Once you feel the model is a good fit, Check the Test loss

# B) Sampling

1) Initialize the Context with [0,0,0] --> ...

2) get the embeddings of the Context

3) calculate the logits & probabilities

4) sample from the probabilities 

5) convert the index of sample to string to get the character using the decoder

In [10]:
# 1) Create Train, Dev & Test Split for the names dataset

block_size = 3 
def build_dataset(words):
    x,y = [],[]
    for w in words:
        context = [0] * block_size
        for ch in w + '.':
            x.append(context)
            ix = stoi[ch]
            y.append(ix)
            context = context[1:] + [ix]
    X = torch.tensor(x)
    Y = torch.tensor(y)
    print(X.shape, Y.shape)
    return X,Y

random.seed(42)
random.shuffle(words)

n1 = int(0.8*len(words))
n2 = int(0.9*len(words))

Xtr,Ytr = build_dataset(words[:n1])
Xdev,Ydev = build_dataset(words[n1:n2])
Xte,Yte = build_dataset(words[n2:])

torch.Size([182625, 3]) torch.Size([182625])
torch.Size([22655, 3]) torch.Size([22655])
torch.Size([22866, 3]) torch.Size([22866])


In [25]:
# 2) Intialization of Neural Network Parameters

n_dim = 10
g = torch.Generator().manual_seed(2147483647)
C = torch.randn((vocab_size,n_dim))
W1 = torch.randn((n_dim * block_size, 200))
b1 = torch.randn(200)
W2 = torch.randn((200,vocab_size))
b2 = torch.randn(vocab_size)

parameters = [C,W1,b1,W2,b2]

for p in parameters:
    p.requires_grad = True

In [38]:
# 3) Train the Neural Network
loop_size = 80000
batch_size = 50

for i in range(loop_size):
    ix = torch.randint(0,Xtr.shape[0],(batch_size,))
    emb = C[Xtr[ix]]
    h = torch.tanh(emb.view(-1,(n_dim * block_size)) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits,Ytr[ix])

    #backward pass
    for p in parameters:
        p.grad = None
    loss.backward()

    #updation
    lr = 0.1 if i <10000 else 0.01
    for p in parameters:
        p.data += -lr * p.grad
    
print('Training Loss is:',loss.item())

Training Loss is: 2.482802629470825


In [39]:
# 4) Check the Dev Loss 

emb = C[Xdev]
h = torch.tanh(emb.view(-1,(n_dim * block_size)) @ W1 + b1)
logits = h @ W2 + b2
loss = F.cross_entropy(logits,Ydev)

print('Dev Loss is:',loss.item())

Dev Loss is: 2.1865222454071045


In [40]:
# 5) Check the Test Loss 

emb = C[Xte]
h = torch.tanh(emb.view(-1,(n_dim * block_size)) @ W1 + b1)
logits = h @ W2 + b2
loss = F.cross_entropy(logits,Yte)

print('Dev Loss is:',loss.item())

Dev Loss is: 2.198982000350952


In [45]:
#Sampling
g = torch.Generator().manual_seed(2147483647 + 1)

for i in range(20):
    out = []
    context = [0] * block_size

    while True:
        emb = C[torch.tensor([context])]
        h = torch.tanh(emb.view(-1,(n_dim * block_size)) @ W1 + b1)
        logits = h @ W2 + b2
        probs = F.softmax(logits, dim = 1)
        ix = torch.multinomial(probs, num_samples = 1, replacement = True, generator = g).item()
        if ix == 0:
            break
        out.append(ix)
        context = context[1:] + [ix]

    res = ''.join([itos[i] for i in out])
    print(res)

emeerael
amnileh
salenni
ifhadne
heisa
josans
genai
navian
ilah
ynonaysen
elessivalyussan
odel
mana
atholexmenie
naiah
anenleylin
domia
teni
belty
jayia
