In [188]:
#Importing required libraries
import random
import requests
import torch
import torch.nn.functional as F

In [7]:
#downloading the data from web url

url = "https://raw.githubusercontent.com/karpathy/makemore/master/names.txt"
output_file_path = "names.txt"

response = requests.get(url)

if response.status_code == 200:
    data = response.text
    with open(output_file_path, "w") as output_file:
        output_file.write(data)
    print(f"Data downloaded and saved to {output_file_path}")
else:
    print("Failed to retrieve data from the URL.")


Data downloaded and saved to names.txt


In [8]:
#opening the txt file in read mode
words = open('names.txt', 'r').read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [14]:
#creating the sorted list of unique characters
chars = sorted(list(set(''.join(words))))

In [63]:
#simple look up table for char encoding

stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0
itos = {i:s for s,i in stoi.items()}

In [166]:
#define the block_size which is number of characters you want to consider while predicting the next character in sequence

block_size = 3
X,Y = [],[] # INputs and predictions
for w in words[:3]:
    context = [0] * block_size
    for ch in w + '.':
        ix = stoi[ch]
        Y.append(ix)
        X.append(context)
        context = context[1:] + [ix]

X = torch.tensor(X)
Y = torch.tensor(Y)

In [214]:
def build_data_set(words,block_size):
    X,Y = [],[]
    for w in words:
        context = [0] * block_size #for each word context starts differently
        for ch in w +'.':
            ix = stoi[ch]
            X.append(context)
            Y.append(ix)
            context = context[1:] + [ix]
    X = torch.tensor(X)
    Y = torch.tensor(Y)
    return X,Y


random.seed(42)
random.shuffle(words)
n1 = int(0.8*len(words))
n2 = int(0.9*len(words))
block_size = 3
Xtr, Ytr = build_data_set(words[:n1],block_size)
Xdev, Ydev = build_data_set(words[n1:n2],block_size)
Xte, Yte = build_data_set(words[n2:],block_size)

In [259]:
#defining input dimensions for embedding layers and other inner neuron layers (2 in this case)
emb_input_size = len(chars) + 1
n_dim = 2
w1_input_size = block_size * n_dim
n_neurons = 200
w2_output_size = emb_input_size

In [260]:
g = torch.Generator().manual_seed(2147483647) #use generator with manual seed as same to get the same results
C = torch.rand((emb_input_size,n_dim),requires_grad = True,generator = g) #embedding look up table
w1 = torch.rand((w1_input_size,n_neurons),requires_grad = True, generator = g)#weights
b1 = torch.rand(n_neurons,requires_grad = True, generator = g)#bias
w2 = torch.rand(n_neurons,w2_output_size,requires_grad = True, generator = g)
b2 = torch.rand(w2_output_size,requires_grad = True, generator = g)
lr = 0.1

In [261]:
params = [C,w1,b1,w2,b2] #

In [262]:
#checking total number of parameters for our NN
total_params = 0
for p in params:
    total_params +=p.nelement()
print(total_params)

6881


In [267]:
#training NN

for i in range(10000): #specify the iterations
    # minibatch construct
    ix = torch.randint(0, Xtr.shape[0], (50,))
    #forward pass
    emb = C[Xtr[ix]] #create the embeddings of input tensor using emb look up table
    h = torch.tanh(emb.view(-1,block_size * n_dim) @w1 + b1)
    logits = h @ w2 + b2 #log counts
    loss = F.cross_entropy(logits,Ytr[ix])
    #print(loss)
    
    #backward pass
    for p in params:
        p.grad = None
    loss.backward()
    
    #update the params
    for p in params:
        p.data += -lr * p.grad
print(loss.item())  

2.3876819610595703


In [268]:
#check Xdev loss
emb = C[Xdev] 
h = torch.tanh(emb.view(-1,block_size * n_dim) @w1 + b1)
logits = h @ w2 + b2 #log counts
loss = F.cross_entropy(logits,Ydev).item()
print(loss)

2.4194250106811523


In [269]:
#check Xval loss
emb = C[Xte] 
h = torch.tanh(emb.view(-1,block_size * n_dim) @w1 + b1)
logits = h @ w2 + b2 #log counts
loss = F.cross_entropy(logits,Yte).item()
print(loss)

2.417184829711914


In [271]:
#Sampling from NN
for i in range(10):
    out = []
    context = [0] * block_size
    while True:
        emb = C[torch.tensor(context)] 
        h = torch.tanh(emb.view(1,-1) @w1 + b1)
        logits = h @ w2 + b2
        probs = F.softmax(logits,1)
        ix = torch.multinomial(probs,1).item()
        out.append(ix)
        context = context[1:] + [ix]
        if ix == 0:
            break
    print(''.join(itos[i] for i in out))

aya.
tamesye.
bad.
badtemroten.
ellai.
amyia.
jam.
vewo.
amvaelia.
alyno.
