## 0. init

In [3]:
import urllib
import torch
import random
import torch.nn.functional as F

In [4]:
# download the data 
file_url = 'https://raw.githubusercontent.com/karpathy/makemore/master/names.txt'
words = urllib.request.urlopen(file_url).read().decode('utf-8').splitlines()
words[:5]

['emma', 'olivia', 'ava', 'isabella', 'sophia']

In [3]:
# make chr indexes s to i and i to s:

chars = sorted(list(set(''.join(words))))
stoi = {s:i+1 for i,s in enumerate(chars)}
stoi['.'] = 0  # to represent start/ end of the word 
itos = {i:s for s, i in stoi.items()}


In [4]:
# example: Piyush
print(f"Piyush --> {([stoi[chr] for chr in "piyush"])}")

print(f"{[itos[idx] for idx in ([stoi[chr] for chr in "piyush"])]}")

Piyush --> [16, 9, 25, 21, 19, 8]
['p', 'i', 'y', 'u', 's', 'h']


## 1. Preprocess the data

In [5]:
def construct_model_data(words:list, block_size:int=3):
    ''' 
    constructs X and Y dataset given the block size
    for every word start from . . . -> ch1 , then . . ch1 -> ch2, . ch1 ch2 -> ch3 .. and so on  
    '''
    X, Y = [] , []
    for w in words:
        context = [0] * block_size
        for ch in w + '.':
            ix = stoi[ch]
            X.append(context)
            Y.append(ix)
            # print
            #print(''.join(itos[i] for i in context), '----->', itos[ix])
            #update the cotext 
            context = context[1:] + [ix]

    X = torch.tensor(X)
    Y = torch.tensor(Y)

    return X, Y

In [6]:
# build dataset 

X, Y = construct_model_data(words, block_size=3)
print(f"X-shape {X.shape}")
print(f"Y-shape {Y.shape}")


X-shape torch.Size([228146, 3])
Y-shape torch.Size([228146])


In [7]:
# train / val / test set 

BLOCK_SIZE = 3

random.seed(42)
random.shuffle(words)
n1 = int(0.8 * len(words))
n2 = int(0.9 * len(words))

Xtr, Ytr = construct_model_data(words=words[:n1], block_size=BLOCK_SIZE)
Xdev, Ydev = construct_model_data(words=words[n1:n2], block_size=BLOCK_SIZE) # 10%
Xtest, Ytest = construct_model_data(words=words[n2:], block_size=BLOCK_SIZE) # 10%

## 2. Simple NN

In [8]:
import mlflow
mlflow.set_experiment("103_makemore_mlp_updated")

<Experiment: artifact_location='file:///Users/DKS0652998/deep_learning/everything-andrej-karpathy/mlruns/152638288584773710', creation_time=1721593746632, experiment_id='152638288584773710', last_update_time=1721593746632, lifecycle_stage='active', name='103_makemore_mlp_updated', tags={}>

In [9]:
with mlflow.start_run():

    EMBEDDING_SPACE_DIM = 10
    HIDDEN_LAYER_1_INPUT_SPACE = BLOCK_SIZE * EMBEDDING_SPACE_DIM
    HIDDEN_LAYER_1_OUTPUT = 200

    NUM_ITR = 200_000
    MINI_BATCH_SIZE = 32


    g = torch.Generator().manual_seed(2147483647)
    # Embedding matrix 
    C = torch.randn((27,EMBEDDING_SPACE_DIM), generator=g)
    W1 = torch.randn((HIDDEN_LAYER_1_INPUT_SPACE, HIDDEN_LAYER_1_OUTPUT), generator=g)
    b1 = torch.randn((HIDDEN_LAYER_1_OUTPUT), generator=g)
    W2 = torch.randn((HIDDEN_LAYER_1_OUTPUT, 27), generator=g)
    b2 = torch.randn((27), generator=g)

    parameters = [C, W1, b1, W2, b2]

    num_params = sum(p.nelement() for p in parameters) # total num of params in this NN
    print(f"num of parameters in this NN: {num_params}")

    # we need gradients for these people:
    for p in parameters:
        p.requires_grad = True

    # tracking the learning 
    lr_i = [] # learning rate
    loss_log_i = []
    loss_i = []
    step_i = [] # number of setps in Gradienet Descent

    # run optimization 
    for i in range(NUM_ITR):

        # mini batch construct 
        ix = torch.randint(0, Xtr.shape[0], (MINI_BATCH_SIZE,))

        # forward pass 
        emb = C[Xtr[ix]]
        h = torch.tanh(emb.view(-1, HIDDEN_LAYER_1_INPUT_SPACE) @ W1 + b1)
        logits = h @ W2 + b2

        # loss
        loss = F.cross_entropy(logits, Ytr[ix])

        # backward pass 
        for p in parameters:
            p.grad = None
        loss.backward()

        # update the parameters
        lr = 0.1 if i < 100_000 else 0.01
        for p in parameters:
            p.data += -lr * p.grad

        #print(f"--{i}--{loss.item()}")
        # tracking stats
        step_i.append(i)
        loss_log_i.append(loss.log10().item()) # ? why log10??? 
        loss_i.append(loss.item())
        lr_i.append(lr)

        if i % 10_000 == 0:
            print(f"-{i}--lr:{lr}--loss:{loss.item()}--log_loss:{loss.log10().item()}")


    def cal_loss(X, Y):
        emb = C[X]
        h = torch.tanh(emb.view(-1, HIDDEN_LAYER_1_INPUT_SPACE) @ W1 + b1)
        logits = h @ W2 + b2
        loss = F.cross_entropy(logits, Y)
        return loss
    
    params = {"embedding_size": EMBEDDING_SPACE_DIM, 
                       "hidden_laye_size": HIDDEN_LAYER_1_OUTPUT,
                       "block_size":BLOCK_SIZE,
                       "iterations":NUM_ITR, 
                       "batch_size":MINI_BATCH_SIZE,
                       "num_params":num_params}
    
    mlflow.log_params(params)
    print(params)
    
    mlflow.log_metric("train_loss", cal_loss(Xtr, Ytr))
    mlflow.log_metric("dev/val_loss", cal_loss(Xdev, Ydev))

    print(f"Train-loss: {cal_loss(Xtr, Ytr)}")
    print(f"eval-loss: {cal_loss(Xdev, Ydev)}")


num of parameters in this NN: 11897
-0--lr:0.1--loss:25.087587356567383--log_loss:1.399458885192871
-10000--lr:0.1--loss:2.439628839492798--log_loss:0.3873237669467926
-20000--lr:0.1--loss:2.557973861694336--log_loss:0.40789610147476196
-30000--lr:0.1--loss:2.229522466659546--log_loss:0.3482118546962738
-40000--lr:0.1--loss:2.339263677597046--log_loss:0.3690791726112366
-50000--lr:0.1--loss:2.208639144897461--log_loss:0.34412476420402527
-60000--lr:0.1--loss:2.3891236782073975--log_loss:0.37823861837387085
-70000--lr:0.1--loss:2.0298643112182617--log_loss:0.3074670135974884
-80000--lr:0.1--loss:1.982558250427246--log_loss:0.2972259521484375
-90000--lr:0.1--loss:1.9115066528320312--log_loss:0.28137582540512085
-100000--lr:0.01--loss:1.9590274095535278--log_loss:0.29204052686691284
-110000--lr:0.01--loss:1.8839510679244995--log_loss:0.27506962418556213
-120000--lr:0.01--loss:1.8435168266296387--log_loss:0.26564711332321167
-130000--lr:0.01--loss:2.312102794647217--log_loss:0.364007145166

## 3. Wrap the model in python class

- A simple python class, which takes a list of word and trains upton initiation
- when called generates new names from the learning

In [1]:
from MakeMore import MakeMoreNN

%load_ext autoreload
%autoreload 2

In [12]:
# we have a set of words
words[0:7]

['ebrima', 'hilton', 'jhene', 'manahil', 'naiah', 'jenisha', 'jahziah']

In [13]:
MyMakeMore = MakeMoreNN(
    words=words,
)

lenth of vocab: 27
Now training the model
-0--lr:0.1--loss:3.2715821266174316--log_loss:0.5147578120231628
-10000--lr:0.1--loss:2.6359708309173584--log_loss:0.4209406077861786
-20000--lr:0.1--loss:2.8201544284820557--log_loss:0.45027288794517517
-30000--lr:0.1--loss:2.647702217102051--log_loss:0.42286914587020874
-40000--lr:0.1--loss:3.1076724529266357--log_loss:0.4924352467060089
-50000--lr:0.1--loss:2.818751335144043--log_loss:0.4500567615032196
-60000--lr:0.1--loss:2.9821314811706543--log_loss:0.4745267927646637
-70000--lr:0.1--loss:2.865100383758545--log_loss:0.45713984966278076
-80000--lr:0.1--loss:2.8088817596435547--log_loss:0.44853344559669495
-90000--lr:0.1--loss:7.4310688972473145--log_loss:0.8710513114929199
-100000--lr:0.01--loss:3.616550922393799--log_loss:0.5582945942878723
-110000--lr:0.01--loss:2.615199089050293--log_loss:0.41750475764274597
-120000--lr:0.01--loss:2.5161514282226562--log_loss:0.4007367789745331
-130000--lr:0.01--loss:2.3727104663848877--log_loss:0.37524

In [14]:
# Generate new words
for _ in range(10):
    print(MyMakeMore.predict())

mylon
sa
may
lih
jusmarius
rea
uda
ila
shamer
jaydena


In [15]:
len(words)

32033

### Let's get Indian Names

In [16]:
import getindianname
import random

def get_indian_names(count=1000):
    names = set()
    while len(names) < count:
        # Randomly choose between male, female, or random name
        choice = random.choice(['male', 'female', 'random'])
        if choice == 'male':
            names.add(getindianname.male())
        elif choice == 'female':
            names.add(getindianname.female())
        else:
            names.add(getindianname.randname())
    
    return list(names)

# Generate a list of 1000 unique Indian names
indian_names_i = get_indian_names(50_000)

# Print the list to verify
print(indian_names_i) 

['Roshan Sonkar', 'Arun Singh Rai', 'Raxa Yadav', 'Ruchira Divedi', 'Junaid Singh', 'Jumi Gupta', 'Taufeeq Devi', 'Imram Pandey', 'Vrunda Devi', 'Sujitha Kumari', 'Ramjibhai Rajput', 'Nathuram Srivastav', 'Aliya Seth', 'Seerat Seth', 'Manik Yadav', 'Sumanta Srivastav', 'Nitu Kumari Gupta', 'Krina Singh', 'राजेंद्र Rai', 'Nomaan Pandit', 'Babina Kumari', 'Lokhi Kumari', 'Madev Singh', 'Ayeza Rao', 'Kasish Mishra', 'Pareshbhai Yadav', 'Ankaj Thakur', 'Ahana Tiwari', 'Ranjeet Rao', 'Ank Sonkar', 'Sahab Pratap', 'Nandesh Maurrya', 'Dhanunjaya Seth', 'Janu Gupta', 'Oshin Rao', 'Tasveer Divedi', 'Pankit Vishwakarma', 'Mishti Rajput', 'Arifa Srivastav', 'Shadan Gupta', 'Mitul Gupta', 'Abha Rai', 'T R Seth', 'Narendrasingh Vishwakarma', 'Bhuvi Pandey', 'Shakumbhari Yadav', 'Madhu Sonkar', 'Bhupinder Singh Tiwari', 'Mohibur Singh', 'Kushwaha Rao', 'Mʀ Pandey', 'Sarat Kumar Rai', 'Kalpesh Mishra', 'અશોક Devi', 'Nithi Pandit', 'आर्यन Kumari', 'Biplab Kumar', 'Appas Rai', 'Avika Vishwakarma', 'All

In [17]:
IndianMakeMore =  MakeMoreNN(
    words=indian_names_i,
)

lenth of vocab: 28
Now training the model
-0--lr:0.1--loss:3.3961310386657715--log_loss:0.5309844613075256
-10000--lr:0.1--loss:4.6358771324157715--log_loss:0.6661319136619568
-20000--lr:0.1--loss:5.886142730712891--log_loss:0.7698307633399963
-30000--lr:0.1--loss:5.276176929473877--log_loss:0.7223193645477295
-40000--lr:0.1--loss:5.236084461212158--log_loss:0.7190066576004028
-50000--lr:0.1--loss:4.111996173858643--log_loss:0.6140527129173279
-60000--lr:0.1--loss:4.183482646942139--log_loss:0.621537983417511
-70000--lr:0.1--loss:5.03567361831665--log_loss:0.7020576000213623
-80000--lr:0.1--loss:3.7908921241760254--log_loss:0.5787414312362671
-90000--lr:0.1--loss:7.356512069702148--log_loss:0.8666719794273376
-100000--lr:0.01--loss:5.353002071380615--log_loss:0.7285974025726318
-110000--lr:0.01--loss:2.1564269065856934--log_loss:0.33373475074768066
-120000--lr:0.01--loss:1.5710951089859009--log_loss:0.19620247185230255
-130000--lr:0.01--loss:1.6785457134246826--log_loss:0.2249331772327

In [18]:
# Generate new words
for _ in range(10):
    print(IndianMakeMore.predict())

singh
mam tiwari
kitrai
priya tiwarakar
sasoar singh
sukratap
rajput
rakar
rutoijvishra
lachind kumarajput
