In [69]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
%matplotlib inline

In [255]:
# names.txt contains Arabic, Indian, Greek, and American names, one per line.
# Read it through a context manager so the file handle is closed as soon as the
# contents are in memory instead of being left open for the rest of the session.
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
# Entries are kept exactly as they appear in the file (no stripping), so any
# trailing spaces in a line stay part of that name.
words[-10:-3]

['aphrodite',
 'apollo',
 'ares ',
 'artemis ',
 'athena  ',
 'hephaestus',
 'hestia  ']

In [71]:
# Number of entries read from names.txt (one per line).
len(words)

48358

In [72]:
# Build the character vocabulary from every lower-cased character in the data.
# '.' is the padding / end-of-name marker that later cells look up as stoi['.'],
# so it is added explicitly instead of relying on it happening to occur in names.txt
# (when it already occurs, the resulting vocabulary is unchanged).
chars = sorted(set(''.join(words).lower()) | {'.'})
stoi = {s: i for i, s in enumerate(chars)}  # character -> integer id
itos = {i: s for s, i in stoi.items()}      # integer id -> character
print(itos)

{0: ' ', 1: '.', 2: 'a', 3: 'b', 4: 'c', 5: 'd', 6: 'e', 7: 'f', 8: 'g', 9: 'h', 10: 'i', 11: 'j', 12: 'k', 13: 'l', 14: 'm', 15: 'n', 16: 'o', 17: 'p', 18: 'q', 19: 'r', 20: 's', 21: 't', 22: 'u', 23: 'v', 24: 'w', 25: 'x', 26: 'y', 27: 'z'}


In [216]:
# Context length: how many preceding characters are used to predict the next one.
block_size = 10

pad_id = stoi['.']
inputs, targets = [], []
for name in words:
    # Encode the lower-cased name terminated by '.', preceded by block_size pad
    # tokens, then slide a window over it: each window predicts the token after it.
    token_ids = [pad_id] * block_size + [stoi[c] for c in name.lower() + '.']
    for pos in range(block_size, len(token_ids)):
        inputs.append(token_ids[pos - block_size:pos])
        targets.append(token_ids[pos])

X = torch.tensor(inputs)
Y = torch.tensor(targets)

In [217]:
# Model hyper-parameters.
neurons = 300            # hidden-layer width
embDims = 5              # size of each character embedding
vocab_size = len(itos)   # number of distinct characters

# Dedicated seeded generator so the initial weights are identical on every fresh run.
g = torch.Generator().manual_seed(2147483647)

C = torch.randn((vocab_size, embDims), generator=g)                    # embedding table
W1 = torch.randn((embDims * block_size, neurons), generator=g) * 0.01  # hidden-layer weights
b1 = torch.randn(neurons, generator=g) * 0.01                          # hidden-layer bias
W2 = torch.randn((neurons, vocab_size), generator=g) * 0.01            # output-layer weights
b2 = torch.zeros(vocab_size)  # output bias starts at exactly zero (no need to draw and discard random values)
params = [C, W1, b1, W2, b2]
vocab_size

28

In [218]:
# Turn on gradient tracking for every trainable tensor so loss.backward()
# populates their .grad fields.
for tensor in params:
    tensor.requires_grad_(True)

In [219]:
def get_loss():
    """Return the mean cross-entropy of the current model over the whole dataset.

    Reads the notebook-level tensors ``X`` and ``Y`` and the parameters ``C``,
    ``W1``, ``b1``, ``W2``, ``b2``. This is evaluation only, so the forward pass
    runs under ``torch.no_grad()`` and no autograd graph is built for it.

    Returns:
        float: the loss as a plain Python number.
    """
    with torch.no_grad():
        emb = C[X]  # (num_examples, block_size, embDims)
        h = torch.tanh(emb.view(emb.shape[0], embDims * block_size) @ W1 + b1)
        logits = h @ W2 + b2
        return F.cross_entropy(logits, Y).item()

In [259]:
## TRAINING ##
n = 50000
lr = 0.001
for i in range(n):
    
    #forward pass
    ix = torch.randint(0,X.shape[0], (512,))
    small = X[ix]
    emb = C[small]
    h = torch.tanh(emb.view(emb.shape[0],embDims*block_size) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits,Y[ix])
    
    #backward pass
    for p in params:
        p.grad = None
    loss.backward()
    if( i % (n/50) == 0):
        emb = C[X] #32x3x2
        print( i, "->", loss.item())
        
    #update
    for p in params:
        p.data += -lr * p.grad
        
print( get_loss() )

0 -> 1.948021411895752
1000 -> 1.9614207744598389
2000 -> 2.038947343826294
3000 -> 2.008090019226074
4000 -> 2.0818777084350586
5000 -> 2.090620279312134
6000 -> 2.001598834991455
7000 -> 1.9292336702346802
8000 -> 2.0200302600860596
9000 -> 2.022744655609131
10000 -> 1.920793056488037
11000 -> 1.9705742597579956
12000 -> 2.025944948196411
13000 -> 2.0747344493865967
14000 -> 2.1247143745422363
15000 -> 1.9978214502334595
16000 -> 2.0200507640838623
17000 -> 1.9554550647735596
18000 -> 2.1442532539367676
19000 -> 1.9627459049224854
20000 -> 1.9882065057754517
21000 -> 1.9713928699493408
22000 -> 2.0115978717803955
23000 -> 2.0509579181671143
24000 -> 1.9677590131759644
25000 -> 2.0512852668762207
26000 -> 1.9124013185501099
27000 -> 2.0761404037475586
28000 -> 1.9191193580627441
29000 -> 1.9411853551864624
30000 -> 2.0313971042633057
31000 -> 1.952857494354248
32000 -> 1.9418389797210693
33000 -> 1.9694241285324097
34000 -> 2.149663209915161
35000 -> 2.076702356338501
36000 -> 1.93312

In [260]:
# Cross-entropy over the entire dataset (all of X, Y) with the current parameters.
get_loss()

2.0247747898101807

In [261]:
def generate_name(start_context=None, max_length=10):
    """Sample one name from the model, one character at a time.

    Args:
        start_context: Optional string or sequence of single characters used to
            seed the name. It is lower-cased (the vocabulary is built from
            lower-cased text), then left-padded with '.' or cut to its last
            ``block_size`` characters so it always matches the model's input
            width. ``None`` starts from an all-'.' context.
        max_length: Maximum number of characters to sample.

    Returns:
        str: the seed characters (with '.' padding removed) followed by the
        sampled characters, including the terminating '.' if the model emitted
        one before ``max_length`` was reached.

    Raises:
        KeyError: if ``start_context`` contains a character that is not in ``stoi``.
    """
    pad_id = stoi['.']
    if start_context is None:
        seed_ids = []
    else:
        seed_ids = [stoi[ch.lower()] for ch in start_context]

    # The network consumes exactly block_size tokens: left-pad short seeds and
    # keep only the tail of long ones.
    padded = [pad_id] * block_size + seed_ids
    context = padded[len(padded) - block_size:]

    # The returned text starts with the seed itself, minus any '.' padding.
    generated_name = [itos[s] for s in seed_ids if s != pad_id]

    # Sampling never needs gradients, so skip building the autograd graph.
    with torch.no_grad():
        for _ in range(max_length):
            context_tensor = torch.tensor(context).unsqueeze(0)  # (1, block_size)
            emb = C[context_tensor]  # (1, block_size, embDims)

            # forward pass
            h = torch.tanh(emb.view(emb.shape[0], -1) @ W1 + b1)
            logits = h @ W2 + b2

            # softmax to get probabilities, then sample the next character id
            probs = F.softmax(logits, dim=-1)
            next_char_index = torch.multinomial(probs, num_samples=1).item()

            generated_name.append(itos[next_char_index])

            # slide the context window forward by one character
            context = context[1:] + [next_char_index]

            # '.' marks the end of a name
            if next_char_index == pad_id:
                break

    return ''.join(generated_name)

In [274]:
# Seed text that every sampled name starts with, and how many names to sample.
START_CONTEXT = "sa"
num_generations = 5

# Left-pad the seed with '.' up to block_size characters before handing it to the sampler.
inp = START_CONTEXT.rjust(block_size, ".")
seed_chars = list(inp)
for rank in range(1, num_generations + 1):
    sample = generate_name(start_context=seed_chars)
    print(f'{rank:4d}',". ", sample)

   1 .  saman.
   2 .  safaa.
   3 .  samarie.
   4 .  sayana.
   5 .  saklia.
