In [1]:
import pandas as pd
import torch
import torch.nn.functional as F

In [2]:
# Read the CSV of names and pull the NAMES column out as a plain Python list.
df = pd.read_csv("names.csv")
names = df["NAMES"].tolist()

In [3]:
# Character <-> index lookup tables.
# Characters found in the names are numbered from 1 in sorted order;
# index 0 is assigned to the '.' token.
stoi = {ch: idx for idx, ch in enumerate(sorted(set("".join(names))), start=1)}
stoi["."] = 0
itos = {idx: ch for ch, idx in stoi.items()}

In [4]:
# Build dataset
# Every name contributes len(name) + 1 examples: a window of `block_size`
# preceding character indices (left-padded with 0) paired with the index of
# the character that follows, where a trailing '.' marks the end of the name.
block_size = 3  # context length
X, Y = [], []

for w in names:
    # Encode the name once: block_size zeros of padding, then name + '.'.
    encoded = [0] * block_size + [stoi[ch] for ch in w + '.']
    # Slide a block_size-wide window over the encoding; the element right
    # after the window is the prediction target.
    for start in range(len(encoded) - block_size):
        X.append(encoded[start:start + block_size])
        Y.append(encoded[start + block_size])

X = torch.tensor(X)
Y = torch.tensor(Y)


In [5]:
# Sanity check: shapes and dtypes of the context and target tensors.
(X.shape, X.dtype, Y.shape, Y.dtype)

(torch.Size([145813, 3]), torch.int64, torch.Size([145813]), torch.int64)

In [8]:
# Peek at the first entry of the names list.
names[0]

'aabavaanan'

In [6]:
# Display the context tensor (PyTorch abbreviates the middle rows).
X

tensor([[ 0,  0,  0],
        [ 0,  0,  1],
        [ 0,  1,  1],
        ...,
        [ 0, 26,  5],
        [26,  5,  5],
        [ 5,  5, 12]])

In [10]:
# Model hyperparameters
embedding_size = 2   # dimensionality of each character embedding
hidden_layer = 100   # number of units in the hidden layer

# Size the vocabulary from the character mapping rather than hard-coding 27,
# so the embedding table and the output layer always cover every index that
# can occur in X / Y, and never produce an index missing from `itos`.
vocab_size = max(stoi.values()) + 1

# NOTE(review): no RNG seed is set in the visible cells, so initial weights
# (and therefore the recorded losses and samples) differ between runs.
C = torch.randn((vocab_size, embedding_size))  # lookup table: one embedding row per character index

# MLP
W1 = torch.randn((embedding_size * block_size, hidden_layer))  # concatenated context embeddings -> hidden
# NOTE(review): biases are drawn with torch.rand (uniform on [0, 1)) while the
# weights use torch.randn (standard normal) - confirm this is intentional.
b1 = torch.rand(hidden_layer)  # hidden bias
W2 = torch.randn((hidden_layer, vocab_size))  # hidden -> one logit per character index
b2 = torch.rand(vocab_size)  # output bias

# All trainable tensors, iterated over by the training loop.
parameters = [C, W1, W2, b1, b2]

In [89]:
# Turn on gradient tracking for every trainable tensor.
for param in parameters:
    param.requires_grad_(True)

In [94]:
# Training hyperparameters
epochs = 10000        # number of minibatch SGD steps (each iteration sees one random batch, not the full dataset)
batch_size = 128      # examples drawn per step
learning_rate = 0.1   # SGD step size

for _ in range(epochs):

    # Minibatch: draw batch_size random row indices into the dataset
    ix = torch.randint(0, X.shape[0], (batch_size,))

    # Forward pass
    emb = C[X[ix]]  # look up the embedding of every context character
    # Flatten each context's embeddings into one row before the hidden layer
    h = torch.tanh(emb.view(-1, embedding_size * block_size) @ W1 + b1)
    logits = h @ W2 + b2
    loss = F.cross_entropy(logits, Y[ix])

    # Backward pass: clear stale gradients, then backpropagate
    for p in parameters:
        p.grad = None
    loss.backward()

    # Update: plain SGD step. The in-place update is done under no_grad so
    # autograd does not record it, instead of going through `.data`.
    with torch.no_grad():
        for p in parameters:
            p -= learning_rate * p.grad

# Loss of the final minibatch only (noisy; not the loss over the whole dataset)
print(loss.item())



2.1886990070343018


In [98]:
# Cross-entropy loss over the whole of X / Y, computed without gradient tracking.
with torch.no_grad():
    emb = C[X]
    h = torch.tanh(torch.matmul(emb.reshape(-1, embedding_size * block_size), W1) + b1)
    logits = torch.matmul(h, W2) + b2
    loss = F.cross_entropy(logits, Y)
    print(loss.item())

2.178276300430298


In [102]:
# Sample from the model
num_samples = 20  # how many names to generate

# Sampling is inference only: disable autograd so no graph is built for each
# generated character (the parameters have requires_grad=True).
with torch.no_grad():
    for _ in range(num_samples):

        out = []
        context = [0] * block_size  # start from an all-zero context, as in dataset construction
        while True:
            # Forward pass on a batch containing the single current context
            emb = C[torch.tensor([context])]
            h = torch.tanh(emb.view(-1, embedding_size * block_size) @ W1 + b1)
            logits = h @ W2 + b2
            prob = F.softmax(logits, dim=1)

            # Sampling from the distribution
            idx = torch.multinomial(prob, 1, replacement=True).item()
            context = context[1:] + [idx]  # slide the window: drop oldest, append the new index
            out.append(idx)

            # Index 0 ends the name; it is appended first, so the printed
            # sample includes its character.
            if idx == 0:
                break

        print("".join(itos[i] for i in out))

prithali.
diybadaga.
jajathani.
kora.
sarish.
dhar.
shhanchitha.
ama.
gadharp.
aga.
bhaamoad.
ditpejdraheshindrateshwana.
savyanden.
mrashpaul.
lat.
chakil.
darvavanajdhaitam.
laka.
pir.
ssvandra.
