In [1]:
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
import torch.nn.functional as F

from torchsummary import summary

In [2]:
data_path = Path("../data")

In [3]:
df = pd.read_csv(data_path / "names.txt", names=["Name"])
df.head(10)

Unnamed: 0,Name
0,emma
1,olivia
2,ava
3,isabella
4,sophia
5,charlotte
6,mia
7,amelia
8,harper
9,evelyn


In [4]:
df.describe()

Unnamed: 0,Name
count,32033
unique,29494
top,emma
freq,2


In [5]:
# We have some duplicates 
df['Name'] = df['Name'].str.lower()
df.drop_duplicates(subset=['Name'], keep='first', inplace=True)

df.describe()

Unnamed: 0,Name
count,29494
unique,29494
top,emma
freq,1


In [6]:
df.head(10)

Unnamed: 0,Name
0,emma
1,olivia
2,ava
3,isabella
4,sophia
5,charlotte
6,mia
7,amelia
8,harper
9,evelyn


## mininum and maximum length of the words

In [7]:
df["Length"] = df["Name"].apply(lambda s: len(s))
df.head(10)

Unnamed: 0,Name,Length
0,emma,4
1,olivia,6
2,ava,3
3,isabella,8
4,sophia,6
5,charlotte,9
6,mia,3
7,amelia,6
8,harper,6
9,evelyn,6


In [8]:
df.sort_values(by=["Length"], ascending=False)

Unnamed: 0,Name,Length
31441,muhammadibrahim,15
31442,muhammadmustafa,15
30197,alexanderjames,14
29637,michaelanthony,14
12005,mariaguadalupe,14
...,...,...
24160,jd,2
10930,kc,2
24159,jc,2
23151,dj,2


## Bengio network

In [9]:
# Loop up tables 
chars = ["<S>"] + sorted(set("".join(df["Name"].to_list()))) + ["<E>"]

stoi = {char:i for i, char in enumerate(chars)}
itos = {i:chars for chars, i in stoi.items()}
itos

{0: '<S>',
 1: 'a',
 2: 'b',
 3: 'c',
 4: 'd',
 5: 'e',
 6: 'f',
 7: 'g',
 8: 'h',
 9: 'i',
 10: 'j',
 11: 'k',
 12: 'l',
 13: 'm',
 14: 'n',
 15: 'o',
 16: 'p',
 17: 'q',
 18: 'r',
 19: 's',
 20: 't',
 21: 'u',
 22: 'v',
 23: 'w',
 24: 'x',
 25: 'y',
 26: 'z',
 27: '<E>'}

In [10]:
# Make dataset
block_size = 3  # context lenght of chars to predict next one.
X, y = [], []
for w in df["Name"]:
    # print(w)
    context = [0] * block_size
    for ch in list(w) + ["<E>"]:
        ix = stoi[ch]
        X.append(context)
        y.append(ix)
        # print(''.join(itos[i] for i in context), '--->', itos[ix])
        context = context[1:] + [ix]  # crop and append
    
    # print()

X = torch.tensor(X)
y = torch.tensor(y)

In [11]:
X

tensor([[ 0,  0,  0],
        [ 0,  0,  5],
        [ 0,  5, 13],
        ...,
        [26, 26, 25],
        [26, 25, 26],
        [25, 26, 24]])

In [12]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(90, 200),
            nn.ReLU(),
            nn.Linear(200, 28),
        )
        self.embding_matrix = nn.Parameter(torch.randn(28, 30))

    def forward(self, X):
        embdings = self.embding_matrix[X]
        if embdings.ndim == 3:
            embdings = embdings.reshape(embdings.shape[0], -1)
        else:
            embdings = embdings.reshape(1, -1)


        logits = self.layers(embdings)

        return logits

loss_fn = nn.CrossEntropyLoss()

model = NeuralNetwork()
optimizer = torch.optim.SGD(model.parameters(), lr=1.0, weight_decay=0.001, momentum=0.5)

for p in model.named_parameters():
    print(p)

('embding_matrix', Parameter containing:
tensor([[ 5.2537e-01, -1.6730e-01,  2.5547e-01, -1.0470e+00, -8.5042e-01,
         -3.8749e-01,  3.7234e-01,  1.3390e+00,  1.0605e+00,  2.2074e-01,
          5.5178e-01, -1.5330e+00,  1.8737e+00, -5.9238e-01,  1.0061e-01,
         -1.1603e+00,  1.6711e+00,  1.5940e+00, -1.3948e+00,  1.9766e-01,
          5.1227e-01, -9.3645e-01,  7.0453e-01,  1.2040e+00, -4.9398e-01,
          9.2342e-01,  2.5455e-01,  1.2343e+00,  6.0915e-01, -1.1829e+00],
        [-6.1804e-03,  1.1480e+00, -2.1871e+00, -3.3398e-02,  1.4029e-02,
         -5.9861e-01, -1.7602e-01,  1.0492e+00, -9.0774e-01, -1.1440e-01,
         -2.2670e+00,  1.8973e-01,  9.4379e-03,  2.2926e+00, -1.9772e+00,
          7.6947e-01, -2.2568e-01,  1.7809e-01,  1.6005e+00, -1.1422e+00,
         -4.2204e-01,  5.6504e-01,  2.5561e-01,  1.3993e+00,  2.3725e+00,
         -3.1967e-01,  9.1657e-01,  2.1105e+00,  2.3228e-01,  9.5174e-01],
        [-5.4838e-01, -3.1803e-01, -3.3098e-01, -5.8401e-02, -1.5184e

In [13]:
total_params = sum(p.nelement() for p in model.parameters() if p.requires_grad)
total_params 

24668

In [14]:
for epoch in range(300):
    # forward pass
    pred = model(X)
    loss = loss_fn(pred, y)

    # backward pass
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print(f"epoch: {epoch}, loss: {loss.item():>7f}")

epoch: 0, loss: 3.332452
epoch: 10, loss: 2.445498
epoch: 20, loss: 2.356840
epoch: 30, loss: 2.328702
epoch: 40, loss: 2.290268
epoch: 50, loss: 2.274625
epoch: 60, loss: 2.258382
epoch: 70, loss: 2.247930
epoch: 80, loss: 2.237723
epoch: 90, loss: 2.230658
epoch: 100, loss: 2.223550
epoch: 110, loss: 2.217848
epoch: 120, loss: 2.212928
epoch: 130, loss: 2.207871
epoch: 140, loss: 2.203874
epoch: 150, loss: 2.200337
epoch: 160, loss: 2.197238
epoch: 170, loss: 2.194132
epoch: 180, loss: 2.191235
epoch: 190, loss: 2.188997
epoch: 200, loss: 2.186658
epoch: 210, loss: 2.184779
epoch: 220, loss: 2.183040
epoch: 230, loss: 2.181235
epoch: 240, loss: 2.179543
epoch: 250, loss: 2.178113
epoch: 260, loss: 2.176901
epoch: 270, loss: 2.175659
epoch: 280, loss: 2.174585
epoch: 290, loss: 2.173557


In [15]:
block_size

3

In [23]:
# Sample from neural network model
pretty_print = True
for i in range(20):
    context = [0] * block_size
    chars = []
    while True:
        # forward
        with torch.no_grad():
            pred = model(context)
            probs = F.softmax(pred, dim=-1)

        idx = torch.multinomial(probs, num_samples=1).item()

        chars.append(itos[idx])
        context = context[1:] + [ix]
        if idx == 27:
            break

    if pretty_print:
        # hide start and end tokens
        print("".join(chars[1:-1]))
    else:
        print(itos[0] + "".join(chars))

atnu
alswnneynbl
xki
ai
arnnr
au
cyfuyci
uely
aadi
eii
aevvs
ahu
eeh
hoy
aeuniigyi
gcidaxpyesqo
ooubemeirv
rei
rziuu
bho


## A little improvement over trigram model