In [213]:
import torch 
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader, random_split
from torch import nn
import torch.nn.functional as F
from tqdm import tqdm
import numpy as np


In [214]:
with open("data/names.txt") as f:
    words = f.read().splitlines()

chars = [".", "@"] + sorted(list(set(''.join(words))))
vocab_size = len(chars) # 28 in this case

str_to_idx = {s: i for i, s in enumerate(chars)}
idx_to_str = {i: s for s, i in str_to_idx.items()}

# Build the dataset

In [215]:
from torch.nn.utils.rnn import pad_sequence

dataset = []

for word in words:
    sequence = [0] + [str_to_idx[c] for c in word] + [0]
    dataset.append(torch.tensor(sequence))

dataset = pad_sequence(dataset, batch_first=True, padding_value=1)

print(dataset[:3])

dataloader = DataLoader(TensorDataset(dataset), batch_size=500, shuffle=True)

tensor([[ 0,  6, 14, 14,  2,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
        [ 0, 16, 13, 10, 23, 10,  2,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1],
        [ 0,  2, 23,  2,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1]])


In [216]:
class RNN(nn.Module):
    def __init__(self, hidden_size=500, embedding_size=20):
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.character_embeddings = nn.Embedding(vocab_size, embedding_size)
        # self.i2h = nn.Linear(embedding_size+hidden_size, hidden_size)
        self.i2h = nn.Sequential(
            nn.Linear(embedding_size+hidden_size, hidden_size),
            nn.LeakyReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh()
        )
        # self.i2o = nn.Linear(embedding_size+hidden_size, vocab_size)
        self.i2o = nn.Sequential(
            nn.Linear(embedding_size+hidden_size, hidden_size),
            nn.LeakyReLU(),
            nn.Linear(hidden_size, vocab_size),
        )

    def forward(self, input, hidden):
        input = self.character_embeddings(input)
        combined = torch.cat([input, hidden], 1)
        # print(combined.shape)
        hidden = self.i2h(combined)
        output = self.i2o(combined)

        return output, hidden

    def initialize_hidden(self, batch_size=1):
        return torch.zeros(batch_size, self.hidden_size)

In [217]:
model = RNN().cuda()
optim = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss(ignore_index=1, reduction='sum')

for epoch in range(20):
    losses = []
    counts = 0
    with tqdm(dataloader) as dataloader:
        for batch in dataloader:
            batch = batch[0].cuda()

            hidden = model.initialize_hidden(len(batch)).cuda()
            

            loss = 0
            counts = 0

            for i in range(batch.shape[1]-1):
                inpt = batch[:, i]
                target = batch[:, i+1]
                # print(target)

                output, hidden = model(inpt, hidden)
                loss += criterion(output, target)
                counts += 1

            # loss = torch.mean(loss)

            losses.append(loss.item()/(counts*len(batch)))

            optim.zero_grad()
            loss.backward()
            optim.step()
            
    # print(losses)
    print(f"Epoch {epoch} loss: {np.mean(losses)}")

100%|██████████| 65/65 [00:01<00:00, 45.77it/s]


Epoch 0 loss: 1.1197070018990614


100%|██████████| 65/65 [00:01<00:00, 58.54it/s]


Epoch 1 loss: 1.0125535993242594


100%|██████████| 65/65 [00:01<00:00, 57.06it/s]


Epoch 2 loss: 0.9755790114182692


100%|██████████| 65/65 [00:01<00:00, 58.78it/s]


Epoch 3 loss: 0.9512321528650386


100%|██████████| 65/65 [00:01<00:00, 58.79it/s]


Epoch 4 loss: 0.9321398913332354


100%|██████████| 65/65 [00:01<00:00, 56.68it/s]


Epoch 5 loss: 0.9175531264369581


100%|██████████| 65/65 [00:01<00:00, 58.14it/s]


Epoch 6 loss: 0.9039839262635557


100%|██████████| 65/65 [00:01<00:00, 55.41it/s]


Epoch 7 loss: 0.892285005461864


100%|██████████| 65/65 [00:01<00:00, 59.62it/s]


Epoch 8 loss: 0.8833154886099008


100%|██████████| 65/65 [00:01<00:00, 56.01it/s]


Epoch 9 loss: 0.8745594239206025


100%|██████████| 65/65 [00:01<00:00, 57.24it/s]


Epoch 10 loss: 0.8647306690349446


100%|██████████| 65/65 [00:01<00:00, 56.23it/s]


Epoch 11 loss: 0.8567522334598996


100%|██████████| 65/65 [00:01<00:00, 58.46it/s]


Epoch 12 loss: 0.8489761477454837


100%|██████████| 65/65 [00:01<00:00, 57.72it/s]


Epoch 13 loss: 0.8409239208159192


100%|██████████| 65/65 [00:01<00:00, 56.34it/s]


Epoch 14 loss: 0.8340530972069515


100%|██████████| 65/65 [00:01<00:00, 56.24it/s]


Epoch 15 loss: 0.8291122880273211


100%|██████████| 65/65 [00:01<00:00, 54.57it/s]


Epoch 16 loss: 0.8220381142285043


100%|██████████| 65/65 [00:01<00:00, 55.78it/s]


Epoch 17 loss: 0.8152520880141181


100%|██████████| 65/65 [00:01<00:00, 55.67it/s]


Epoch 18 loss: 0.8094305247842571


100%|██████████| 65/65 [00:01<00:00, 53.12it/s]

Epoch 19 loss: 0.8049684979134228





In [230]:
def sample_names():
    model.cpu()
    model.eval()
    for i in range(20):
        hidden = model.initialize_hidden()
        input = torch.tensor([0])
        name = ""
        for i in range(20):
            output, hidden = model(input, hidden)
            input = torch.multinomial(F.softmax(output, dim=1), 1)
            name += idx_to_str[input.item()]
            if input.item() == 0:
                break
            input = torch.tensor([input])
        if name.strip('.') in words:
            print("FOUND:", name)
            continue
        print("NEW:  ",name)

sample_names()

NEW:   esvie.
FOUND: omara.
NEW:   jaroli.
NEW:   nichail.
NEW:   ellyna.
FOUND: emiliani.
FOUND: anthony.
FOUND: elliani.
NEW:   kombertte.
NEW:   faaouf.
NEW:   samrish.
NEW:   stommi.
FOUND: jade.
NEW:   jeajo.
NEW:   aishaun.
NEW:   mucki.
NEW:   emmeron.
FOUND: armanie.
FOUND: khori.
NEW:   ronelle.


In [220]:
@torch.no_grad()
def evaluate_model(model):
    model.eval()
    correct = 0
    total = 0
    losses = []

    with tqdm(total=len(dataset)) as pbar:
        for name in dataset:
            hidden = model.initialize_hidden()
            loss = 0
            counts = 0
            for i in range(len(name)-1):
                if name[i] == 0 and i > 0:
                    break
                input = torch.tensor([name[i]])
                target = torch.tensor([name[i+1]])
                output, hidden = model(input, hidden)
                loss += F.cross_entropy(output, target)
                pred = output.argmax(dim=1)
                correct += (pred == target).sum()
                counts += 1
                total += 1
            losses.append(loss.item()/counts)
            pbar.update(1)
    print(f"Accuracy: {correct/total}")
    print(f"Loss: {np.mean(losses)}")



evaluate_model(model)

100%|██████████| 32033/32033 [00:58<00:00, 543.34it/s]

Accuracy: 0.428550124168396
Loss: 1.8309666892158272



