<a href="https://colab.research.google.com/github/lilnoes/notebooks/blob/main/nlp_names_rnn-torch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from pathlib import Path

In [None]:
!wget https://download.pytorch.org/tutorial/data.zip && unzip data.zip && pwd

In [None]:
# Root directory of the unzipped dataset (absolute path; presumably the Colab
# working directory /content — adjust when running elsewhere).
data = Path('/content/data')


In [None]:
def load_dataset(filedir):
  """Load every ``*.txt`` name list found directly in ``filedir``.

  Each file is one category: the file stem is the category name and every
  non-blank line is one name.

  Args:
    filedir: Directory (str or Path) containing the per-category text files.

  Returns:
    A tuple ``(categories, data_dict, tensor_dict)``: ``categories`` is the
    list of category names in sorted file order, ``data_dict`` maps a category
    to its list of raw names, and ``tensor_dict`` maps a category to the list
    of ``process_word(name)`` results, index-aligned with ``data_dict``.
  """
  categories = []
  data_dict = {}
  tensor_dict = {}
  # sorted(): glob order depends on the filesystem, and a category's position
  # in `categories` is what get_random() hands out as its class label.
  for filename in sorted(Path(filedir).glob('*.txt')):
    cat = filename.stem
    raw_lines = filename.read_text(encoding='utf-8').splitlines()
    # Skip blank lines so an empty file or stray newline never yields an empty name.
    lines = [line.strip() for line in raw_lines if line.strip()]
    categories.append(cat)
    data_dict[cat] = lines
    tensor_dict[cat] = [process_word(name) for name in lines]

  return categories, data_dict, tensor_dict

In [None]:
import string
# Define the alphabet first so it is already in scope for everything that runs
# while the names are being loaded and encoded.
letters = string.ascii_lowercase
# NOTE: load_dataset() calls process_word(), which is defined in the next cell;
# on a fresh kernel that cell has to be executed before this one.
# The names directory is derived from `data` instead of repeating the literal path.
categories, data_dict, tensor_dict = load_dataset(data / 'names')

In [None]:
import unicodedata
import re
def uni_to_asci(s):
  """Strip diacritics from ``s``.

  The string is decomposed with Unicode NFD normalisation and every character
  whose category is ``Mn`` (non-spacing mark) is discarded; all other
  characters are kept unchanged and in order.
  """
  decomposed = unicodedata.normalize('NFD', s)
  kept = []
  for ch in decomposed:
    if unicodedata.category(ch) == 'Mn':
      continue
    kept.append(ch)
  return ''.join(kept)

def process_word(word):
  """Encode a name as a 1-D tensor of letter indices (a=0 ... z=25).

  The word is lower-cased, stripped of diacritics via ``uni_to_asci`` and every
  character outside ``a-z`` is removed before encoding.

  Args:
    word: The raw name.

  Returns:
    A ``torch.long`` tensor with one index per remaining letter. It is empty
    (but still ``torch.long``) when no letters remain.
  """
  word = uni_to_asci(word.lower())
  word = re.sub(r'[^a-z]', '', word)
  # Only a-z survive the filter, so the offset from 'a' equals the position in
  # string.ascii_lowercase. Computing it directly removes the dependency on the
  # notebook-level `letters`, which is assigned only after the first call.
  indices = [ord(ch) - ord('a') for ch in word]
  # Explicit dtype: torch.tensor([]) would otherwise be float32, which
  # nn.Embedding does not accept as indices.
  return torch.tensor(indices, dtype=torch.long)


In [None]:
# Sanity check: encode a sample name and display the resulting index tensor.
x = process_word('emma')
x

tensor([ 4, 12, 12,  0])

In [None]:
# NOTE: get_random() is defined in a later cell of this notebook; that cell
# must have been executed before this one.
get_random()

('Scottish', tensor([5]), 'Mckenzie', tensor([12,  2, 10,  4, 13, 25,  8,  4]))

In [None]:
class Model(nn.Module):
  """Bidirectional-RNN classifier over a sequence of symbol indices.

  Pipeline: embedding -> bidirectional ``nn.RNN`` -> sum over time steps ->
  linear + ReLU + dropout -> linear producing one logit per class.

  Args:
    tx: Number of distinct input symbols (rows of the embedding table).
    units: Hidden size of the RNN, per direction.
    batch_size: Batch dimension of the initial hidden state. ``forward``
      reshapes its input to a batch of one, so this is expected to be 1.
    vocab_size: Number of output classes.
    embed_dim: Size of each embedding vector (default 256, the value that was
      previously hard-coded).
    dropout: Dropout probability after the first linear layer (default 0.2).
  """

  def __init__(self, tx, units, batch_size, vocab_size, embed_dim=256, dropout=0.2):
    super(Model, self).__init__()
    self.units = units
    self.batch_size = batch_size
    self.tx = tx
    self.vocab_size = vocab_size
    self.embed_dim = embed_dim
    self.dropout = dropout
    self.embedding = nn.Embedding(tx, embed_dim)
    self.state = self.initialize_state()
    self.rnn = nn.RNN(input_size=embed_dim, hidden_size=units, bidirectional=True)
    self.linear1 = nn.Linear(units*2, 64)
    self.linear2 = nn.Linear(64, vocab_size)

  def forward(self, x):
    """Return the class logits, shape ``(1, vocab_size)``, for one encoded word.

    Args:
      x: Integer tensor of symbol indices for a single word.
    """
    x = self.embedding(x)
    # Keep the initial state on the same device as the activations.
    state = self.state.to(x.device)
    # nn.RNN takes (seq_len, batch, features); the word is a batch of one.
    x, _ = self.rnn(x.view(-1, 1, self.embed_dim), state)
    # Sum the per-step outputs over the time axis -> (1, 2 * units).
    x = F.relu(self.linear1(x.sum(dim=0)))
    # F.dropout defaults to training=True; without the flag it would keep
    # dropping activations after model.eval().
    x = F.dropout(x, self.dropout, training=self.training)
    x = self.linear2(x)
    return x

  def initialize_state(self):
    """Return the all-zero initial hidden state, shape ``(2, batch_size, units)``.

    The leading 2 is one slot per direction of the single-layer bidirectional RNN.
    """
    return torch.zeros(2, self.batch_size, self.units)

In [None]:
# Hyper-parameters and model construction.
tx = len(letters)  # input alphabet size (number of embedding rows)
units = 128  # RNN hidden size per direction
batch_size = 1  # Model.forward reshapes every word to a batch of one
vocab_size = len(categories)  # number of output classes, one per category

model = Model(tx, units, batch_size, vocab_size)

In [None]:
# Smoke-test a forward pass.
# NOTE: no cell above assigns `tensor`; it is created by later cells
# (process_word(...) / get_random()), so this cell relies on prior kernel state.
y = model(tensor)
# y = y.sum(dim=0)
y.shape

RuntimeError: ignored

In [None]:
# Build a dummy target holding class index 0 with shape [1], for trying out the loss call.
target = torch.tensor(0).expand(1)
target.size()
# loss_fn = nn.CrossEntropyLoss()
# loss(y.view(1, -1), target)

torch.Size([1])

In [None]:
# Training objects used by train_step(): cross-entropy over the class logits
# and plain SGD over all model parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005)

def train_step(x, y):
  """Run one optimiser update on a single example and return its loss.

  Uses the notebook-level ``model``, ``loss_fn`` and ``optimizer``.

  Args:
    x: Encoded word that is passed to ``model``.
    y: Target class index as a tensor of shape [1].

  Returns:
    The loss of this example as a Python float.
  """
  # Always train in training mode, regardless of the mode a previously run
  # cell left the model in.
  model.train()
  optimizer.zero_grad()
  logits = model(x)
  # Flatten to (1, n_classes): one row of class scores for the single target.
  loss = loss_fn(logits.view(1, -1), y)
  loss.backward()
  optimizer.step()
  return loss.item()


In [None]:
def train(epochs, steps=10000):
  """Train for ``epochs`` epochs of ``steps`` randomly drawn examples each.

  Every step draws one sample with ``get_random`` and feeds it to
  ``train_step``; after each epoch the mean step loss is printed.
  """
  def run_one_step():
    # get_random() yields (category, label tensor, name, encoded name).
    _, target, _, word = get_random()
    return train_step(word, target)

  for epoch_no in range(1, epochs + 1):
    total = sum(run_one_step() for _ in range(steps))
    print(f'epoch {epoch_no} loss {total/steps:.3f}')

In [None]:
# Train for 10 epochs with the default 10000 random samples per epoch.
# NOTE(review): in the recorded output the mean loss sits around 0.5 from the
# first epoch on — presumably the model had already been trained in that kernel
# session; confirm on a fresh run.
train(10)

epoch 1 loss 0.499
epoch 2 loss 0.508
epoch 3 loss 0.526
epoch 4 loss 0.537
epoch 5 loss 0.523
epoch 6 loss 0.518
epoch 7 loss 0.492
epoch 8 loss 0.504
epoch 9 loss 0.532
epoch 10 loss 0.515


In [None]:
# cat, catx,name, tensor = get_random()
# Predict the three most likely categories for a hand-picked name.
tensor = process_word('satoshi')
# Inference only: run in eval mode and without building an autograd graph, then
# restore whatever mode the model was in so later training cells are unaffected.
was_training = model.training
model.eval()
with torch.no_grad():
  y = model(tensor)
model.train(was_training)
_,ind = y.topk(3)
# `ind` has one row of top-k class indices; walk it as plain Python ints.
for i in ind[0].tolist():
  print(categories[i])

Japanese
Italian
Russian


In [None]:
# Show `cat` next to the category predicted (top-1) for the current `y`.
# NOTE: `cat` is only assigned by the get_random() cells further down, so this
# cell depends on one of them having been run already.
_,ind = y.topk(1)
ind = ind.item()
cat, categories[ind]

('English', 'Polish')

In [None]:
# Inspect the category list; get_random() uses an entry's position here as its class label.
categories

In [None]:
# Draw a random sample and run it through the network.
# The original call went through `rnn`, a name no cell in this notebook
# defines; the network instance is `model`.
cat, catx,name, tensor = get_random()
y = model(tensor)
y.shape

torch.Size([1, 1, 18])

In [None]:
# Cross-entropy of the current prediction `y` against the sampled label `catx`.
loss_fn(y.view(1,-1), catx)

tensor(2.7988, grad_fn=<NllLossBackward>)

In [None]:
# Forward the encoded word `x` through the network and show the input dtype
# together with the output size. The original used the undefined name `rnn`
# and a bare `x.dtype` on a non-final line, which displays nothing.
y = model(x)
x.dtype, y.size()

torch.Size([1, 1, 18])

In [None]:
import numpy as np
def get_random():
  """Draw one random training example.

  A category index is drawn with ``np.random.randint`` first, then a name
  index within that category.

  Returns:
    ``(category name, tensor([category index]), raw name, encoded name)``,
    read from the notebook-level ``categories``, ``data_dict`` and
    ``tensor_dict``.
  """
  cat_idx = np.random.randint(0, len(categories))
  cat = categories[cat_idx]
  names = data_dict[cat]
  name_idx = np.random.randint(0, len(names))
  encoded = tensor_dict[cat][name_idx]
  label = torch.tensor([cat_idx])
  return cat, label, names[name_idx], encoded

In [None]:
# Draw one random sample: category name, label tensor, raw name, encoded name.
cat, catx,name, tensor = get_random()

In [None]:
# get_random() builds the label as torch.tensor([i]), i.e. a single class index.
catx.shape

torch.Size([1])

In [None]:
# One optimisation step on the sampled example.
# NOTE(review): the recorded run of this cell ended in a RuntimeError; re-check on a fresh kernel.
train_step(tensor, catx)

RuntimeError: ignored

In [None]:
# Manual loss check: cross-entropy of the current `y` against class index 0.
target = torch.tensor(0).expand(1)
target.size()
# NOTE: this creates a second CrossEntropyLoss under the name `loss`; the
# training code uses the separate `loss_fn` instance.
loss = nn.CrossEntropyLoss()
loss(y.view(1, -1), target)

tensor(2.9066, grad_fn=<NllLossBackward>)