<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports-&amp;-Inits" data-toc-modified-id="Imports-&amp;-Inits-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports &amp; Inits</a></span></li><li><span><a href="#Data-Loading" data-toc-modified-id="Data-Loading-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data Loading</a></span></li><li><span><a href="#Model" data-toc-modified-id="Model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Model</a></span></li><li><span><a href="#Training" data-toc-modified-id="Training-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Training</a></span></li><li><span><a href="#Misc" data-toc-modified-id="Misc-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Misc</a></span></li></ul></div>

# Surname Classifier Using ElmanRNN

## Imports & Inits

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

import pdb
import pandas as pd
import numpy as np
import torch
import re

from torch import nn
from torch.nn import functional as F
from torch import optim
from torch.utils.data import DataLoader
from pathlib import Path

from ignite.engine import Events, create_supervised_evaluator
from ignite.metrics import Accuracy, Loss
from ignite.contrib.handlers import ProgressBar

In [3]:
from surname.dataset import SurnameDataset
from surname.containers import DataContainer, ModelContainer
from surname.model import SurnameClassifier
from surname.trainer import IgniteTrainer
from consts import consts
# Show every attribute stored on `consts` as a plain dict (rendered as the cell output).
vars(consts)

{'path': PosixPath('../data/surnames'),
 'workdir': PosixPath('../data/surnames/rnn_workdir'),
 'proc_dataset_csv': PosixPath('../data/surnames/surnames_with_splits.csv'),
 'model_dir': PosixPath('../data/surnames/rnn_workdir/models'),
 'vectorizer_json': PosixPath('../data/surnames/rnn_workdir/elman_vectorizer.json'),
 'metrics_file': PosixPath('../data/surnames/rnn_workdir/elman_metrics.csv'),
 'class_weights_pth': PosixPath('../data/surnames/rnn_workdir/class_weights.pth'),
 'char_embedding_sz': 100,
 'rnn_hidden_sz': 64,
 'bs': 64,
 'lr': 0.001,
 'n_epochs': 97,
 'device': 'cuda:3',
 'checkpointer_prefix': 'elman',
 'checkpointer_name': 'classifier',
 'es_patience': 11,
 'save_every': 2,
 'save_total': 5}

## Data Loading

In [None]:
# Read the CSV that consts.proc_dataset_csv points to into a DataFrame.
df = pd.read_csv(consts.proc_dataset_csv)
# Quick sanity check: (rows, columns) followed by the first few rows.
print(df.shape)
df.head()

In [None]:
# Build the project's DataContainer from the DataFrame, the dataset class,
# the vectorizer JSON path and the batch size.
# NOTE(review): is_load=True presumably loads an existing vectorizer from
# consts.vectorizer_json instead of fitting a new one — confirm in surname.containers.
dc = DataContainer(df, SurnameDataset, consts.vectorizer_json, consts.bs, is_load=True)

In [None]:
try:
  # Fast path: reuse the weights written by an earlier run of this cell.
  class_weights = torch.load(consts.class_weights_pth)
except FileNotFoundError:
  # No cached file yet: derive inverse-frequency weights from the nationality column.
  vocab = dc.nationality_vocab
  counts_by_label = df['nationality'].value_counts().to_dict()
  # Order the (label, count) pairs by whatever `lookup_token` returns for the label
  # (presumably the class index, so that weight i belongs to class i — confirm).
  ordered = sorted(counts_by_label.items(), key=lambda pair: vocab.lookup_token(pair[0]))
  class_weights = 1.0/torch.tensor([n for _, n in ordered], dtype=torch.float32)
  # Persist the result so the next run takes the fast path.
  torch.save(class_weights, consts.class_weights_pth)

## Model

In [None]:
# Surname classifier built from the config sizes and the data container's vocabulary info;
# the padding index comes from the surname vocabulary's mask index.
classifier = SurnameClassifier(
  consts.char_embedding_sz,
  dc.vocab_size,
  dc.n_classes,
  consts.rnn_hidden_sz,
  padding_idx=dc.surname_vocab.mask_idx,
)

# Adam over all classifier parameters; the scheduler multiplies the learning rate by 0.5
# when the monitored quantity ('min' mode) stops improving, with a patience of 1.
optimizer = optim.Adam(classifier.parameters(), lr=consts.lr)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=1)

# Class-weighted cross entropy; the weights are moved to the configured device first.
class_weights = class_weights.to(consts.device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

# Bundle everything for the trainer and display the wrapped model.
mc = ModelContainer(classifier, optimizer, loss_fn, scheduler)
mc.model

In [None]:
# Iterator over dc.train_dl so that batches can be pulled one at a time with next().
itr = iter(dc.train_dl)

In [None]:
# Pull one item from the iterator; it unpacks into an input part (`inp`) and a target part (`y`).
# `inp` is itself unpacked positionally by `f(*inp)` elsewhere in this section.
inp,y = next(itr)
# Disabled scratch lines below; note that `x` and `l` are not defined by this cell.
# y_pred = mc.model(x,l)
# loss_fn(y_pred, y)

In [None]:
f(*inp)

In [None]:
def f(x_in, x_lens, apply_softmax=False):
#   x_in = args[0][0]
#   x_lens = args[0][1]
#   x_in = args[0]
#   x_lens = args[1]
  
  print(x_in.shape, x_lens.shape)

## Training

In [None]:
# Progress bar created with persist=True.
pbar = ProgressBar(persist=True)
# Metrics passed to the trainer, keyed by name; Loss wraps the same loss_fn used for training.
# NOTE(review): the key 'accuray' looks like a typo for 'accuracy'. It is left unchanged because
# code outside this notebook may look the metric up by this exact key — confirm before renaming.
metrics = {'accuray': Accuracy(), 'loss': Loss(loss_fn)}

In [None]:
# Override the configured epoch count (97 in the config dump near the top) with 2 for a short run.
# NOTE: this assigns onto the shared `consts` object, so the new value stays in effect for later cells.
consts.n_epochs=2
# Trainer wired up with the model container, data container, config, progress bar and metrics.
ig = IgniteTrainer(mc, dc, consts, pbar, metrics)

In [None]:
# Run the trainer constructed in the previous cell.
ig.run()

In [None]:
# Post-mortem debugging is opt-in: uncomment the magic below right after a cell has raised.
# %debug

## Misc

In [None]:
# Toy dimensions for the experiment below: batch size, hidden-state width, sequence length.
bs, hidden_sz, seq_sz = 3, 7, 5

In [None]:
# One random length in [1, seq_sz] per batch row, converted to a NumPy array and shifted by -1
# so that each entry is the 0-based index of that row's last position.
x_lens = torch.randint(1, seq_sz+1, (bs,)).long().detach().cpu().numpy() - 1
# Random stand-in for a sequence output: one hidden_sz vector per (row, position).
y_out = torch.randn(bs, seq_sz, hidden_sz)

In [None]:
# Shape first, then the 0-based last-position indices themselves.
print(x_lens.shape)
x_lens

In [None]:
# Shape first, then the random (bs, seq_sz, hidden_sz) tensor.
print(y_out.shape)
y_out

In [None]:
# For every batch row, pick the vector of y_out located at that row's index from x_lens.
out = [y_out[row, step] for row, step in enumerate(x_lens)]

In [None]:
# Stack the per-row vectors collected in `out` into one tensor with a leading batch dimension.
# NOTE: this rebinds `y`, which the Model section also uses for the batch targets.
y = torch.stack(out)

In [None]:
# Shape first, then the stacked per-row vectors.
print(y.shape)
y

In [5]:
# Same toy dimensions as at the start of this section: batch size, hidden width, sequence length.
bs, hidden_sz, seq_sz = 3, 7, 5

In [6]:
# NOTE(review): this import sits mid-notebook, while the other project imports live in the import
# cell near the top; consider moving it there.
from surname.elman import ElmanRNN
# Stand-alone ElmanRNN built from the configured embedding and hidden sizes, with batch_first=True.
e = ElmanRNN(consts.char_embedding_sz, consts.rnn_hidden_sz, batch_first=True)

In [7]:
# Random input with 2 rows and 10 positions. The last dimension is taken from the config so that it
# always matches the input size `e` was constructed with, instead of repeating the literal 100.
# NOTE: this rebinds `inp`, which the Model section uses for the batch input.
inp = torch.randn(2, 10, consts.char_embedding_sz)
e(inp)

cpu


tensor([[[-0.3893,  0.2977,  0.7787,  ...,  0.4658,  0.1271,  0.3646],
         [ 0.2691,  0.5169,  0.7028,  ...,  0.8372,  0.1507, -0.2506],
         [-0.8052,  0.1645, -0.3084,  ...,  0.8107, -0.7387,  0.7613],
         ...,
         [ 0.1154, -0.4531, -0.1399,  ..., -0.9183,  0.8297, -0.5086],
         [-0.7285,  0.9511,  0.5086,  ..., -0.1680, -0.4127,  0.3866],
         [ 0.6157, -0.4527, -0.1089,  ..., -0.8248, -0.0625, -0.2276]],

        [[-0.6557,  0.3900, -0.1534,  ...,  0.6318, -0.4107, -0.3874],
         [-0.1698, -0.8997, -0.5457,  ...,  0.3647, -0.0111, -0.2831],
         [ 0.0169,  0.2816, -0.1262,  ...,  0.8822,  0.7671, -0.0265],
         ...,
         [-0.5986,  0.9300, -0.2549,  ..., -0.8514, -0.7358, -0.2227],
         [ 0.7654,  0.7239, -0.5933,  ..., -0.7966,  0.0433,  0.6306],
         [-0.8137,  0.8705, -0.8564,  ..., -0.7344,  0.2394,  0.2144]]],
       grad_fn=<PermuteBackward>)

In [9]:
# Move the input and the module to the configured device. Using consts.device (as the model cell
# does) keeps the device choice in one place instead of hard-coding 'cuda:3' here.
inp = inp.to(consts.device)
e = e.to(consts.device)

In [13]:
# Bring the input tensor back to the CPU.
inp = inp.cpu()

In [15]:
# Bring the module back to the CPU as well.
e = e.cpu()

In [17]:
# Forward pass again after the device round trip; the displayed values match the first CPU run.
e(inp)

cpu


tensor([[[-0.3893,  0.2977,  0.7787,  ...,  0.4658,  0.1271,  0.3646],
         [ 0.2691,  0.5169,  0.7028,  ...,  0.8372,  0.1507, -0.2506],
         [-0.8052,  0.1645, -0.3084,  ...,  0.8107, -0.7387,  0.7613],
         ...,
         [ 0.1154, -0.4531, -0.1399,  ..., -0.9183,  0.8297, -0.5086],
         [-0.7285,  0.9511,  0.5086,  ..., -0.1680, -0.4127,  0.3866],
         [ 0.6157, -0.4527, -0.1089,  ..., -0.8248, -0.0625, -0.2276]],

        [[-0.6557,  0.3900, -0.1534,  ...,  0.6318, -0.4107, -0.3874],
         [-0.1698, -0.8997, -0.5457,  ...,  0.3647, -0.0111, -0.2831],
         [ 0.0169,  0.2816, -0.1262,  ...,  0.8822,  0.7671, -0.0265],
         ...,
         [-0.5986,  0.9300, -0.2549,  ..., -0.8514, -0.7358, -0.2227],
         [ 0.7654,  0.7239, -0.5933,  ..., -0.7966,  0.0433,  0.6306],
         [-0.8137,  0.8705, -0.8564,  ..., -0.7344,  0.2394,  0.2144]]],
       grad_fn=<PermuteBackward>)