# Char-RNN-Classification

Tutorial URL: https://pytorch.org/tutorials/intermediate/char_rnn_classification_tutorial.html

## Model
![](https://i.imgur.com/Z2xbySO.png)

+ input: `(batch_size, num_letters)`, the one-hot encoding of a single character such as 's'
+ hidden: `(batch_size, hidden_size)`
+ output: `(batch_size, num_classes)`


## 1. Import packages

In [0]:
import os
import glob
import random
import string
import unicodedata
import torch
import torch.nn as nn
import torch.nn.functional as F

## 2. Define constants

In [0]:
# Width of the recurrent hidden state.
hidden_size = 128
# Number of training iterations passed to Model.fit.
num_epochs = 200000

# Prefer the GPU when CUDA is usable; fall back to the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

## 3. Prepare data

In [0]:
def download(url, filename=None):
    """Fetch ``url`` to a local file unless that file is already present.

    Args:
        url: Address of the resource to fetch.
        filename: Destination path. When omitted, the text after the last
            '/' of ``url`` is used.
    """
    import os
    from urllib.request import urlretrieve

    target = url.split('/')[-1] if filename is None else filename
    if os.path.exists(target):
        # Something with that name is already on disk, so skip the download.
        return
    print('Downloading {} from {}...'.format(target, url))
    urlretrieve(url, filename=target)

def unzip(path):
    """Extract every member of the zip archive at ``path`` into the current
    working directory.

    Args:
        path: Location of the zip archive to unpack.
    """
    from zipfile import ZipFile
    with ZipFile(path, 'r') as z:
        z.extractall()


# The archive has to be on disk before it can be opened, so fetch it first
# (download() does nothing when the file already exists) and then unpack it.
download('https://download.pytorch.org/tutorial/data.zip')
unzip('data.zip')

# One text file per category: data/names/<Category>.txt
paths = glob.glob('data/names/*.txt')

# A category name is the file name without its directory and extension.
categories = [path.rsplit('/', 1)[-1].split('.', 1)[0] for path in paths]

# Alphabet for the one-hot encoding: ASCII letters plus five punctuation marks.
letters = string.ascii_letters + " .,;'"

num_classes, num_letters = len(categories), len(letters)


def unicode_to_ascii(s):
    """Reduce ``s`` to the characters of the module-level ``letters`` alphabet.

    The string is NFD-normalized so accented characters split into a base
    character plus combining marks; combining marks (Unicode category 'Mn')
    and any character not in ``letters`` are then discarded.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = []
    for ch in decomposed:
        if unicodedata.category(ch) == 'Mn':
            continue
        if ch not in letters:
            continue
        kept.append(ch)
    return ''.join(kept)
  

# Maps each category name to the list of ASCII-normalized names read from
# that category's file.
cat2words = dict()
for path in paths:
    # Same derivation as `categories`, so every category has an entry here.
    category = path.split('/')[-1].split('.')[0]
    # Decode explicitly as UTF-8: the names are passed through
    # unicode_to_ascii, so non-ASCII input is expected and the platform
    # default encoding cannot be relied on to decode it.
    with open(path, 'r', encoding='utf-8') as f:
        words = [unicode_to_ascii(line.strip()) for line in f]
    # Discard blank lines and names that normalize to an empty string; a
    # zero-length name has no time steps to feed through the model.
    words = [word for word in words if word]
    cat2words[category] = words

        

def random_load():
    """Draw one random training example.

    A category is chosen uniformly first, then a name uniformly from that
    category's list.

    Returns:
        A tuple ``(word, category, word_tensor, category_tensor)`` where
        ``word_tensor`` has shape (1, len(word), num_letters) with one 1 per
        letter row, and ``category_tensor`` is a long tensor of shape (1,)
        holding the category's index in ``categories``.
    """
    category = random.choice(categories)
    word = random.choice(cat2words[category])

    # One-hot encode the name, one row per letter.
    word_tensor = torch.zeros(1, len(word), num_letters)
    for position, letter in enumerate(word):
        word_tensor[0, position, letters.find(letter)] = 1

    # Class label as an integer index.
    category_tensor = torch.tensor([categories.index(category)], dtype=torch.long)

    return word, category, word_tensor, category_tensor

In [4]:
# Draw one random sample and print each returned value and tensor shape.
word, category, word_tensor, category_tensor = random_load()
for shown in (word, category, word_tensor.shape, word_tensor,
              category_tensor.shape, category_tensor):
    print(shown)

Ngai
Korean
torch.Size([1, 4, 57])
tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.],
         [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
  

## 4. Build model

In [0]:
class Model(nn.Module):
    """Character-level recurrent classifier built from two linear layers.

    At every time step the current one-hot letter is concatenated with the
    previous hidden state; one linear layer maps that vector to the next
    hidden state and another maps it to class scores. The instance owns its
    loss function and optimizer and moves itself to the module-level
    ``device`` on construction.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        """
        Args:
            input_size: Length of one input vector (number of letters).
            hidden_size: Length of the hidden state.
            num_classes: Number of output categories.
        """
        super().__init__()
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)  # next hidden state
        self.i2o = nn.Linear(input_size + hidden_size, num_classes)  # class scores

        # forward() already returns log-probabilities, which is exactly what
        # NLLLoss consumes. CrossEntropyLoss would apply log-softmax a second
        # time (a no-op on normalized log-probabilities, but redundant).
        self.loss_fn = nn.NLLLoss()

        # Move the parameters first so the optimizer is built over the
        # parameters as they live on the target device.
        self.to(device)
        self.optimizer = torch.optim.Adam(self.parameters(), lr=1e-3, betas=(0.9, 0.99))

    def _rnn(self, words, hidden):
        """Run a single time step.

        Args:
            words: (B, input_size) input vectors for this step.
            hidden: (B, hidden_size) hidden state from the previous step.

        Returns:
            ``(log_probs, next_hidden)`` with shapes (B, num_classes) and
            (B, hidden_size).
        """
        inputs = torch.cat([words, hidden], 1)
        outputs = self.i2o(inputs)
        next_hidden = self.i2h(inputs)
        # Normalize over the class dimension; leaving `dim` implicit is
        # deprecated and emits a warning on every call.
        return F.log_softmax(outputs, dim=1), next_hidden

    def forward(self, words):
        """Feed a batch of sequences through the network step by step.

        Args:
            words: (B, timesteps, num_letters) one-hot encoded sequences.

        Returns:
            (B, num_classes) log-probabilities produced at the last step.

        Raises:
            ValueError: If the sequences contain no time steps.
        """
        batch_size, timesteps = words.shape[0], words.shape[1]
        if timesteps == 0:
            # Without any step there is no prediction to return.
            raise ValueError('cannot classify an empty sequence')

        # Initial hidden state: zeros with the same dtype and device as the input.
        hidden = words.new_zeros(batch_size, self.i2h.out_features)
        for i in range(timesteps):
            y_predicted, hidden = self._rnn(words[:, i, :], hidden)

        return y_predicted  # (B, num_classes)

    def fit(self, dataloader, num_epochs):
        """Train for ``num_epochs`` iterations, one sample per iteration.

        Args:
            dataloader: Callable returning
                ``(word, category, word_tensor, category_tensor)``.
            num_epochs: Number of optimization steps to perform.
        """
        self.train()
        model_device = self.i2h.weight.device
        for epoch in range(num_epochs):
            word, category, word_tensor, category_tensor = dataloader()
            word_tensor = word_tensor.to(model_device)  # (1, len(word), num_letters)
            category_tensor = category_tensor.to(model_device)  # (1,) class index

            y_predicted = self(word_tensor)  # (1, num_classes) log-probabilities

            loss = self.loss_fn(y_predicted, category_tensor)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            if epoch % 5000 == 0:
                print('Epoch %d || loss=%.6f' % (epoch, loss.item()))

    def evaluate(self, word):
        """Classify a single name.

        Args:
            word: The name to classify, e.g. 'Huang'.

        Returns:
            ``(probs, classes)`` as numpy arrays: the log-probabilities of the
            (up to) three best categories in descending order, and their
            indices into ``categories``.
        """
        self.eval()
        word_tensor = torch.zeros(1, len(word), num_letters)  # (1, len(word), num_letters)
        for i, letter in enumerate(word):
            index = letters.find(letter)
            # find() returns -1 for characters outside the alphabet; using it
            # as an index would silently mark the last letter instead, so
            # such characters are left as an all-zero row.
            if index >= 0:
                word_tensor[0, i, index] = 1
        word_tensor = word_tensor.to(self.i2h.weight.device)

        with torch.no_grad():
            y_predicted = self(word_tensor)  # (1, num_classes)
        # Never ask for more entries than there are classes.
        probs, classes = y_predicted.topk(min(3, y_predicted.shape[1]))

        return probs[0].cpu().numpy(), classes[0].cpu().numpy()

# Build the classifier: input vectors of length num_letters, a hidden state
# of length hidden_size, and one output per category.
model = Model(num_letters, hidden_size, num_classes)

## 5. Train

In [6]:
# Train for num_epochs iterations; each iteration calls random_load for one sample.
model.fit(random_load, num_epochs=num_epochs)

  app.launch_new_instance()


Epoch 0 || loss=2.879244
Epoch 5000 || loss=0.363566
Epoch 10000 || loss=2.020165
Epoch 15000 || loss=3.508585
Epoch 20000 || loss=0.792526
Epoch 25000 || loss=0.668724
Epoch 30000 || loss=0.374749
Epoch 35000 || loss=0.810371
Epoch 40000 || loss=1.935470
Epoch 45000 || loss=0.850038
Epoch 50000 || loss=1.800413
Epoch 55000 || loss=0.036821
Epoch 60000 || loss=0.752565
Epoch 65000 || loss=1.318513
Epoch 70000 || loss=1.424615
Epoch 75000 || loss=0.746392
Epoch 80000 || loss=1.523151
Epoch 85000 || loss=1.248345
Epoch 90000 || loss=0.193016
Epoch 95000 || loss=0.895539
Epoch 100000 || loss=1.498346
Epoch 105000 || loss=0.111142
Epoch 110000 || loss=0.665448
Epoch 115000 || loss=0.004887
Epoch 120000 || loss=0.565611
Epoch 125000 || loss=0.944741
Epoch 130000 || loss=0.004744
Epoch 135000 || loss=0.240498
Epoch 140000 || loss=0.018857
Epoch 145000 || loss=0.173469
Epoch 150000 || loss=0.079029
Epoch 155000 || loss=2.701761
Epoch 160000 || loss=0.167024
Epoch 165000 || loss=3.864863
Epoch

## 6. Evaluate

In [7]:
# Classify a few sample surnames and show the three best-scoring categories
# for each, together with their scores.
for name in ('Yamada', 'Hinton', 'Schmidhuber'):
    print(name)
    probs, classes = model.evaluate(name)
    print(probs)
    print([categories[c] for c in classes])

Yamada
[-0.03540039 -3.996563   -5.0498314 ]
['Japanese', 'Arabic', 'Spanish']
Hinton
[-0.72587585 -0.997612   -3.1180897 ]
['Scottish', 'English', 'German']
Schmidhuber
[-0.45987606 -1.0869541  -3.8682594 ]
['German', 'Czech', 'English']


  app.launch_new_instance()
