# NLP - Multi-Class Text Classification using RNNs - Sample

By [Akshaj Verma](https://akshajverma.com)  

This notebook takes you through a sample implementation of multi-class text classification in the form of sentiment analysis on yelp reviews using RNNs in PyTorch.

In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader


%matplotlib inline

torch.manual_seed(1)

<torch._C.Generator at 0x7fe59848d390>

## Prepare Data

In [2]:
training_data = [
    ("Ronaldo plays football.".split(), "football"),
    ("Cho likes quidditch.".split(), "quidditch"),
    ("Jordan adores basketball.".split(), "basketball"),
    ("McTominay plays football.".split(), "football"),
    ("Woods likes quidditch.".split(), "quidditch"),
    ("Kobe adores basketball.".split(), "basketball"),
    ("Scholes likes quidditch.".split(), "football"),
    ("Ginny adores quidditch.".split(), "quidditch")

]

sentence_list = [training_data[x][0] for x in range(len(training_data))]
tag_list = [training_data[x][1] for x in range(len(training_data))]

### The input sentences.

In [3]:
sentence_list

[['Ronaldo', 'plays', 'football.'],
 ['Cho', 'likes', 'quidditch.'],
 ['Jordan', 'adores', 'basketball.'],
 ['McTominay', 'plays', 'football.'],
 ['Woods', 'likes', 'quidditch.'],
 ['Kobe', 'adores', 'basketball.'],
 ['Scholes', 'likes', 'quidditch.'],
 ['Ginny', 'adores', 'quidditch.']]

### The output tags.

In [4]:
tag_list

['football',
 'quidditch',
 'basketball',
 'football',
 'quidditch',
 'basketball',
 'football',
 'quidditch']

### Clean the input data by converting it into lower case.

In [5]:
data_clean_list = []
for sentence, tags in training_data:
    clean_sentence = [x.lower().split('.')[0] for x in sentence]
    data_clean_list += [(clean_sentence, tags)]

    
sentence_clean_list = [data_clean_list[x][0] for x in range(len(data_clean_list))]

In [6]:
sentence_clean_list

[['ronaldo', 'plays', 'football'],
 ['cho', 'likes', 'quidditch'],
 ['jordan', 'adores', 'basketball'],
 ['mctominay', 'plays', 'football'],
 ['woods', 'likes', 'quidditch'],
 ['kobe', 'adores', 'basketball'],
 ['scholes', 'likes', 'quidditch'],
 ['ginny', 'adores', 'quidditch']]

### Create a vocab for input words.

In [7]:
words = []
for sentence in sentence_clean_list:
    words += sentence
words = list(set(words))
print(f"Size of word-vocablury: {len(words)}\n")
print(words)

Size of word-vocablury: 14

['cho', 'woods', 'ginny', 'plays', 'scholes', 'adores', 'quidditch', 'ronaldo', 'likes', 'mctominay', 'kobe', 'basketball', 'football', 'jordan']


### Create a dictionary for input <=> ID.

In [8]:
word2idx = {word: i for i, word in enumerate(words)}
print(word2idx)

{'cho': 0, 'woods': 1, 'ginny': 2, 'plays': 3, 'scholes': 4, 'adores': 5, 'quidditch': 6, 'ronaldo': 7, 'likes': 8, 'mctominay': 9, 'kobe': 10, 'basketball': 11, 'football': 12, 'jordan': 13}


### Create a vocab for output tags.

In [9]:
tags = []
for tag in tag_list:
    tags.append(tag)
tags = list(set(tags))
print(f"Size of tag-vocab: {len(tags)}\n")
print(tags)

Size of tag-vocab: 3

['basketball', 'football', 'quidditch']


### Create a dictionary for output <=> ID.

In [10]:
tag2idx = {word: i for i, word in enumerate(tags)}
print(tag2idx)

{'basketball': 0, 'football': 1, 'quidditch': 2}


### Encode the words to numbers.

In [11]:
sentence_clean_list, tag_list

([['ronaldo', 'plays', 'football'],
  ['cho', 'likes', 'quidditch'],
  ['jordan', 'adores', 'basketball'],
  ['mctominay', 'plays', 'football'],
  ['woods', 'likes', 'quidditch'],
  ['kobe', 'adores', 'basketball'],
  ['scholes', 'likes', 'quidditch'],
  ['ginny', 'adores', 'quidditch']],
 ['football',
  'quidditch',
  'basketball',
  'football',
  'quidditch',
  'basketball',
  'football',
  'quidditch'])

In [12]:
X = [[word2idx[w] for w in s] for s in sentence_clean_list]
X

[[7, 3, 12],
 [0, 8, 6],
 [13, 5, 11],
 [9, 3, 12],
 [1, 8, 6],
 [10, 5, 11],
 [4, 8, 6],
 [2, 5, 6]]

In [13]:
y = [tag2idx[t] for t in tag_list]
y

[1, 2, 0, 1, 2, 0, 1, 2]

## Neural Network Params and Data Loader

Input -> RNN -> Linear -> Sigmoid

### Define the model parameters.

In [14]:
EMBEDDING_SIZE = 6
HIDDEN_SIZE = 2
LEARNING_RATE = 0.01
EPOCH = 10
STACKED_LAYERS = 5
BATCH_SIZE = 4

### Data Loader.

In [15]:
class TrainData(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)

In [16]:
train_data = TrainData(torch.Tensor(X).to(torch.long), torch.Tensor(y).to(torch.float32))
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE)

In [17]:
for i, j in train_loader:
    print((i, j))

(tensor([[ 7,  3, 12],
        [ 0,  8,  6],
        [13,  5, 11],
        [ 9,  3, 12]]), tensor([1., 2., 0., 1.]))
(tensor([[ 1,  8,  6],
        [10,  5, 11],
        [ 4,  8,  6],
        [ 2,  5,  6]]), tensor([2., 0., 1., 2.]))


## GRU MODEL

In [18]:
class GRUtagger(nn.Module):
    
    def __init__(self, embedding_size, vocab_size, hidden_size, target_size, stacked_layers):
        super(GRUtagger, self).__init__()
        
        self.word_embeddings = nn.Embedding(num_embeddings = vocab_size, embedding_dim = embedding_size)
        self.gru = nn.GRU(input_size = embedding_size, hidden_size=hidden_size, batch_first = True, num_layers = stacked_layers)
        self.linear = nn.Linear(in_features = hidden_size, out_features=target_size)
        self.tanh = nn.Tanh()

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        gru_out, gru_hidden = self.gru(embeds)
        linear_out = self.linear(self.tanh(gru_hidden))
        
        y_out = linear_out[-1]
        
        return y_out

In [19]:
gru_model = GRUtagger(embedding_size=EMBEDDING_SIZE, vocab_size=len(word2idx), hidden_size=HIDDEN_SIZE, target_size=len(tag2idx), stacked_layers=STACKED_LAYERS)
print(gru_model)

criterion = nn.CrossEntropyLoss()
optimizer =  optim.Adam(gru_model.parameters())

GRUtagger(
  (word_embeddings): Embedding(14, 6)
  (gru): GRU(6, 2, num_layers=5, batch_first=True)
  (linear): Linear(in_features=2, out_features=3, bias=True)
  (tanh): Tanh()
)


### See how the GRU output from the model looks.

output = [batch size, sent len, hid dim]  
hidden = [batch size, 1, hid dim]

In [20]:
with torch.no_grad():
    for x_batch, y_batch in train_loader:
        print("Input:")
        print(x_batch)
        y_out = gru_model(x_batch)
        
        y_out_softmax = torch.log_softmax(y_out, dim = 1)
        _, y_out_tags = torch.max(y_out_softmax, dim = 1)
        
        print("\nLinear Output:")
        print(y_out)
        
        print("\nLogSoftmax Output:")
        print(y_out_softmax)
        
        print("\nOutput Indices:")
        print(y_out_tags)
        
        print("\nActual Output:")
        print(y_batch)
        
        print("=" * 50)

Input:
tensor([[ 7,  3, 12],
        [ 0,  8,  6],
        [13,  5, 11],
        [ 9,  3, 12]])

Linear Output:
tensor([[-0.7124, -0.5669,  0.5714],
        [-0.7124, -0.5669,  0.5714],
        [-0.7124, -0.5669,  0.5714],
        [-0.7124, -0.5669,  0.5714]])

LogSoftmax Output:
tensor([[-1.7522, -1.6066, -0.4684],
        [-1.7521, -1.6067, -0.4683],
        [-1.7521, -1.6067, -0.4683],
        [-1.7522, -1.6066, -0.4683]])

Output Indices:
tensor([2, 2, 2, 2])

Actual Output:
tensor([1., 2., 0., 1.])
Input:
tensor([[ 1,  8,  6],
        [10,  5, 11],
        [ 4,  8,  6],
        [ 2,  5,  6]])

Linear Output:
tensor([[-0.7124, -0.5669,  0.5714],
        [-0.7124, -0.5669,  0.5714],
        [-0.7124, -0.5669,  0.5714],
        [-0.7124, -0.5669,  0.5714]])

LogSoftmax Output:
tensor([[-1.7521, -1.6067, -0.4683],
        [-1.7521, -1.6067, -0.4683],
        [-1.7522, -1.6066, -0.4683],
        [-1.7521, -1.6067, -0.4683]])

Output Indices:
tensor([2, 2, 2, 2])

Actual Output:
tensor(

### Train the GRU model

In [21]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)    
    
    correct_pred = (y_pred_tags == y_test).float()
    acc = correct_pred.sum() / len(correct_pred)
    return acc

In [22]:
gru_model.train()
for e in range(1, EPOCH+1):
    epoch_loss = 0
    epoch_acc = 0
    for x_batch, y_batch in train_loader:
        
        optimizer.zero_grad()
        
        y_out = gru_model(x_batch)
               
        loss = criterion(y_out, y_batch.squeeze().long())
        acc = multi_acc(y_out, y_batch.squeeze().long())
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    print(f'Epoch: {e+0:02} | Loss: {epoch_loss/len(train_loader):.5f} | Accuracy: {acc}')

Epoch: 01 | Loss: 1.21583 | Accuracy: 0.5
Epoch: 02 | Loss: 1.21336 | Accuracy: 0.5
Epoch: 03 | Loss: 1.21102 | Accuracy: 0.5
Epoch: 04 | Loss: 1.20873 | Accuracy: 0.5
Epoch: 05 | Loss: 1.20647 | Accuracy: 0.5
Epoch: 06 | Loss: 1.20425 | Accuracy: 0.5
Epoch: 07 | Loss: 1.20205 | Accuracy: 0.5
Epoch: 08 | Loss: 1.19989 | Accuracy: 0.5
Epoch: 09 | Loss: 1.19776 | Accuracy: 0.5
Epoch: 10 | Loss: 1.19566 | Accuracy: 0.5
