# NLP - Multi-Class Text Classification using CNN+RNN - Sample

By [Akshaj Verma](https://akshajverma.com)  

This notebook takes you through a sample implementation of multi-class text classification in the form of sentiment analysis on yelp reviews using CNN+RNN in PyTorch.

In [1]:
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader


%matplotlib inline

torch.manual_seed(1)

<torch._C.Generator at 0x7fd1648b02f0>

## Prepare Data

In [2]:
training_data = [
    ("Ronaldo plays football a lot.".split(), "football"),
    ("Cho likes quidditch very much.".split(), "quidditch"),
    ("Jordan adores basketball a lot.".split(), "basketball"),
    ("McTominay plays football very well.".split(), "football"),
    ("Woods likes quidditch a lot.".split(), "quidditch"),
    ("Kobe adores basketball very much.".split(), "basketball"),
    ("Scholes likes quidditch a lot.".split(), "football"),
    ("Ginny adores quidditch very much.".split(), "quidditch")

]

sentence_list = [training_data[x][0] for x in range(len(training_data))]
tag_list = [training_data[x][1] for x in range(len(training_data))]

### The input sentences.

In [3]:
sentence_list

[['Ronaldo', 'plays', 'football', 'a', 'lot.'],
 ['Cho', 'likes', 'quidditch', 'very', 'much.'],
 ['Jordan', 'adores', 'basketball', 'a', 'lot.'],
 ['McTominay', 'plays', 'football', 'very', 'well.'],
 ['Woods', 'likes', 'quidditch', 'a', 'lot.'],
 ['Kobe', 'adores', 'basketball', 'very', 'much.'],
 ['Scholes', 'likes', 'quidditch', 'a', 'lot.'],
 ['Ginny', 'adores', 'quidditch', 'very', 'much.']]

### The output tags.

In [4]:
tag_list

['football',
 'quidditch',
 'basketball',
 'football',
 'quidditch',
 'basketball',
 'football',
 'quidditch']

### Clean the input data by converting it into lower case.

In [5]:
data_clean_list = []
for sentence, tags in training_data:
    clean_sentence = [x.lower().split('.')[0] for x in sentence]
    data_clean_list += [(clean_sentence, tags)]

    
sentence_clean_list = [data_clean_list[x][0] for x in range(len(data_clean_list))]

In [6]:
sentence_clean_list

[['ronaldo', 'plays', 'football', 'a', 'lot'],
 ['cho', 'likes', 'quidditch', 'very', 'much'],
 ['jordan', 'adores', 'basketball', 'a', 'lot'],
 ['mctominay', 'plays', 'football', 'very', 'well'],
 ['woods', 'likes', 'quidditch', 'a', 'lot'],
 ['kobe', 'adores', 'basketball', 'very', 'much'],
 ['scholes', 'likes', 'quidditch', 'a', 'lot'],
 ['ginny', 'adores', 'quidditch', 'very', 'much']]

### Create a vocab for input words.

In [7]:
words = []
for sentence in sentence_clean_list:
    words += sentence
words = list(set(words))
print(f"Size of word-vocablury: {len(words)}\n")
print(words)

Size of word-vocablury: 19

['woods', 'football', 'mctominay', 'ginny', 'quidditch', 'ronaldo', 'much', 'plays', 'cho', 'basketball', 'jordan', 'adores', 'lot', 'well', 'scholes', 'very', 'kobe', 'likes', 'a']


### Create a dictionary for input <=> ID.

In [8]:
word2idx = {word: i for i, word in enumerate(words)}
print(word2idx)

{'woods': 0, 'football': 1, 'mctominay': 2, 'ginny': 3, 'quidditch': 4, 'ronaldo': 5, 'much': 6, 'plays': 7, 'cho': 8, 'basketball': 9, 'jordan': 10, 'adores': 11, 'lot': 12, 'well': 13, 'scholes': 14, 'very': 15, 'kobe': 16, 'likes': 17, 'a': 18}


### Create a vocab for output tags.

In [9]:
tags = []
for tag in tag_list:
    tags.append(tag)
tags = list(set(tags))
print(f"Size of tag-vocab: {len(tags)}\n")
print(tags)

Size of tag-vocab: 3

['quidditch', 'football', 'basketball']


### Create a dictionary for output <=> ID.

In [10]:
tag2idx = {word: i for i, word in enumerate(tags)}
print(tag2idx)

{'quidditch': 0, 'football': 1, 'basketball': 2}


### Encode the words to numbers.

In [11]:
sentence_clean_list, tag_list

([['ronaldo', 'plays', 'football', 'a', 'lot'],
  ['cho', 'likes', 'quidditch', 'very', 'much'],
  ['jordan', 'adores', 'basketball', 'a', 'lot'],
  ['mctominay', 'plays', 'football', 'very', 'well'],
  ['woods', 'likes', 'quidditch', 'a', 'lot'],
  ['kobe', 'adores', 'basketball', 'very', 'much'],
  ['scholes', 'likes', 'quidditch', 'a', 'lot'],
  ['ginny', 'adores', 'quidditch', 'very', 'much']],
 ['football',
  'quidditch',
  'basketball',
  'football',
  'quidditch',
  'basketball',
  'football',
  'quidditch'])

In [12]:
X = [[word2idx[w] for w in s] for s in sentence_clean_list]
X

[[5, 7, 1, 18, 12],
 [8, 17, 4, 15, 6],
 [10, 11, 9, 18, 12],
 [2, 7, 1, 15, 13],
 [0, 17, 4, 18, 12],
 [16, 11, 9, 15, 6],
 [14, 17, 4, 18, 12],
 [3, 11, 4, 15, 6]]

In [13]:
y = [tag2idx[t] for t in tag_list]
y

[1, 0, 2, 1, 0, 2, 1, 0]

## Neural Network Params and Data Loader

Input -> RNN -> Linear -> Sigmoid

### Define the model parameters.

In [14]:
EMBEDDING_SIZE = 6
HIDDEN_SIZE = 7
LEARNING_RATE = 0.01
EPOCH = 10
STACKED_LAYERS = 5
BATCH_SIZE = 4

### Data Loader.

In [15]:
class TrainData(Dataset):
    
    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

        
    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]
        
    def __len__ (self):
        return len(self.X_data)

In [16]:
train_data = TrainData(torch.Tensor(X).to(torch.long), torch.Tensor(y).to(torch.float32))
train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE)

In [17]:
for i, j in train_loader:
    print((i, j))

(tensor([[ 5,  7,  1, 18, 12],
        [ 8, 17,  4, 15,  6],
        [10, 11,  9, 18, 12],
        [ 2,  7,  1, 15, 13]]), tensor([1., 0., 2., 1.]))
(tensor([[ 0, 17,  4, 18, 12],
        [16, 11,  9, 15,  6],
        [14, 17,  4, 18, 12],
        [ 3, 11,  4, 15,  6]]), tensor([0., 2., 1., 0.]))


## CNN MODEL OUTPUT

In [18]:
class CnnRnnModel(nn.Module):
    
    def __init__(self, embedding_size, vocab_size, hidden_size, target_size):
        super(CnnRnnModel, self).__init__()
        
        self.word_embeddings = nn.Embedding(num_embeddings = vocab_size, embedding_dim = embedding_size)
        self.cnn = nn.Conv1d(in_channels=embedding_size, out_channels=2, kernel_size=3, stride=1)
        self.gru = nn.GRU(input_size = 2, hidden_size=hidden_size, batch_first=True)
        self.linear = nn.Linear(in_features = hidden_size, out_features=target_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        print("Embeds: ", embeds.size())
        embeds_t = embeds.transpose(1, 2)
        print("Embeds_t: ", embeds_t.size())
        cnn = torch.relu(self.cnn(embeds_t))
        print("CNN: ", cnn.size())
        
        gru_input = cnn.transpose(1, 2)
        print("GRU Input: ", gru_input.size())
        gru_out, gru_hidden = self.gru(gru_input)
        print("GRU Out: ", gru_out.size())
        print(gru_out)
        print("GRU Hidden: ", gru_hidden.size())
        print(gru_hidden)
        linear_input = gru_hidden.squeeze()
        print("Linear Input: ", linear_input.size())
        print(linear_input)
        linear = self.linear(linear_input)
        
        return linear

In [19]:
cnn_rnn_model = CnnRnnModel(embedding_size=EMBEDDING_SIZE, vocab_size=len(word2idx), hidden_size = HIDDEN_SIZE, target_size=len(tag2idx))
print(cnn_rnn_model)

criterion = nn.CrossEntropyLoss()
optimizer =  optim.Adam(cnn_rnn_model.parameters())

CnnRnnModel(
  (word_embeddings): Embedding(19, 6)
  (cnn): Conv1d(6, 2, kernel_size=(3,), stride=(1,))
  (gru): GRU(2, 7, batch_first=True)
  (linear): Linear(in_features=7, out_features=3, bias=True)
)


### See how the CNN+RNN output from the model looks.

In [20]:
with torch.no_grad():
    for x_batch, y_batch in train_loader:
        print("Input: ", x_batch.size())
        print(x_batch, "\n")
        y_out = cnn_rnn_model(x_batch)
        
        y_out_softmax = torch.log_softmax(y_out, dim = 1)
        _, y_out_tags = torch.max(y_out_softmax, dim = 1)
        
        print("\nLinear Output: ", y_out.size())
        print(y_out)
        
        print("\nLogSoftmax Output: ", y_out_softmax.size())
        print(y_out_softmax)
        
        print("\nOutput Indices: ", y_out_tags.size())
        print(y_out_tags)
        
        print("\nActual Output: ", y_batch.size())
        print(y_batch)
        
        print("=" * 50)

Input:  torch.Size([4, 5])
tensor([[ 5,  7,  1, 18, 12],
        [ 8, 17,  4, 15,  6],
        [10, 11,  9, 18, 12],
        [ 2,  7,  1, 15, 13]]) 

Embeds:  torch.Size([4, 5, 6])
Embeds_t:  torch.Size([4, 6, 5])
CNN:  torch.Size([4, 2, 3])
GRU Input:  torch.Size([4, 3, 2])
GRU Out:  torch.Size([4, 3, 7])
tensor([[[-0.0966, -0.0887,  0.0539, -0.0422,  0.2357,  0.2448,  0.0331],
         [ 0.0904,  0.0666,  0.1903, -0.0488,  0.3392,  0.2914,  0.0434],
         [-0.0473, -0.0096,  0.1078, -0.1012,  0.3943,  0.3564,  0.0842]],

        [[ 0.0319,  0.0872,  0.1105, -0.0453,  0.2328,  0.1404, -0.0304],
         [ 0.0401,  0.1305,  0.1386, -0.0795,  0.3389,  0.2078, -0.0176],
         [ 0.0417,  0.1515,  0.1414, -0.1021,  0.3893,  0.2417, -0.0011]],

        [[ 0.0319,  0.0872,  0.1105, -0.0453,  0.2328,  0.1404, -0.0304],
         [ 0.0401,  0.1305,  0.1386, -0.0795,  0.3389,  0.2078, -0.0176],
         [-0.0426,  0.0456,  0.0936, -0.0993,  0.3918,  0.2981,  0.0338]],

        [[-0.0858, -

## TRAIN CNN MODEL

In [21]:
class CnnRnnModel(nn.Module):
    
    def __init__(self, embedding_size, vocab_size, hidden_size, target_size):
        super(CnnRnnModel, self).__init__()
        
        self.word_embeddings = nn.Embedding(num_embeddings = vocab_size, embedding_dim = embedding_size)
        self.cnn = nn.Conv1d(in_channels=embedding_size, out_channels=2, kernel_size=3, stride=1)
        self.gru = nn.GRU(input_size = 2, hidden_size=hidden_size, batch_first=True)
        self.linear = nn.Linear(in_features = hidden_size, out_features=target_size)

    def forward(self, sentence):
        embeds = self.word_embeddings(sentence)
        embeds_t = embeds.transpose(1, 2)
        cnn = torch.relu(self.cnn(embeds_t))
        
        gru_input = cnn.transpose(1, 2)
        gru_out, gru_hidden = self.gru(gru_input)
        linear_input = gru_hidden.squeeze()
        linear = self.linear(linear_input)
        
        return linear

In [22]:
cnn_rnn_model = CnnRnnModel(embedding_size=EMBEDDING_SIZE, vocab_size=len(word2idx), hidden_size = HIDDEN_SIZE, target_size=len(tag2idx))
print(cnn_rnn_model)

criterion = nn.CrossEntropyLoss()
optimizer =  optim.Adam(cnn_rnn_model.parameters())

CnnRnnModel(
  (word_embeddings): Embedding(19, 6)
  (cnn): Conv1d(6, 2, kernel_size=(3,), stride=(1,))
  (gru): GRU(2, 7, batch_first=True)
  (linear): Linear(in_features=7, out_features=3, bias=True)
)


In [23]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)    
    
    correct_pred = (y_pred_tags == y_test).float()
    acc = correct_pred.sum() / len(correct_pred)
    return acc

In [24]:
cnn_rnn_model.train()
for e in range(1, EPOCH+1):
    epoch_loss = 0
    epoch_acc = 0
    for x_batch, y_batch in train_loader:
        
        optimizer.zero_grad()
        
        y_out = cnn_rnn_model(x_batch)
                       
        loss = criterion(y_out.squeeze(0), y_batch.long())
        acc = multi_acc(y_out.squeeze(0), y_batch.long())
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    print(f'Epoch: {e+0:02} | Loss: {epoch_loss/len(train_loader):.5f} | Accuracy: {acc}')

Epoch: 01 | Loss: 1.13776 | Accuracy: 0.25
Epoch: 02 | Loss: 1.13474 | Accuracy: 0.25
Epoch: 03 | Loss: 1.13200 | Accuracy: 0.25
Epoch: 04 | Loss: 1.12931 | Accuracy: 0.25
Epoch: 05 | Loss: 1.12665 | Accuracy: 0.25
Epoch: 06 | Loss: 1.12400 | Accuracy: 0.25
Epoch: 07 | Loss: 1.12138 | Accuracy: 0.25
Epoch: 08 | Loss: 1.11877 | Accuracy: 0.25
Epoch: 09 | Loss: 1.11624 | Accuracy: 0.25
Epoch: 10 | Loss: 1.11380 | Accuracy: 0.25
