# Recurrent Neural Networks
- IMDB review sentiment classification with RNN
  - Last time, we have started sentence classification with CNN and have achieved accuracy over 0.80.
  - This time, we try training with RNN to model text data

In [1]:
import numpy as np
import pandas as pd
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
torch.__version__

'2.0.1+cu118'

In [4]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

## 1. Import & process dataset
- IMDB review dataset for sentiment analysis
  - [source](http://ai.stanford.edu/~amaas/data/sentiment/)
  - Let's cheat a while and use dataset provided by Keras

In [2]:
from keras.datasets import imdb
from keras.preprocessing import sequence

num_words = 10000
maxlen = 50

(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words = num_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [5]:
X_train = pad_sequences(X_train, maxlen = maxlen, padding = 'pre')
X_test = pad_sequences(X_test, maxlen = maxlen, padding = 'pre')

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(25000, 50) (25000, 50) (25000,) (25000,)


In [24]:
pd.Series(y_train).value_counts()

1    12500
0    12500
dtype: int64

In [6]:
type(X_train)

numpy.ndarray

In [8]:
df = pd.DataFrame(X_train)

# Gated Recurrent Unit

## 2. Creating RNN model and training

- Create and train RNN model for sentence classification, with one GRU layer


![](http://upload.wikimedia.org/wikipedia/commons/thumb/3/37/Gated_Recurrent_Unit%2C_base_type.svg/440px-Gated_Recurrent_Unit%2C_base_type.svg.png)

In [10]:
class imdbTrainDataset(torch.utils.data.Dataset):
  def __init__(self):
    self.X = X_train
    self.y = y_train

  def __getitem__(self, idx):
    return self.X[idx], self.y[idx]

  def __len__(self):
    return len(self.X)

class imdbTestDataset(torch.utils.data.Dataset):
  def __init__(self):
    self.X = X_test
    self.y = y_test

  def __getitem__(self, idx):
    return self.X[idx], self.y[idx]

  def __len__(self):
    return len(self.X)

In [11]:
# create dataset & dataloader instances
train_dataset = imdbTrainDataset()
test_dataset = imdbTestDataset()

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)

In [18]:
# create RNN with one GRU layer
class Net(nn.Module):
  def __init__(self, input_dim, num_words, embedding_dim, hidden_size, device):
    super(Net, self).__init__()
    self.input_dim = input_dim
    self.embedding_dim = embedding_dim
    self.hidden_size = hidden_size
    self.device = device

    self.embedding = nn.Embedding(num_words, self.embedding_dim)

    # recurrent layer (GRU)
    self.rnn = nn.GRU(input_size = self.embedding_dim, hidden_size = hidden_size)
    self.dense = nn.Linear(hidden_size, 2)

  def forward(self, x):

    x = self.embedding(x)                                   # project to word embedding space

    h0 = torch.from_numpy(np.zeros((1, x.size(1), self.hidden_size))).float().to(self.device)
    x, _ = self.rnn(x, h0)
    x = x[:, -1, :]
    x = self.dense(x)
    return x

In [13]:
torch.cuda.is_available()

True

In [15]:
# hyperparameters
DEVICE = torch.device('cuda')
INPUT_DIM = maxlen
EMBEDDING_DIM = 50
HIDDEN_SIZE = 10
NUM_WORDS = num_words
LEARNING_RATE = 1e-3
NUM_EPOCHS = 30

In [19]:
model = Net(INPUT_DIM, NUM_WORDS, EMBEDDING_DIM, HIDDEN_SIZE, DEVICE).to(DEVICE)
criterion = nn.CrossEntropyLoss()   # do not need softmax layer when using CEloss criterion
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

In [21]:
model

Net(
  (embedding): Embedding(10000, 50)
  (rnn): GRU(50, 10)
  (dense): Linear(in_features=10, out_features=2, bias=True)
)

In [22]:
# training for NUM_EPOCHS
for i in range(NUM_EPOCHS):
  temp_loss = []
  for (x, y) in train_loader:
    x, y = x.long().to(DEVICE), y.to(DEVICE)  # beware that input to embedding should be type 'long'
    outputs = model(x)
    loss = criterion(outputs, y) # y^ - outputs
    temp_loss.append(loss.item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  print("Loss at {}th epoch: {}".format(i, np.mean(temp_loss)))

Loss at 0th epoch: 0.696902723944917
Loss at 1th epoch: 0.6864059640436756
Loss at 2th epoch: 0.6765898791502933
Loss at 3th epoch: 0.6656695370163236
Loss at 4th epoch: 0.6542431824669546
Loss at 5th epoch: 0.6421763565467329
Loss at 6th epoch: 0.6322078090541217
Loss at 7th epoch: 0.6235314863068717
Loss at 8th epoch: 0.6147601993716493
Loss at 9th epoch: 0.6098867405434044
Loss at 10th epoch: 0.6021499734143821
Loss at 11th epoch: 0.5970303781178533
Loss at 12th epoch: 0.592053763720454
Loss at 13th epoch: 0.5870070165517379
Loss at 14th epoch: 0.5843368575585132
Loss at 15th epoch: 0.5805733079204753
Loss at 16th epoch: 0.5766064924549084
Loss at 17th epoch: 0.5744226584020926
Loss at 18th epoch: 0.5725999802959209
Loss at 19th epoch: 0.5694818747591
Loss at 20th epoch: 0.5683506046022687
Loss at 21th epoch: 0.5658779665827751
Loss at 22th epoch: 0.5636277002643566
Loss at 23th epoch: 0.5620403219850696
Loss at 24th epoch: 0.5616967814917467
Loss at 25th epoch: 0.5600439895476613
L

## 3. Evaluation
- Evaluate the trained RNN model with accuracy score
  - Store probability of each instance to a list and compare it with true y label

In [25]:
y_pred, y_true = [], []
with torch.no_grad():
  for x, y in test_loader:
    x, y = x.long().to(DEVICE), y.to(DEVICE)       # beware that input to embedding should be type 'long'
    outputs = F.softmax(model(x)).max(1)[-1]       # predicted label
    y_true += list(y.cpu().numpy())                # true label
    y_pred += list(outputs.cpu().numpy())

  outputs = F.softmax(model(x)).max(1)[-1]       # predicted label


In [26]:
# evaluation result
from sklearn.metrics import accuracy_score
accuracy_score(y_true, y_pred)

0.5966

In [28]:
from sklearn.metrics import confusion_matrix

Accuracy = TP / All samples; Precision = TP / (TP + FP) # type I error; Recall = TN / (TN + FN) # type II error ---- F1-score trade-off between Precision + Recall; ROC AUC

In [30]:
confusion_matrix(y_true, y_pred)

array([[7403, 5097],
       [4988, 7512]])