<center>
    <img src="https://www.ucalgary.ca/themes/ucalgary/ucws_theme/images/UCalgary.svg" width='30%'>
</center>

[comment]: <> (The following line is for the TOPIC of the week)
<p style="text-align:left;"><font size='4'><b> Introduction to NLP </b></font></p>

---

# RNN Demo

In [3]:
import torch
from torch import nn
import torch.nn.functional as F

import numpy as np

# Problem definition

We will train a model that trying to complete a sentence with RNN.

First we need to create a map to the char and index

In [4]:
[
 ['<bos>', 'a', 'b', 'c', 'd', 'e', '<eos>'], # len = 512
 ['<bos>', 'x', 'y', 'z', '<eos>', '<pad>', '<pad>'], # max-length = 512
]



# [
#     'how are you?' 'good, I am fine.'
# ]
# '<bos>how are you?<sep>good, I am fine.<eos><pad>'

[['<bos>', 'a', 'b', 'c', 'd', 'e', '<eos>'],
 ['<bos>', 'x', 'y', 'z', '<eos>', '<pad>', '<pad>']]

In [5]:
texts = [
    'hey how are you', # => 'h','e','y'...'u'
    'good i am fine',
    'have a nice day',
    'this is an example sentence',
    'another example sentence is here'
]

# Join all the sentences together and extract the unique characters from the combined sentences
chars = set(list(''.join(texts)) + ['<eos>', '<pad>', '<bos>'])

# Creating a dictionary that maps integers to the characters
int2char = dict(enumerate(chars))

# Creating another dictionary that maps characters to integers
char2int = {char: ind for ind, char in int2char.items()}

vocab_size = len(int2char)
int2char

{0: 'o',
 1: '<bos>',
 2: 'h',
 3: 'u',
 4: 'm',
 5: 'a',
 6: 'l',
 7: 'e',
 8: 'v',
 9: 'p',
 10: 'c',
 11: '<pad>',
 12: 'r',
 13: 'x',
 14: '<eos>',
 15: 'i',
 16: 'n',
 17: 'f',
 18: 'w',
 19: ' ',
 20: 's',
 21: 'd',
 22: 't',
 23: 'g',
 24: 'y'}

In [6]:
maxlen = len(max(texts, key=len))
print("The longest string has {} characters".format(maxlen))

The longest string has 32 characters


# Data processing

In [7]:
# Creating lists that will hold our input and target sequences
train_X = []
train_Y = []

for text in texts:
    train_X.append([])
    train_Y.append([])
    for i in range(maxlen+1):
    # for i in range(len(text)):
        x = np.zeros(len(char2int))
        y = np.zeros(len(char2int))
        char_x = '<pad>'
        char_y = '<pad>'
        if i < len(text) - 1:
            char_x = text[i]
            char_y = text[i+1]
        elif i == len(text) - 1:
            char_x = text[i]
            char_y = '<eos>'
        elif i == len(text):
            char_x = '<eos>'
        # make it one hot vector
        x[char2int[char_x]] = 1
        y[char2int[char_y]] = 1
        train_X[-1].append(x)
        train_Y[-1].append(y)

In [8]:
[len(x) for x in train_X]

[33, 33, 33, 33, 33]

In [9]:
[int2char[np.argmax(x)] for x in train_Y[1]]

['o',
 'o',
 'd',
 ' ',
 'i',
 ' ',
 'a',
 'm',
 ' ',
 'f',
 'i',
 'n',
 'e',
 '<eos>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>',
 '<pad>']

In [10]:
sample_id = 0
for x,y in zip(train_X[sample_id], train_Y[sample_id]):
    print(int2char[x.argmax()], '->', int2char[y.argmax()])


h -> e
e -> y
y ->  
  -> h
h -> o
o -> w
w ->  
  -> a
a -> r
r -> e
e ->  
  -> y
y -> o
o -> u
u -> <eos>
<eos> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>
<pad> -> <pad>


In [11]:
# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")

GPU is available


In [104]:
class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.W_xa = nn.Linear(input_size, hidden_size)
        self.W_aa = nn.Linear(hidden_size, hidden_size)
        self.W_ay = nn.Linear(hidden_size, output_size)
        # self.g1 = nn.Tanh()
        # Better than tanH
        self.g1 = nn.ReLU()
        # THIS IS INCORRECT!!!!
        # Remember CrossEntropyLoss implements softmax FOR YOU!!!!
        # self.g2 = nn.Softmax()
        self.g2 = nn.Identity()

    def forward(self, x, previous_hidden_state):
        """
        x = x^t
        previous_hidden_state = a^{t-1}

        :return
        y^{t}
        a^{t}
        """

        current_hidden_state = self.g1(
            self.W_aa(previous_hidden_state) +
            self.W_xa(x)
        )

        output = self.g2(
            self.W_ay(current_hidden_state)
        )

        return output, current_hidden_state

    def init_hidden(self, batch_size):
        return torch.zeros(batch_size, self.hidden_size)

In [105]:
# Init the model with hyperparameters
model = RNN(
    input_size=vocab_size,
    hidden_size=50,
    output_size=vocab_size,
)
# We'll also set the model to the device that we defined earlier (default is CPU)
model = model.to(device)

# Define hyperparameters
n_epochs = 200
lr = 0.0075

# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
schelduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)

In [108]:
# torch.manual_seed(12)
# Training Run
for epoch in range(1, n_epochs + 1):
    # loop the sentence
    for X, Y in zip(train_X, train_Y):
        optimizer.zero_grad()
        hidden = model.init_hidden(1).to(device)
        losses = []

        X = torch.Tensor(X).to(device)
        Y = torch.Tensor(Y).to(device)

        # feed the input in one by one
        # loop the chars
        for x, y in zip(X, Y):
            x = x.reshape(1, -1) # h
            y = y.reshape(1, -1) # e

            output, hidden = model(x.view(1,-1), hidden)

            loss = criterion(output, y)
            losses.append(loss)

        sum(losses).backward()
        optimizer.step()
    schelduler.step()

    if epoch%10 == 0:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/200............. Loss: 0.0000
Epoch: 20/200............. Loss: 0.0000
Epoch: 30/200............. Loss: 0.0000
Epoch: 40/200............. Loss: 0.0000
Epoch: 50/200............. Loss: 0.0000
Epoch: 60/200............. Loss: 0.0000
Epoch: 70/200............. Loss: 0.0000
Epoch: 80/200............. Loss: 0.0000
Epoch: 90/200............. Loss: 0.0000
Epoch: 100/200............. Loss: 0.0000
Epoch: 110/200............. Loss: 0.0000
Epoch: 120/200............. Loss: 0.0000
Epoch: 130/200............. Loss: 0.0000
Epoch: 140/200............. Loss: 0.0000
Epoch: 150/200............. Loss: 0.0000
Epoch: 160/200............. Loss: 0.0000
Epoch: 170/200............. Loss: 0.0000
Epoch: 180/200............. Loss: 0.0000
Epoch: 190/200............. Loss: 0.0000
Epoch: 200/200............. Loss: 0.0000


In [107]:
model.eval()

hidden = model.init_hidden(1).to(device)
chars = 't'
for _ in range(50):
    x = np.zeros(len(char2int))
    x[char2int[chars[-1]]] = 1
    with torch.no_grad():
        output, hidden = model(torch.Tensor(x).view(1, -1).to(device), hidden)

    char_id = output.view(-1).argmax().detach().cpu().item()
    char = int2char[char_id]
    chars += char
    print(f"'{chars}'")
    if char in {'<eos>', '<pad>'}:
        break

'th'
'thi'
'this'
'this '
'this i'
'this is'
'this is '
'this is a'
'this is an'
'this is an '
'this is an e'
'this is an ex'
'this is an exa'
'this is an exam'
'this is an examp'
'this is an exampl'
'this is an example'
'this is an example '
'this is an example s'
'this is an example se'
'this is an example sen'
'this is an example sent'
'this is an example sente'
'this is an example senten'
'this is an example sentenc'
'this is an example sentence'
'this is an example sentence<eos>'
