## Example of a simple RNN implementation

A simple RNN implementation for predicting next char in a sentence as input.

This code example was inspired in [A Beginner’s Guide on Recurrent Neural Networks with PyTorch](https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/)

In [1]:
# Package imports
import numpy as np
import torch
from torch import nn

In [2]:
# Define the sentences we want our model to output
sentences = ["hey how are you", "not so good actually", "i hope you get better"]

# Join sentences and extract its unique characters
unique_chars = set("".join(sentences))

# map integers to chars in unique_chars, and viceversa
int2char = dict(enumerate(unique_chars))
char2int = {char:ind  for ind, char in int2char.items()}


In [3]:
print(int2char)

{0: 'g', 1: 'd', 2: 's', 3: 'o', 4: 'n', 5: 'y', 6: 't', 7: 'l', 8: 'a', 9: 'b', 10: 'e', 11: 'i', 12: 'h', 13: 'w', 14: 'u', 15: ' ', 16: 'p', 17: 'r', 18: 'c'}


### Preprocess data input

In [4]:
# having the longest sequence we're going to pad the rest with " " to match such length
maxlen = len(max(sentences, key=len))
print("The longest string has {} characters".format(maxlen))

# simple padding
for i in range(len(sentences)):
    while len(sentences[i]) < maxlen:
        sentences[i] += ' '

print(sentences)

The longest string has 21 characters
['hey how are you      ', 'not so good actually ', 'i hope you get better']


In [5]:
# Creating lists that will store our input/target sequences, 
# For input:  the last char does not get into account
# For output:  the first  char does not get into account
input_seq, target_seq = [], []

for i in range(len(sentences)):
    input_seq.append(sentences[i][:-1])
    target_seq.append(sentences[i][1:])

In [6]:
print(input_seq)
print(target_seq)

['hey how are you     ', 'not so good actually', 'i hope you get bette']
['ey how are you      ', 'ot so good actually ', ' hope you get better']


In [7]:
# converting our sequences to be a sequence of integers
for i in range(len(sentences)):
    input_seq[i] = [char2int[i_char] for i_char in input_seq[i]]
    target_seq[i] = [char2int[i_char] for i_char in target_seq[i]]

In [8]:
print(input_seq)

[[12, 10, 5, 15, 12, 3, 13, 15, 8, 17, 10, 15, 5, 3, 14, 15, 15, 15, 15, 15], [4, 3, 6, 15, 2, 3, 15, 0, 3, 3, 1, 15, 8, 18, 6, 14, 8, 7, 7, 5], [11, 15, 12, 3, 16, 10, 15, 5, 3, 14, 15, 0, 10, 6, 15, 9, 10, 6, 6, 10]]


In [9]:
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(sentences)
print("{}:{}:{}".format(dict_size, seq_len, batch_size))

# helper function: creates array of zeros for each character and replaces the corresponding character index with a 1
def oneHotEnc(seq, dict_size, seq_len, batch_size):
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, seq[i][u]] = 1
    return features

19:20:3


In [10]:
input_seq = oneHotEnc(input_seq, dict_size, seq_len, batch_size)


### Define RNN model architecture 

In [11]:
# to torch tensors
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

# Defining our device to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [12]:
class SimpleModel(nn.Module):
    def __init__(self, in_size, out_size, hidden_dim, n_layers):
        super(SimpleModel, self).__init__()
        self.hidden_dim = hidden_dim
        self.n_layers   = n_layers
        self.rnn        = nn.RNN(in_size, hidden_dim, n_layers, batch_first=True)
        self.fc         = nn.Linear(hidden_dim, out_size)
    
    def forward(self, x):
        # Init hidden state for first input using method defined below
        hidden = self.init_hidden(x.size(0))
        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        # Reshaping the outputs such that it can be fit into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim).to(device)
        return hidden

In [13]:
# Instantiate our SimpleModel
model = SimpleModel(in_size=dict_size, out_size=dict_size, hidden_dim=12, n_layers=1)
model = model.to(device)

# Define hyperparameters
n_epochs = 100
lr=0.01

# Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [14]:
# Training
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad() # Clears existing gradients from previous epoch
    input_seq.to(device)
    output, hidden = model(input_seq)
    loss = criterion(output, target_seq.view(-1).long())
    loss.backward()
    optimizer.step()
    
    if epoch%10 == 0:
        print('Epoch: {}/{}...'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))

Epoch: 10/100... Loss: 2.4244
Epoch: 20/100... Loss: 2.2859
Epoch: 30/100... Loss: 1.9903
Epoch: 40/100... Loss: 1.6325
Epoch: 50/100... Loss: 1.3268
Epoch: 60/100... Loss: 1.0473
Epoch: 70/100... Loss: 0.7960
Epoch: 80/100... Loss: 0.5855
Epoch: 90/100... Loss: 0.4299
Epoch: 100/100... Loss: 0.3172


In [15]:
# Helper function: given a trained model, it receives a sequence of characters and outputs the predict next character
def predict(model, chars):
    # One-hot encoding our input to fit into the model
    chars = np.array([[char2int[c] for c in chars]])
    chars = oneHotEnc(chars, dict_size, chars.shape[1], 1)
    chars = torch.from_numpy(chars)
    chars = chars.to(device)
    
    out, hidden = model(chars)

    prob = nn.functional.softmax(out[-1], dim=0).data
    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

In [16]:
# Helper function: takes the ground-truth output length and input sequence of characters as arguments
# and returns the predicted sentence
def sample(model, out_len, start='hey'):
    model.eval() 
    start = start.lower()
    chars = [char_i for char_i in start]
    size = out_len - len(chars)
    # Now pass in the previous characters and get a new one
    for _ in range(size):
        char, _ = predict(model, chars)
        chars.append(char)

    return ''.join(chars)

In [21]:
sample(model, 21, 'I hope')

'i hope you get better'