Source: [Link](https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/)  
Note: This is more of a NLP application and uses words/text instead of video

In [1]:
import torch
from torch import nn
import numpy as np

# Create 'Dataset'

In [2]:
text = ['hey how are you', 'good i am fine', 'have a nice day']

# Add all the sentences, get unique characters
chars = set(''.join(text))

# Create dictionary that maps integers to characters
int2char = dict(enumerate(chars))
print("int2char:")
print(int2char)

# Create reverse dictionary
char2int = {char: ind for ind, char in int2char.items()}
print("char2int")
print(char2int)

int2char:
{0: 'y', 1: 'w', 2: 'f', 3: 'v', 4: 'g', 5: 'u', 6: 'm', 7: 'c', 8: 'd', 9: ' ', 10: 'n', 11: 'a', 12: 'h', 13: 'r', 14: 'i', 15: 'o', 16: 'e'}
char2int
{'y': 0, 'w': 1, 'f': 2, 'v': 3, 'g': 4, 'u': 5, 'm': 6, 'c': 7, 'd': 8, ' ': 9, 'n': 10, 'a': 11, 'h': 12, 'r': 13, 'i': 14, 'o': 15, 'e': 16}


## Add Padding

In [3]:
# Get length of longest sentence
maxlen = len(max(text, key=len))

# Pad end of sentences with whitespace
for i in range(len(text)):
    while len(text[i]) < maxlen:
        text[i] += ' ' 
        
# Display all the Sentences
for sen in text:
    print("\'" + sen +  "\'")

'hey how are you'
'good i am fine '
'have a nice day'


## Create actual input data 

In [4]:
input_seq = []
target_seq = []

for i in range(len(text)):
    
    # Remove last character for input sequence
    input_seq.append(text[i][:-1])
    
    # Remove first character for target sequence
    target_seq.append(text[i][1:])
    print("Input Sequence: {}\nTarget Sequence: {}".format(input_seq[i], target_seq[i]))

Input Sequence: hey how are yo
Target Sequence: ey how are you
Input Sequence: good i am fine
Target Sequence: ood i am fine 
Input Sequence: have a nice da
Target Sequence: ave a nice day


In [5]:
# Convert input data into integers via the dictionaries
for i in range(len(text)):
    input_seq[i] = [char2int[character] for character in input_seq[i]]
    target_seq[i] = [char2int[character] for character in target_seq[i]]
    
print(input_seq)
print(target_seq)

[[12, 16, 0, 9, 12, 15, 1, 9, 11, 13, 16, 9, 0, 15], [4, 15, 15, 8, 9, 14, 9, 11, 6, 9, 2, 14, 10, 16], [12, 11, 3, 16, 9, 11, 9, 10, 14, 7, 16, 9, 8, 11]]
[[16, 0, 9, 12, 15, 1, 9, 11, 13, 16, 9, 0, 15, 5], [15, 15, 8, 9, 14, 9, 11, 6, 9, 2, 14, 10, 16, 9], [11, 3, 16, 9, 11, 9, 10, 14, 7, 16, 9, 8, 11, 0]]


In [6]:
dict_size = len(char2int)
seq_len = maxlen - 1
batch_size = len(text)

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    # Creating a multi-dimensional array of zeros with the desired output shape
    features = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)
    
    # Replacing the 0 at the relevant character index with a 1 to represent that character
    for i in range(batch_size):
        for u in range(seq_len):
            features[i, u, sequence[i][u]] = 1
    return features

In [7]:
# implement one-hot encoding
input_seq = one_hot_encode(input_seq, dict_size, seq_len, batch_size)

In [8]:
# Convert to torch tensors
input_seq = torch.from_numpy(input_seq)
target_seq = torch.Tensor(target_seq)

# Building the Model

In [9]:
# Check if GPU is available
is_cuda = torch.cuda.is_available()

if is_cuda:
    device = torch.device("cuda")
    print("GPU available")
else:
    device = torch.device("cpu")
    print("GPU unavailable, Use CPU")

GPU unavailable, Use CPU


This Model: 1 Layer of RNN, followed by a fully connected layer

Need to define:  
- a forward() function for sequetial execution
- init_hidden() to initialize the hidden state

In [10]:
class RNNModel(nn.Module):
    
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(RNNModel, self).__init__()
        
        # Define some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        
        # Model Layers
        
        # RNN Layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        
        # FC Layer
        self.fc = nn.Linear(hidden_dim, output_size)
        
    def forward(self, x):
        
        batch_size = x.size(0)
        
        # Initialize the hidden state for the first input using init_hidden()
        hidden = self.init_hidden(batch_size)
        
        # Passing in the input and hidden state into the model and obtain the outputs
        out, hidden = self.rnn(x, hidden)
        
        # Reshape outputs so that it fits into the FC layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        
        # Just zeros
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden
        

# Training the Model

In [11]:
# Instantiate hyperparams
model = RNNModel(input_size=dict_size, output_size=dict_size, hidden_dim = 12, n_layers=1)
model.to(device) # Set CPU/GPU

# Define hyperparams
n_epochs = 100
lr = 0.01

#Define Loss, Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

In [12]:
# Training Run
for epoch in range(1, n_epochs + 1):
    optimizer.zero_grad() # Clear existing gradients from last epoch
    input_seq.to(device)
    output, hidden = model(input_seq)
    
    # Compute loss
    loss = criterion(output, target_seq.view(-1).long())
    loss.backward() # backprop, calcylate gradients
    optimizer.step() # UPdate weights
    
    # Print Status Message
    if epoch%10 == 0:
        print("Epoch: {}/{}...".format(epoch, n_epochs), end=' ')
        print("Loss: {:.4f}".format(loss.item()))
    

Epoch: 10/100... Loss: 2.4404
Epoch: 20/100... Loss: 2.1932
Epoch: 30/100... Loss: 1.7878
Epoch: 40/100... Loss: 1.3279
Epoch: 50/100... Loss: 0.9278
Epoch: 60/100... Loss: 0.6400
Epoch: 70/100... Loss: 0.4426
Epoch: 80/100... Loss: 0.3117
Epoch: 90/100... Loss: 0.2265
Epoch: 100/100... Loss: 0.1706


# Testing the Model

In [13]:
# take in model and character, returns next character prediciton
def predict(model, character):
    # One-hot encoding our input to fit into the model
    character = np.array([[char2int[c] for c in character]])
    character = one_hot_encode(character, dict_size, character.shape[1], 1)
    character = torch.from_numpy(character)
    character = character.to(device)
    
    out, hidden = model(character)

    prob = nn.functional.softmax(out[-1], dim=0).data
    # Taking the class with the highest probability score from the output
    char_ind = torch.max(prob, dim=0)[1].item()

    return int2char[char_ind], hidden

In [14]:
def sample(model, out_len, start='hey'):
    model.eval() # set model to eval mode
    start = start.lower()
    
    # run through starting char
    chars = [ch for ch in start]
    size = out_len - len(chars)
    
    # Pass in previous characters and get a new one
    for ii in range(size):
        char, h = predict(model, chars)
        chars.append(char)
    
    return ''.join(chars)

In [20]:
sample(model, 15, 'how')

'how fine a nice'