In [1]:
import sys
sys.path.append('../')
import numpy as np
import bareml.deeplearning.functions as F
import bareml.deeplearning.layers as L
from bareml.deeplearning.optimisers import Adam
from bareml.deeplearning.core import Tensor, get_array_module

In [2]:
# This notebook reuses the toy example from the following post:
# https://blog.floydhub.com/a-beginners-guide-on-recurrent-neural-networks-with-pytorch/

text = ['hey how are you','good i am fine','have a nice day']

# Length of the longest sentence; every sentence is padded up to this length.
maxlen = len(max(text, key=len))

# Right-pad each sentence with ' ' until it is `maxlen` characters long.
# `ljust` leaves an already long-enough string untouched, so re-running this
# cell does not keep growing the sentences.
text = [sentence.ljust(maxlen) for sentence in text]

# Unique characters across all sentences. This is computed AFTER padding so the
# pad character is guaranteed to be part of the vocabulary even if no sentence
# originally contained a space.
chars = set(''.join(text))

# Integer -> character mapping. The set is enumerated in sorted order because
# the iteration order of a set of strings is not stable between interpreter
# sessions (string hashing is randomised); sorting makes the mapping
# identical on every Restart & Run All.
int2char = dict(enumerate(sorted(chars)))
# Character -> integer mapping (the inverse of int2char)
char2int = {char: ind for ind, char in int2char.items()}

dict_size = len(char2int)  # number of distinct characters
seq_len = maxlen - 1       # inputs/targets are the sentences minus one character
batch_size = len(text)     # all sentences form a single batch

def one_hot_encode(sequence, dict_size, seq_len, batch_size):
    """One-hot encode a batch of integer sequences.

    Parameters
    ----------
    sequence: indexable of indexables of int
        sequence[i][u] is the class index at position u of sample i.
        Only the first `batch_size` rows and `seq_len` columns are read.
    dict_size: int
        Number of classes (size of the last axis of the result).
    seq_len: int
        Number of positions encoded per sample.
    batch_size: int
        Number of samples encoded.

    Returns
    -------
    encoded: np.ndarray (batch_size, seq_len, dict_size), dtype float32
        All zeros except a 1 at encoded[i, u, sequence[i][u]].
    """
    encoded = np.zeros((batch_size, seq_len, dict_size), dtype=np.float32)

    # Visit every (sample, position) pair in row-major order and switch on
    # the slot that corresponds to the class index found there.
    for row, col in np.ndindex(batch_size, seq_len):
        encoded[row, col, sequence[row][col]] = 1
    return encoded


# Integer-encoded input and target sequences.
# The input drops the last character of each sentence and the target drops the
# first, so target[t] is the character that follows input[t].
input_seq = [[char2int[character] for character in sentence[:-1]] for sentence in text]
target_seq = [[char2int[character] for character in sentence[1:]] for sentence in text]

# Input shape --> (Batch Size, Sequence Length, One-Hot Encoding Size)
input_seq_onehot = one_hot_encode(input_seq, dict_size, seq_len, batch_size)

# Swap the batch and sequence axes so both arrays are laid out as
# (len_seq, batch_size, ...), which is the layout the model's forward() documents.
input_seq_onehot_transposed = np.transpose(input_seq_onehot, (1, 0, 2))
target_seq_transposed = np.array(target_seq).T

print('--- data ---')
print('input_seq (len_seq, batch_size, input_size):', input_seq_onehot_transposed.shape)
print('target_seq (len_seq, batch_size):', target_seq_transposed.shape)

--- data ---
input_seq (len_seq, batch_size, input_size): (14, 3, 17)
target_seq (len_seq, batch_size): (14, 3)


In [3]:
class SimpleRNN(L.Module):
    """A recurrent layer followed by a fully connected read-out layer.

    The recurrent layer maps inputs of width `input_size` to hidden states of
    width `hidden_size`; the linear layer maps each hidden state to
    `output_size` scores.
    """

    def __init__(self, input_size, output_size, hidden_size):
        super().__init__()
        # Stored because forward() and _h0() both need it to build shapes.
        self.hidden_size = hidden_size

        self.rnn = L.RNN(input_size=input_size, hidden_size=hidden_size)
        self.fc = L.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, xs, h=None):
        """
        Run the whole sequence through the RNN and the linear layer.

        Parameters
        ----------
        xs: bareml.Tensor (len_seq, batch_size, input_size)
        h: bareml.Tensor (batch_size, hidden_size), optional
            Initial hidden state; an all-zero state is created when omitted.

        Returns
        -------
        out: bareml.Tensor (len_seq*batch_size, output_size)
        h: bareml.Tensor (batch_size, hidden_size)
        """
        n_steps, n_batch = xs.shape[0], xs.shape[1]

        # Array backend matching xs (per the original author's note: numpy on
        # cpu, cupy on gpu), used to allocate the initial hidden state.
        xp = get_array_module(xs)
        if h is None:
            h = self._h0(n_batch, xp)

        hidden_seq, h = self.rnn(xs, h)

        # Collapse the time and batch axes so every time step of every sample
        # becomes one row for the fully connected layer.
        flat = hidden_seq.reshape(n_steps * n_batch, self.hidden_size)
        return self.fc(flat), h

    def _h0(self, batch_size, xp):
        """Return an all-zero initial hidden state of shape (batch_size, hidden_size)."""
        return Tensor(xp.zeros((batch_size, self.hidden_size)))

In [4]:
# Hyperparameters
n_epochs = 100
lr = 0.01

# Device every tensor and the model are moved to
device = 'cpu'

# Wrap the prepared numpy arrays as tensors on the chosen device
input_seq_tensor = Tensor(input_seq_onehot_transposed).to(device)
target_seq_tensor = Tensor(target_seq_transposed).to(device)

# The vocabulary size is used both as the input width and as the number of
# output scores, since the model reads and predicts characters.
model = SimpleRNN(input_size=dict_size, output_size=dict_size, hidden_size=12).to(device)

# Loss function and optimiser (the learning rate is passed to Adam as `alpha`)
criterion = F.cross_entropy
optimizer = Adam(model.parameters(), alpha=lr)

In [5]:
# Training run: every epoch is one full-batch gradient step.
for epoch in range(1, n_epochs + 1):
    model.train()
    optimizer.zero_grad()  # discard gradients left over from the previous epoch

    output, hidden = model(input_seq_tensor)

    # The raw model output goes straight into the criterion; no softmax is
    # applied here (the original author left that step disabled).
    # Targets are flattened to one label per output row.
    ts = target_seq_tensor.reshape(-1)
    loss = criterion(output, ts)

    loss.backward()   # backpropagate to compute gradients
    optimizer.step()  # apply the parameter update

    # Report progress on every 10th epoch only
    if epoch % 10 != 0:
        continue
    print('Epoch: {}/{}.............'.format(epoch, n_epochs), loss)

Epoch: 10/100............. tensor(2.2629445)
Epoch: 20/100............. tensor(1.81998)
Epoch: 30/100............. tensor(1.4183012)
Epoch: 40/100............. tensor(1.0511736)
Epoch: 50/100............. tensor(0.7408401)
Epoch: 60/100............. tensor(0.5016372)
Epoch: 70/100............. tensor(0.33888054)
Epoch: 80/100............. tensor(0.23693925)
Epoch: 90/100............. tensor(0.1745706)
Epoch: 100/100............. tensor(0.13668409)


In [6]:
def predict(model, character):
    """Predict the character that follows the given sequence.

    Parameters
    ----------
    model: callable
        Called with a (len_seq, 1, dict_size) tensor; must return (out, hidden).
    character: iterable of str
        The characters seen so far; each must be a key of `char2int`.

    Returns
    -------
    next_char: str
        The character whose score is highest in the last row of the output.
    hidden:
        The hidden state returned by the model.
    """
    # Integer-encode as a batch of one: shape (1, n_chars)
    indices = np.array([[char2int[c] for c in character]])
    onehot = one_hot_encode(indices, dict_size, indices.shape[1], 1)

    # Wrap as a tensor on the target device, then swap the first two axes to
    # get the (len_seq, batch_size, input_size) layout the model expects.
    xs = Tensor(onehot).to(device).transpose(1, 0, 2)

    out, hidden = model(xs)

    prob = F.softmax(out).data

    # The last row belongs to the final time step, i.e. the next-character guess.
    xp = get_array_module(prob)
    char_ind = xp.argmax(prob[-1]).item()
    return int2char[char_ind], hidden

def sample(model, out_len, start='hey'):
    """Extend `start` one predicted character at a time.

    Parameters
    ----------
    model:
        Model passed on to predict(); switched to eval mode first.
    out_len: int
        Desired total length of the returned string.
    start: str
        Prompt; it is lower-cased before use.

    Returns
    -------
    str
        The lower-cased prompt followed by the predicted characters. When the
        prompt is already `out_len` characters or longer it is returned as is.
    """
    model.eval()  # eval mode

    generated = list(start.lower())

    # Each step feeds everything generated so far back into the model and
    # appends the single predicted character; the hidden state is not reused.
    for _ in range(out_len - len(generated)):
        next_char, _hidden = predict(model, generated)
        generated.append(next_char)

    return ''.join(generated)

In [7]:
# Complete the prompt 'good' to a total of 15 characters
sample(model, out_len=15, start='good')

'good i am fine '

In [8]:
# Complete the prompt 'hey' to a total of 15 characters
sample(model, out_len=15, start='hey')

'hey how are you'