In [1]:
import numpy as np
from numpy import exp, array, random, dot, mean, abs, tanh, zeros, outer, log
from nltk.corpus import gutenberg
import nltk

In [35]:
class RNN():
    """Character-level vanilla RNN trained with truncated back-propagation through time.

    The corpus is read character by character, every distinct character is
    mapped to an integer id, and the network learns to predict the id of the
    next character from the current one.
    """

    def __init__(self, learning_rate, h_size, book, epochs, batch_length, back_steps):
        """Load the corpus and initialise the model parameters.

        Parameters
        ----------
        learning_rate : float
            Step size of the SGD update performed in ``fit``.
        h_size : int
            Number of hidden units.
        book : str
            File id handed to ``gutenberg.words`` and ``gutenberg.raw``.
        epochs : int
            Number of passes over the corpus made by ``fit``.
        batch_length : int
            Number of characters in one training sequence.
        back_steps : int
            How many time steps each output error is propagated back in ``bptt``.
        """
        self.learning_rate = learning_rate
        self.h_size = h_size
        self.epochs = epochs
        self.batch_length = batch_length
        self.back_steps = back_steps
        self.words = gutenberg.words(book)
        self.letters = gutenberg.raw(book)
        self.unique_chars = set(self.letters)
        self.unique_len = len(self.unique_chars)
        self.last_h = zeros((1, self.h_size))
        # Enumerate the characters in sorted order: iteration order of a set of
        # strings can differ between interpreter runs, which would make the
        # character ids (and therefore any saved weights) irreproducible.
        ordered_chars = sorted(self.unique_chars)
        # Mapping from character to the integer that represents it
        self.char_to_int = {char: counter for counter, char in enumerate(ordered_chars)}
        # Mapping from integer back to the character
        self.int_to_char = {counter: char for counter, char in enumerate(ordered_chars)}

        # Model parameters, drawn uniformly from [0, 0.01)
        self.Wxh = random.rand(h_size, self.unique_len) * 0.01
        self.Whh = random.rand(h_size, h_size) * 0.01
        self.Why = random.rand(self.unique_len, h_size) * 0.01
        self.bh = random.rand(h_size,) * 0.01
        self.by = random.rand(self.unique_len,) * 0.01

    def fit(self):
        """Train on ``self.letters`` for ``self.epochs`` passes with plain SGD.

        The corpus is cut into consecutive windows of ``batch_length``
        characters; the target of every character is the character that
        follows it. The final window is shortened so that every input still
        has a target. Prints the epoch index at the start of each pass.
        """
        # Encode the corpus once instead of once per window and epoch.
        encoded = [self.char_to_int[ch] for ch in self.letters]

        for x in range(self.epochs):
            print(x)
            # Each pass restarts at the beginning of the text, so the state left
            # over from the end of the previous pass is discarded.
            self.last_h = zeros((1, self.h_size))
            # Stopping at len - 1 guarantees at least one (input, target) pair.
            for i in range(0, len(encoded) - 1, self.batch_length):
                target = encoded[i + 1:i + self.batch_length + 1]
                # Drop trailing inputs that have no following character.
                inp = encoded[i:i + self.batch_length][:len(target)]

                # State the window starts from; bptt needs it for time step 0.
                h_start = np.array(self.last_h, dtype=float).reshape(-1)

                # Forward pass
                hist_h, output, hist_p = self.train(inp, target)

                # Backward pass
                DWhy, Dby, DWhh, DWxh, Dbh = self.bptt(inp, target, hist_p, hist_h, h_start)

                # Update the parameters by simple SGD
                self.by -= self.learning_rate * Dby
                self.Why -= self.learning_rate * DWhy
                self.Whh -= self.learning_rate * DWhh
                self.Wxh -= self.learning_rate * DWxh
                self.bh -= self.learning_rate * Dbh

    def train(self, inp, target):
        """Run the forward pass over one sequence.

        Parameters
        ----------
        inp : sequence of int
            Character ids fed to the network, one per time step.
        target : sequence of int
            Not used by the forward pass; kept so existing callers keep working.

        Returns
        -------
        hist_h, hist_y, hist_p : numpy.ndarray
            Hidden states, output scores and softmax probabilities, one row
            per element of ``inp``.

        The sequence starts from ``self.last_h`` and, when ``inp`` is not
        empty, ``self.last_h`` is replaced by the final hidden state.
        """
        steps = len(inp)
        # Value of the hidden state at every time step
        hist_h = np.zeros((steps, self.h_size))
        # Value of the output at every time step
        hist_y = np.zeros((steps, self.unique_len))
        # Probability distribution at every time step
        hist_p = np.zeros((steps, self.unique_len))
        # The last state of the previous sequence is the starting state of this
        # one. It is held in a local rather than in hist_h[-1], because that row
        # is overwritten by the final time step.
        h_prev = np.asarray(self.last_h, dtype=float).reshape(-1)
        for t in range(steps):
            hist_h[t] = tanh(dot(self.Whh, h_prev) + self.Wxh[:, inp[t]] + self.bh)
            hist_y[t] = dot(self.Why, hist_h[t]) + self.by
            hist_p[t] = self.softmax(hist_y[t])
            h_prev = hist_h[t]
        if steps:
            # Copy so later edits of the returned array cannot change the state.
            self.last_h = hist_h[-1].copy()
        return hist_h, hist_y, hist_p

    def calculate_loss(self, output, target):
        """Return the summed cross-entropy of ``output`` against ``target``.

        Parameters
        ----------
        output : array-like
            One probability distribution per row (e.g. ``hist_p`` from ``train``).
        target : sequence of int
            Id of the correct character for each row.

        Returns
        -------
        float
            ``-sum(log(output[t, target[t]]))`` over all entries of ``target``.
        """
        probs = np.asarray(output, dtype=float)
        # Smallest positive float, so a probability of exactly 0 gives a large
        # finite loss instead of inf.
        floor = np.finfo(float).tiny
        loss = 0.0
        for t, wanted in enumerate(target):
            loss -= log(max(probs[t, wanted], floor))
        return loss

    def bptt(self, inp, target, hist_p, hist_h, h_prev=None):
        """Compute clipped gradients of the cross-entropy loss for one sequence.

        Parameters
        ----------
        inp : sequence of int
            Character ids that were fed to ``train``.
        target : sequence of int
            Id of the correct next character at every time step.
        hist_p, hist_h : numpy.ndarray
            Probabilities and hidden states returned by ``train``. Neither
            array is modified.
        h_prev : array-like, optional
            Hidden state the sequence started from. Defaults to zeros.

        Returns
        -------
        list
            ``[DWhy, Dby, DWhh, DWxh, Dbh]``, each clipped to ``[-5, 5]``.

        The error of every time step is propagated back through at most
        ``self.back_steps`` earlier steps.
        """
        # Initialise the gradient matrices for each parameter
        DWhy = np.zeros_like(self.Why)
        Dby = np.zeros_like(self.by)
        DWhh = np.zeros_like(self.Whh)
        DWxh = np.zeros_like(self.Wxh)
        Dbh = np.zeros_like(self.bh)

        if h_prev is None:
            h_prev = np.zeros(self.h_size)
        else:
            h_prev = np.asarray(h_prev, dtype=float).reshape(-1)

        for t in range(len(inp)):
            # Output - target. Work on a copy: hist_p[t] is a view, and
            # subtracting in place would corrupt the caller's probabilities.
            Dp = np.array(hist_p[t], dtype=float)
            Dp[target[t]] -= 1
            # (Output - target) * hist_h[t]
            DWhy += outer(Dp, hist_h[t])
            Dby += Dp
            # Error at the pre-activation of step t: Why^T (output - target) * (1 - h_t^2)
            delta = dot(self.Why.T, Dp) * (1 - hist_h[t] ** 2)
            for step in range(t, max(0, t - self.back_steps) - 1, -1):
                # State that fed into `step`; for step 0 that is the state the
                # sequence started from, not hist_h[-1].
                h_before = hist_h[step - 1] if step > 0 else h_prev
                DWhh += outer(delta, h_before)
                DWxh[:, inp[step]] += delta
                Dbh += delta
                # Move the error one step back: through Whh transposed and the
                # tanh derivative of the earlier state, replacing (not adding to)
                # the current error.
                delta = dot(self.Whh.T, delta) * (1 - h_before ** 2)
        for Dparam in [DWxh, DWhh, DWhy, Dbh, Dby]:
            np.clip(Dparam, -5, 5, out=Dparam)  # clip to mitigate exploding gradients
        return [DWhy, Dby, DWhh, DWxh, Dbh]

    @staticmethod
    def softmax(x):
        """Return the softmax of ``x``, shifted by its maximum before exponentiating."""
        shifted = np.exp(x - np.max(x))
        return shifted / np.sum(shifted)

In [36]:
# Gutenberg file id of the corpus the RNN is trained on
book = 'austen-emma.txt'
# Word tokens and the raw character stream of that corpus
words, letters = gutenberg.words(book), gutenberg.raw(book)

In [37]:
# Build the network with named hyper-parameters, then train it on the corpus
rnn = RNN(
    learning_rate=0.001,
    h_size=128,
    book=book,
    epochs=10,
    batch_length=20,
    back_steps=5,
)
rnn.fit()

0


KeyboardInterrupt: 

In [None]:
# Vocabulary of the corpus held in `letters`, sorted so that every character
# gets the same id on each run.
uni = sorted(set(letters))
# Character -> integer id, and the inverse lookup
char_to_int = {char: counter for counter, char in enumerate(uni)}
int_to_char = {counter: char for counter, char in enumerate(uni)}

In [None]:
# Preview: integer ids of the first ten characters of the corpus
[char_to_int[symbol] for symbol in letters[:10]]