# Code for the post https://medium.com/p/3f06d7653a85

The code is adapted from the links below, with minor changes made for the blog post.

References:
    https://gist.github.com/karpathy/d4dee566867f8291f086
    
    https://gist.github.com/satyajitvg/9a5f782ccef5ff81f7f9863b62218b06
    
        

In [1]:
"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""

'\nMinimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)\nBSD License\n'

In [2]:
import numpy as np


# Reads the training data and builds the vocabulary and the dictionaries used to index the chars
class DataReader:
    """Load a text file and serve it as consecutive character-index batches.

    Attributes:
        data: the full text read from ``path``.
        char_to_ix: mapping from each distinct character to its integer index.
        ix_to_char: inverse mapping of ``char_to_ix``.
        data_size: number of characters in ``data``.
        vocab_size: number of distinct characters in ``data``.
        pointer: offset into ``data`` where the next batch starts.
        seq_length: number of characters per batch.
        fp: the file object used to read the text; it is already closed
            when ``__init__`` returns and is kept only so ``close()`` keeps
            working for existing callers.
    """

    def __init__(self, path, seq_length):
        """Read the whole file at ``path`` and build the vocabulary.

        Args:
            path: path of the text file to read.
            seq_length: number of characters returned per batch.
        """
        #uncomment below , if you dont want to use any file for text reading and comment the "with" block
        #self.data = "some really long text to test this. maybe not perfect but should get you going."
        # The text is read in one go, so the handle is released right away
        # instead of staying open for the lifetime of the reader.
        with open(path, "r") as fp:
            self.data = fp.read()
        self.fp = fp
        # Sort the unique chars so the index assigned to each character is the
        # same on every run (plain set iteration order is not stable).
        chars = sorted(set(self.data))
        #create dictionary mapping for each char
        self.char_to_ix = {ch: i for (i, ch) in enumerate(chars)}
        self.ix_to_char = {i: ch for (i, ch) in enumerate(chars)}
        #total data
        self.data_size = len(self.data)
        #num of unique chars
        self.vocab_size = len(chars)
        self.pointer = 0
        self.seq_length = seq_length

    def next_batch(self):
        """Return the next ``(inputs, targets)`` pair of character indices.

        ``targets`` is the same window as ``inputs`` shifted one character to
        the right. The pointer then advances by ``seq_length`` and is reset to
        0 when the following window plus its one-character lookahead would
        reach or pass the end of the data.
        """
        input_start = self.pointer
        input_end = self.pointer + self.seq_length
        inputs = [self.char_to_ix[ch] for ch in self.data[input_start:input_end]]
        targets = [self.char_to_ix[ch] for ch in self.data[input_start+1:input_end+1]]
        self.pointer += self.seq_length
        if self.pointer + self.seq_length + 1 >= self.data_size:
            # reset pointer
            self.pointer = 0
        return inputs, targets

    def just_started(self):
        """Return True when the pointer is at the beginning of the data."""
        return self.pointer == 0

    def close(self):
        """Close the underlying file object (a no-op if it is already closed)."""
        self.fp.close()


In [10]:
import numpy as np

class RNN:
    """Minimal character-level vanilla RNN trained with Adagrad.

    Parameters (all NumPy arrays):
        U: (hidden_size, vocab_size)  input  -> hidden weights
        W: (hidden_size, hidden_size) hidden -> hidden weights
        V: (vocab_size, hidden_size)  hidden -> output weights
        b: (hidden_size, 1) hidden bias
        c: (vocab_size, 1)  output bias
    ``mU``, ``mW``, ``mV``, ``mb`` and ``mc`` hold the running sums of squared
    gradients used by the Adagrad update.
    """

    def __init__(self, hidden_size, vocab_size, seq_length, learning_rate):
        """Store the hyper parameters and initialise weights and Adagrad memory.

        Args:
            hidden_size: number of hidden units.
            vocab_size: number of distinct characters (input/output width).
            seq_length: number of time steps back-propagated per batch.
            learning_rate: step size used by ``update_model``.
        """
        # hyper parameters
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.seq_length = seq_length
        self.learning_rate = learning_rate
        # model parameters, drawn uniformly from +/- 1/sqrt(number of input columns)
        self.U = np.random.uniform(-np.sqrt(1./vocab_size), np.sqrt(1./vocab_size), (hidden_size, vocab_size))
        self.V = np.random.uniform(-np.sqrt(1./hidden_size), np.sqrt(1./hidden_size), (vocab_size, hidden_size))
        self.W = np.random.uniform(-np.sqrt(1./hidden_size), np.sqrt(1./hidden_size), (hidden_size, hidden_size))
        self.b = np.zeros((hidden_size, 1)) # bias for hidden layer
        self.c = np.zeros((vocab_size, 1)) # bias for output

        # memory vars for adagrad,
        #ignore if you implement another approach
        self.mU = np.zeros_like(self.U)
        self.mW = np.zeros_like(self.W)
        self.mV = np.zeros_like(self.V)
        self.mb = np.zeros_like(self.b)
        self.mc = np.zeros_like(self.c)

    def softmax(self, x):
        """Return the softmax of ``x``, shifted by its max for numerical stability."""
        p = np.exp(x - np.max(x))
        return p / np.sum(p)

    def _one_hot(self, ix):
        """Return a (vocab_size, 1) column vector with a 1 at row ``ix``."""
        x = np.zeros((self.vocab_size, 1))
        x[ix] = 1
        return x

    def _step(self, x, h):
        """Return the next hidden state for input column ``x`` and previous state ``h``."""
        return np.tanh(np.dot(self.U, x) + np.dot(self.W, h) + self.b)

    def _generate(self, h, x, n):
        """Sample ``n`` character indices, starting from state ``h`` and input ``x``.

        Each sampled index is fed back as the next one-hot input.
        """
        ixes = []
        for _ in range(n):
            h = self._step(x, h)
            p = self.softmax(np.dot(self.V, h) + self.c)
            ix = np.random.choice(range(self.vocab_size), p=p.ravel())
            x = self._one_hot(ix)
            ixes.append(ix)
        return ixes

    def forward(self, inputs, hprev):
        """Run the network over ``inputs`` starting from hidden state ``hprev``.

        Args:
            inputs: list of character indices.
            hprev: (hidden_size, 1) initial hidden state.

        Returns:
            ``(xs, hs, ycap)`` dicts keyed by time step: one-hot inputs, hidden
            states (``hs[-1]`` is a copy of ``hprev``) and the softmax
            probabilities for the next character.
        """
        xs, hs, os, ycap = {}, {}, {}, {}
        hs[-1] = np.copy(hprev)
        for t in range(len(inputs)):
            xs[t] = self._one_hot(inputs[t]) # one hot encoding , 1-of-k
            hs[t] = self._step(xs[t], hs[t-1]) # hidden state
            os[t] = np.dot(self.V, hs[t]) + self.c # unnormalised log probs for next char
            ycap[t] = self.softmax(os[t]) # probs for next char
        return xs, hs, ycap

    def backward(self, xs, hs, ps, targets):
        """Back-propagate the cross-entropy loss through ``seq_length`` steps.

        Args:
            xs, hs, ps: the dicts returned by ``forward``.
            targets: list of target character indices, one per time step.

        Returns:
            ``(dU, dW, dV, db, dc)`` gradients, each clipped element-wise to
            the range [-5, 5].
        """
        dU, dW, dV = np.zeros_like(self.U), np.zeros_like(self.W), np.zeros_like(self.V)
        db, dc = np.zeros_like(self.b), np.zeros_like(self.c)
        dhnext = np.zeros_like(hs[0])
        for t in reversed(range(self.seq_length)):
            dy = np.copy(ps[t])
            dy[targets[t]] -= 1 # backprop into y
            dV += np.dot(dy, hs[t].T)
            # The output bias adds directly to the logits, so its gradient is dy
            # (accumulating dc into itself would leave it at zero forever).
            dc += dy
            dh = np.dot(self.V.T, dy) + dhnext # backprop into h
            dhrec = (1 - hs[t] * hs[t]) * dh # backprop through tanh
            db += dhrec
            dU += np.dot(dhrec, xs[t].T)
            dW += np.dot(dhrec, hs[t-1].T)
            dhnext = np.dot(self.W.T, dhrec)
        for dparam in [dU, dW, dV, db, dc]:
            np.clip(dparam, -5, 5, out=dparam) # in-place clip against exploding gradients
        return dU, dW, dV, db, dc

    def loss(self, ps, targets):
        """Return the summed cross-entropy of ``targets`` under ``ps`` over ``seq_length`` steps."""
        return sum(-np.log(ps[t][targets[t],0]) for t in range(self.seq_length))

    def update_model(self, dU, dW, dV, db, dc):
        """Apply one Adagrad step to every parameter, in place."""
        for param, dparam, mem in zip([self.U, self.W, self.V, self.b, self.c],
                                      [dU, dW, dV, db, dc],
                                      [self.mU, self.mW, self.mV, self.mb, self.mc]):
            mem += dparam*dparam
            param += -self.learning_rate*dparam/np.sqrt(mem+1e-8)

    def sample(self, h, seed_ix, n):
        """Sample ``n`` character indices from the model.

        Args:
            h: (hidden_size, 1) hidden state to start from.
            seed_ix: index of the character fed in at the first step.
            n: number of indices to sample.

        Returns:
            list of the ``n`` sampled indices (the seed is not included).
        """
        return self._generate(h, self._one_hot(seed_ix), n)

    def train(self, data_reader, num_iterations=5000000):
        """Train on batches from ``data_reader``.

        The loop runs for iteration numbers 0 through ``num_iterations``
        inclusive. Every 10000 iterations it prints the current input window,
        a 200-character sample and the smoothed loss.

        Args:
            data_reader: object providing ``vocab_size``, ``ix_to_char``,
                ``just_started()`` and ``next_batch()``.
            num_iterations: last iteration number to run.
        """
        iter_num = 0
        smooth_loss = -np.log(1.0 / data_reader.vocab_size) * self.seq_length
        # Start from a zero state even when the reader is mid-pass (for example
        # when train() is called again after an interrupt); otherwise hprev
        # would be unbound on the first iteration.
        hprev = np.zeros((self.hidden_size, 1))

        while iter_num <= num_iterations:  # Fixed number of iterations
            if data_reader.just_started():
                # new pass over the data: forget the carried-over hidden state
                hprev = np.zeros((self.hidden_size, 1))
            inputs, targets = data_reader.next_batch()
            xs, hs, ps = self.forward(inputs, hprev)
            dU, dW, dV, db, dc = self.backward(xs, hs, ps, targets)
            loss = self.loss(ps, targets)
            self.update_model(dU, dW, dV, db, dc)
            smooth_loss = smooth_loss * 0.999 + loss * 0.001
            hprev = hs[self.seq_length - 1]
            if iter_num % 10000 == 0:
                sample_ix = self.sample(hprev, inputs[0], 200)
                generated_text = ''.join(data_reader.ix_to_char[ix] for ix in sample_ix)
                # Convert input sequence indices back to characters
                input_text = ''.join(data_reader.ix_to_char[ix] for ix in inputs)
                # Print input and generated text
                print(f"Input: {input_text}")
                print(f"Generated Text: {generated_text}")
                print(f"\n\nIteration: {iter_num}, Loss: {smooth_loss:.4f}")
            iter_num += 1

    def predict(self, data_reader, start, n):
        """Continue the text ``start`` with ``n`` sampled characters.

        The seed is fed through the network one character at a time so the
        hidden state reflects the whole seed; sampling then continues from the
        last seed character. With an empty ``start`` sampling begins from a
        zero state and an all-zero input.

        Args:
            data_reader: object providing ``char_to_ix`` and ``ix_to_char``.
            start: seed text; every character must be a key of ``char_to_ix``
                (an unknown character raises ``KeyError``).
            n: number of characters to generate after the seed.

        Returns:
            the seed followed by the ``n`` generated characters.
        """
        seed_ixes = [data_reader.char_to_ix[ch] for ch in start]

        h = np.zeros((self.hidden_size, 1))
        x = np.zeros((self.vocab_size, 1))
        # Warm up on every seed character except the last; the last one becomes
        # the first input of the sampling loop.
        for ix in seed_ixes[:-1]:
            h = self._step(self._one_hot(ix), h)
        if seed_ixes:
            x = self._one_hot(seed_ixes[-1])

        ixes = seed_ixes + self._generate(h, x, n)
        txt = ''.join(data_reader.ix_to_char[i] for i in ixes)
        return txt




In [11]:
# Number of characters fed to the network per training step.
seq_length = 25

# Load the training corpus (swap in "interstellar_summary.txt" for the full-length text).
data_reader = DataReader(path="interstellar_summary_condensed.txt", seq_length=seq_length)

# Size the model to the corpus vocabulary, then train. train() is called with its
# default iteration count, so interrupt the kernel to stop early.
rnn = RNN(
    hidden_size=100,
    vocab_size=data_reader.vocab_size,
    seq_length=seq_length,
    learning_rate=0.1,
)
rnn.train(data_reader)

Input: Interstellar is a 2014 sc
Generated Text: jf2aCjBbaciyDNvViNEqVN0se4JEviJV2,oyVSniVTMH4d,qtc 4fhDSoRkntsH4 cMyvnEllktND dbS1eHuSllfcMNatJnuqxlnTipN1o.dsa4d14qVjInlrHmrsA4tRnvwIfvs1mtoHw,DdkRfgJxvakea0eNn2dIJf2lunpjojTxkD.wlfHnRp1EeJeojHioScrp


Iteration: 0, Loss: 95.7166
Input: ugh a wormhole near Satur
Generated Text: , ye. Thes onet and Mithoss timn ar h sactunnd sca tinewlonaute newath hiss ame dila aughter Me Chant ationghy aydekste suathe femi tion and fice personama nath faceseqla 2014 scirviews daughter Murph


Iteration: 10000, Loss: 0.7935
Input: ract enabling communicati
Generated Text: n across timy sinestion Inapllmion dian Acad, Michael Ate SewathoBrsitrand It naudi awaveyletion ain ficecCoraekive with hie tesserast raugh snaelma Chastris the sawtsinrachew ghtel ffamis s atron drs


Iteration: 20000, Loss: 5.9272
Input:  It stars Matthew McConau
Generated Text: hrssteml rought andew Mcroes Msca tesseaHa d facte war fugutitiravewshoplly, Ron Bes thevilin dan wommand

KeyboardInterrupt: 

In [None]:
# Continue the seed word with 50 sampled characters; the result is the cell's output.
rnn.predict(data_reader, start='Cooper', n=50)

'Cooperreomerstellaughey. A, Earitioped pols evey thr ace'

In [None]:
# Seed the model with a full sentence and let it write 100 more characters.
seed = "Cooper and Brand travel to another planet, where time moves slower than on Earth."
generated_text = rnn.predict(data_reader, start=seed, n=100)
print(f"Seed: {seed}", f"Generated Text: {generated_text}", sep="\n")


Seed: Cooper and Brand travel to another planet, where time moves slower than on Earth.
Generated Text: Cooper and Brand travel to another planet, where time moves slower than on Earth.erralelra Thasstilad pome Tiand. Cooper. Ite g Deylels Awacer dicer. Thesseta Chastisseyperg sols ee


In [None]:
# Seed the model with a repetitive pattern and let it write 100 more characters.
seed = "AB" * 10
generated_text = rnn.predict(data_reader, start=seed, n=100)
print(f"Seed: {seed}", f"Generated Text: {generated_text}", sep="\n")


Seed: ABABABABABABABABABAB
Generated Text: ABABABABABABABABABABraosithr Secoy bJestellila t farey equaeqh efe neurels ekmande pe. eq sose pore phrerst Jessetr fuce
