# Recurrent Neural Network

In [4]:
data = open('2states.txt', 'r').read()

chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d chars and %d unique vocabulary'%(data_size, vocab_size))

data has 463189 chars and 83 unique vocabulary


In [3]:
char2index = {char:i for i, char in enumerate(chars)}
index2char = {i:char for i, char in enumerate(chars)}
print(char2index)
print(index2char)

{'w': 0, 'h': 1, 'D': 2, '!': 3, 's': 4, '&': 5, 'g': 6, 'r': 7, 'k': 8, '*': 9, 'I': 10, '1': 11, 'o': 12, ')': 13, '"': 14, 'q': 15, '6': 16, 'y': 17, 'f': 18, 'Y': 19, 'b': 20, "'": 21, 'm': 22, 'v': 23, 'd': 24, 'T': 25, 'K': 26, 'A': 27, ';': 28, '<': 29, 'O': 30, 'j': 31, 'c': 32, 'p': 33, '^': 34, '8': 35, '9': 36, '%': 37, 'i': 38, '3': 39, 'W': 40, 'n': 41, 'S': 42, 'M': 43, 'J': 44, ' ': 45, 'V': 46, '2': 47, 'a': 48, 'L': 49, '/': 50, 'G': 51, 'l': 52, '?': 53, ',': 54, 'P': 55, 'N': 56, 'X': 57, 'H': 58, 'E': 59, '@': 60, 'Q': 61, '.': 62, 'Z': 63, '0': 64, '4': 65, 'R': 66, 'z': 67, 'U': 68, 'C': 69, 'e': 70, 't': 71, '5': 72, '(': 73, '—': 74, '-': 75, 'u': 76, 'B': 77, 'x': 78, '\n': 79, ':': 80, '7': 81, 'F': 82}
{0: 'w', 1: 'h', 2: 'D', 3: '!', 4: 's', 5: '&', 6: 'g', 7: 'r', 8: 'k', 9: '*', 10: 'I', 11: '1', 12: 'o', 13: ')', 14: '"', 15: 'q', 16: '6', 17: 'y', 18: 'f', 19: 'Y', 20: 'b', 21: "'", 22: 'm', 23: 'v', 24: 'd', 25: 'T', 26: 'K', 27: 'A', 28: ';', 29: '<', 

In [5]:
import numpy as np
vector_for_char_a = np.zeros((vocab_size, 1))
vector_for_char_a[char2index['a']] = 1
print(vector_for_char_a)

[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]]


In [27]:
#hyperparameters
hidden_size = 83
seq_length = 25
learning_rate = 1e-1

In [32]:
Wxh = np.random.randn(hidden_size, vocab_size) *0.01 #input to hidden
Whh = np.random.randn(hidden_size, hidden_size) *0.01 #hidden to hidden - recursive
Why = np.random.randn(vocab_size, vocab_size) *0.01 #hidden to output
bh = np.zeros((hidden_size, 1))
by = np.zeros((vocab_size, 1))

In [33]:
def lossFunc(inputs, targets, hprev):
    """                                                                                                                                                                                         
  inputs,targets are both list of integers.                                                                                                                                                   
  hprev is Hx1 array of initial hidden state                                                                                                                                                  
  returns the loss, gradients on model parameters, and last hidden state                                                                                                                      
  """
    #store our inputs, hidden states, outputs, and probability values    
    # Each of these are going to be SEQ_LENGTH(Here 25) long dicts i.e. 1 vector per time(seq) step
    # xs will store 1 hot encoded input characters for each of 25 time steps (26, 25 times)
    # hs will store hidden state outputs for 25 time steps (100, 25 times)) plus a -1 indexed initial state
    # to calculate the hidden state at t = 0
    # ys will store targets i.e. expected outputs for 25 times (26, 25 times), unnormalized probabs
    # ps will take the ys and convert them to normalized probab for chars
    # We could have used lists BUT we need an entry with -1 to calc the 0th hidden layer
    # -1 as  a list index would wrap around to the final element
    xs, hs, ys, ps = {}, {}, {}, {}
    
    #init with previous hidden state
    # Using "=" would create a reference, this creates a whole separate copy
    # We don't want hs[-1] to automatically change if hprev is changed
    hs[-1] = np.copy(hprev)
    loss = 0
    
    #forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros((vocab_size, 1))
        xs[t][inputs[t]] = 1
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)
        ys[t] = np.dot(Why, hs[t]) + by
        ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t]))
        loss += -np.log(ps[t][targets[t], 0])
        
    #backward propogation
    #initalize vectors for gradient values for each set of weights
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dhnext = np.zeros_like(hs[0])
    
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])
        dy[targets[t]] -= 1
        dWhy += np.dot(dy, hs[t].T)
        dby += dy
        dh = np.dot(Why.T, dy) + dhnext
        dhraw = (1 - hs[t]*hs[t]) * dh
        dbh += dhraw
        dWxh += np.dot(dhraw, xs[t].T)
        dWhh += np.dot(dhraw, hs[t-1].T)
        dhnext = np.dot(Whh.T, dhraw)
        
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -5, 5, out=dparam)
    
    return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]
        

In [36]:
#prediction, one full forward pass
def sample(h, seed_ix, n):
  """                                                                                                                                                                                         
  sample a sequence of integers from the model                                                                                                                                                
  h is memory state, seed_ix is seed letter for first time step   
  n is how many characters to predict
  """
  #create vector
  x = np.zeros((vocab_size, 1))
  #customize it for our seed char
  x[seed_ix] = 1
  #list to store generated chars
  indexes = []
  #for as many characters as we want to generate
  for t in range(n):
    #a hidden state at a given time step is a function 
    #of the input at the same time step modified by a weight matrix 
    #added to the hidden state of the previous time step 
    #multiplied by its own hidden state to hidden state matrix.
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    #compute output (unnormalised)
    y = np.dot(Why, h) + by
    ## probabilities for next chars
    p = np.exp(y) / np.sum(np.exp(y))
    #pick one with the highest probability 
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    #create a vector
    x = np.zeros((vocab_size, 1))
    #customize it for the predicted char
    x[ix] = 1
    #add it to the list
    indexes.append(ix)

  txt = ''.join(index2char[ix] for ix in indexes)
  print('----\n %s \n----' % (txt, ))

In [37]:
hprev = np.zeros((hidden_size, 1)) # reset RNN memory  
#predict the 200 next characters given 'a'

print(hprev.shape)
print(len(char2index))

sample(hprev, char2index['a'], 200)

(83, 1)
83
----
  'R0t3246;qMlO9S-AJ2cI272qf
X—!CaBLtXdlwb;CA8A;v6Cob.l"%—uG ZQ
DHHb87AF7/XwVq&"yeNPXYcX4E:YoN6fq.FubNp.7E-)A@,Zdu^^plo4L eU,Ja0qy(iJfKxUH%NVxzr1cql)(0!NY%tdAisGk"KbiC(V5.Qa3Lq0t13Rvy%JS)r3,EuYvZMuFQa— 
----


In [38]:
p=0  
inputs = [char2index[ch] for ch in data[p:p+seq_length]]
print("inputs", inputs)
targets = [char2index[ch] for ch in data[p+1:p+seq_length+1]]
print("targets", targets)

inputs [49, 12, 23, 70, 45, 22, 48, 7, 7, 38, 48, 6, 70, 4, 45, 48, 7, 12, 76, 41, 24, 45, 71, 1, 70]
targets [12, 23, 70, 45, 22, 48, 7, 7, 38, 48, 6, 70, 4, 45, 48, 7, 12, 76, 41, 24, 45, 71, 1, 70, 45]


In [44]:
for i in range(len(inputs)):
    print(index2char[i])

w
h
D
!
s
&
g
r
k
*
I
1
o
)
"
q
6
y
f
Y
b
'
m
v
d


In [46]:
for i in range(len(targets)):
    print(index2char[i])

w
h
D
!
s
&
g
r
k
*
I
1
o
)
"
q
6
y
f
Y
b
'
m
v
d


In [42]:
n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad                                                                                                                
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0                                                                                                                        
while n<=1000*100:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  # check "How to feed the loss function to see how this part works
  if p+seq_length+1 >= len(data) or n == 0:
    hprev = np.zeros((hidden_size,1)) # reset RNN memory                                                                                                                                      
    p = 0 # go from start of data                                                                                                                                                             
  inputs = [char2index[ch] for ch in data[p:p+seq_length]]
  targets = [char2index[ch] for ch in data[p+1:p+seq_length+1]]

  # forward seq_length characters through the net and fetch gradient                                                                                                                          
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFunc(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001

  # sample from the model now and then                                                                                                                                                        
  if n % 1000 == 0:
    print ('iter %d, loss: %f' % (n, smooth_loss)) # print progress
    sample(hprev, inputs[0], 200)

  # perform parameter update with Adagrad                                                                                                                                                     
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],
                                [dWxh, dWhh, dWhy, dbh, dby],
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update                                                                                                                   

  p += seq_length # move data pointer                                                                                                                                                         
  n += 1 # iteration counter

iter 0, loss: 110.471010
----
 Bh5XU(bDg/HPgP:FWY&q
"SfORbvtSNhk*HxOnB
6lu;L
T9YFlvf;l/x^bDP4?SFChtg4OF)!J0T1Trbu5HRG9jF5m-T*j^Ucw26ODdx3i)ZtU4e,IZpw/Qw)co.se&Lpdc"
XN*MivxqYN4i-3lPfT8nQ,:bo<9g(s-(0ACxum/.8UV7F,!*
n5qL&KV^RE)KMhoPX 
----
iter 1000, loss: 84.517805
----
 l bow bour s one norve med. wot ot oug oudyom Emiag. the se inm thlet I Ther piur led thyo heit zoutt aos be whewping, Ahom t in ha jMop on y the Humg the ther to oij tuppdulem 

'ine she me top 
mdon 
----
iter 2000, loss: 67.985109
----
 whan't waipfowheultit. I ot? Dom ca asbe dithein.' 

Sowelt?'s tatitheunytithext. 

'Woren tedt yat in. Soutsare. 'Dant,' 

'Yon't gommtep i!'rlmere to were, 

'calictate fted Ao dhutow. 

'
'Sooreint 
----
iter 3000, loss: 61.217468
----
 d at ya.' 



'Lviguerfd tKed dand cant mevo ofocfnd.' Anfalg.?' 

Dank sat. 

'Youtt in'. She aprat an. Yat th' 
'OY bergiat. 
'NThet fo?' Whe to,' I jut an ang. 
''Gron the woug. 

'
'Shoksas mon. I 
----
iter 4000, loss: 57.068234
----
 g nor. Sh

iter 35000, loss: 45.481258
----
 oilturtw shoopned. In's the nithan't Chenplivion saidnanted my my poed?' me abither. 

I coarentilfarage have me.' 



 



'Why gret.' sh Ninciout? 

'Meailarates tritharpare sesed i fith. I cacked a 
----
iter 36000, loss: 45.840456
----
 s, gorned bea? 

'You das we get. lake figk's th. Citen thabla sa a dageer nespucked toow hord tile motin, palaly uaclvee waind up weans. Mruch a dore for housare, I fay dabning orees. 
Shey said Anan 
----
iter 37000, loss: 46.213225
----
 d to vinver and do I marrer have it hasded in in mK,' Yesestidion mothing onas frilss, my toll, I ham, 

I She's colled 
incsyort him, his she ies. 

I my you are alparch abimso dever for you-reed of  
----
iter 38000, loss: 47.922368
----
 verme my And tamite shouqun in Ansuntreanis keo. He did and forn dithel,' I said. 
'I paobu evengo her caimered at to help doft fromer in Dan une a here. 

'Thater evenst's to zs stwe-preatsaning had  
----
iter 39000, loss: 46.479755
----
 th

iter 69000, loss: 43.930105
----
 aca ledtock Now it some mothnr with the genver gotherastwa.' 



 



 



'What becked. Fext days. 

'Momed to be for had the prople.' 

'Moked. I wourgew her, dhatso?' you hove Maked one beivics. I  
----
iter 70000, loss: 44.200585
----
  ton, when ans two 
creador kind. EThure 
came bech the up want the thear las at his rike had it. Ananyed gropged blys' me't thise Dutule wants. She oined aldord. 

As. 

'I bereven't fopwait. We hase 
----
iter 71000, loss: 43.383751
----
  best que, for the knone.' 

'Sid. Ananya said. 
'OK, temmagay cood. I swome to mother want., Ananya 
soo me thisz my 
gong dingected for'ghilurs at mece, I doares ey sitichat,' 

'Acc and whrineC Shu 
----
iter 72000, loss: 43.251030
----
 ddab, sort.' 

'Keral oul brenger Stry forders for or. Anywe, you seppeved. 

'Asa refivacclet to mict. No mece or?' Ana Shipts her a dolldife my be. When, tre toricite't vist why 
in the fies. she of 
----
iter 73000, loss: 43.515786
----
 e 