In [1]:
import os
import numpy as np
from RNN import *
from tqdm import tqdm
from IPython.display import clear_output

%load_ext autoreload
%autoreload 2

from IPython.display import Markdown, display
def printmd(string):
    """Show ``string`` in the cell output, rendered as Markdown."""
    markdown_obj = Markdown(string)
    display(markdown_obj)

# 1) Checking the mapper and predict function

In [30]:
# Smoke test of the character mapper and of predict2 on a freshly built RNN.
rnn = RNN()

# Round trip: string -> array of one-hot encoded columns -> back again.
passion_encoded = rnn.mapper["Passion"]
print(rnn.mapper[passion_encoded])

# A single integer index is mapped to a character.
print(rnn.mapper[1])

# An array of indices is mapped to an array of one-hot encoded vectors,
# which rnn.word() is then asked to turn back into characters.
char_indices = np.array([1, 2, 5, 77, 3, 1, 2, 0])
one_hot = rnn.mapper[char_indices]
print(rnn.word(one_hot))

# Run predict2 with n=4 from the encoded string "Harry" and an all-ones
# hidden vector of length 100.
# NOTE(review): presumably 100 matches the default hidden size of RNN() - confirm.
harry_encoded = rnn.mapper["Harry"]
a = rnn.predict2(x=harry_encoded, h=np.ones(100), n=4)
print(a.shape)      # array with one-hot encoded columns
print(rnn.word(a))  # original note: word() and mapper decode the same way
decoded_via_mapper = rnn.mapper[a]
print(len(decoded_via_mapper))

# 2) Check gradients numerically

In [22]:
def _relative_error_sum(grad_num, grad_an):
    """Return the summed element-wise relative error between two gradients.

    For every element the error is ``|num - an| / max(|num|, |an|)``.
    Elements where both gradients are exactly zero evaluate to 0/0 = NaN and
    are skipped by ``np.nansum``.

    Parameters
    ----------
    grad_num, grad_an : array-like
        The two gradient arrays to compare; they must have the same shape.

    Returns
    -------
    The sum of the relative errors over all non-NaN elements.

    Raises
    ------
    ValueError
        If the two arrays do not have the same shape.
    """
    grad_num = np.asarray(grad_num)
    grad_an = np.asarray(grad_an)
    if grad_num.shape != grad_an.shape:
        raise ValueError(
            "gradient shapes differ: %s vs %s" % (grad_num.shape, grad_an.shape)
        )
    scale = np.maximum(np.abs(grad_num), np.abs(grad_an))
    # 0/0 is expected wherever both gradients vanish; silence the warning on
    # purpose instead of letting nansum hide it after the fact.
    with np.errstate(divide="ignore", invalid="ignore"):
        rel_err = np.abs(grad_num - grad_an) / scale
    return np.nansum(rel_err)


# initialize a small network (m=5) so the slow numerical check stays cheap
rnn_check = RNN(m=5)
# NOTE(review): the 5 here presumably has to match m=5 above - confirm in RNN.
h0 = np.random.rand(5,1)
seq_length = 25
# inputs are the first seq_length characters, targets are the same window
# shifted by one character; both are passed through the mapper
X_chars = book_data[:seq_length]
X = rnn_check.mapper[X_chars]
Y_chars = book_data[1:seq_length+1]
Y = rnn_check.mapper[Y_chars]

# Trailing-underscore names come from comp_gradients, plain names from
# ComputeGradsNumSlow (last argument 1e-4; presumably the finite-difference
# step - confirm in RNN).
grad_b_, grad_c_, grad_U_, grad_W_, grad_V_ = comp_gradients(rnn_check, X, Y, h0)
grad_b, grad_c, grad_U, grad_W, grad_V = ComputeGradsNumSlow(rnn_check,X, Y, h0,1e-4)

# Report one summed relative error per parameter, in the order b, c, W, V, U.
for label, numerical, analytical in (
    ("b", grad_b, grad_b_),
    ("c", grad_c, grad_c_),
    ("W", grad_W, grad_W_),
    ("V", grad_V, grad_V_),
    ("U", grad_U, grad_U_),
):
    print("Sum of relative weights error for the " + label + " :",
          _relative_error_sum(numerical, analytical))

CALCULATING NUMERICAL GRADIENTS
Sum of relative weights error for the b : 2.0101982766669755e-08
Sum of relative weights error for the c : 1.1380771223064707e-07
Sum of relative weights error for the W : 4.1351267983285206e-07
Sum of relative weights error for the V : 1.1774052302721346e-06
Sum of relative weights error for the U : 2.799743233438285e-07




# 3)a Train RNN 

In [46]:
# initialize
rnn = RNN()
seq_length = 25
# first window: inputs are characters [0, seq_length), targets are the same
# window shifted by one character; both are passed through the mapper
X_chars = book_data[:seq_length]
X = rnn.mapper[X_chars]
Y_chars = book_data[1:seq_length+1]
Y = rnn.mapper[Y_chars]
# init hidden state
# NOTE(review): 100 presumably matches the default hidden size of RNN() - confirm.
h0 = np.zeros((100,1))
# The value returned by backward on the first window seeds the smoothed loss.
# NOTE(review): backward presumably also updates the parameters - confirm in RNN.
loss = rnn.backward(X, Y, h0)
print(loss)
ll = 0  # number of updates performed so far

# training loop: 8 passes over the book
for k in range(8):
    # Stop seq_length characters before the end so the target window
    # book_data[i+1 : i+seq_length+1] is always full length.
    for i in tqdm(range(0, len(book_data) - seq_length, seq_length)):

        # characters of the current window and their mapped representation
        X_chars = book_data[i:i+seq_length]
        X = rnn.mapper[X_chars]
        Y_chars = book_data[i+1:i+seq_length+1]
        Y = rnn.mapper[Y_chars]

        # display loss every 250th update
        if ll % 250 == 0:
            clear_output(wait=True)
            print(loss)

        # synthesize a text of length 250 letters every 500th update
        if ll % 500 == 0:
            txt = rnn.predict(x=X[:,0], h=rnn.h, n=250)
            sample_text = "".join(rnn.word(txt))
            # every 1000th update the sample is also appended to out.txt;
            # explicit UTF-8 so non-ASCII characters cannot break the write
            if ll % 1000 == 0:
                with open('out.txt', 'a', encoding='utf-8') as f:
                    print("\n*iter =*" +str(ll)+"*, smooth_loss=*"+str(loss)+"\n", file=f)
                    print(sample_text, file=f)
            printmd("**iter =**" +str(ll)+"**, smooth_loss=**"+str(loss)+"\n" )
            print(sample_text)

        # Reset the stored hidden state at the start of every epoch
        if i == 0:
            rnn.h = None
        # NOTE(review): h0 (all zeros) is passed on every call; whether backward
        # uses it or the stored rnn.h is decided inside RNN.backward - confirm.
        # Smoothed loss: 0.999 * previous value + 0.001 * latest returned value.
        loss = 0.999*loss + 0.001*rnn.backward(X, Y, h0)
        ll += 1

100%|█████████▉| 44159/44301 [08:46<00:01, 97.90it/s]

41.344818199542225


100%|██████████| 44301/44301 [08:47<00:00, 83.98it/s]


# 3)b Synthesize a text of length 2000

In [29]:
# Run predict with n=2000 from the mapped space character and an all-zero
# hidden state of shape (100, 1), then join the output of rnn.word into one
# string and print it.
seed_char = rnn.mapper[" "]
zero_state = np.zeros((100, 1))
tmp = rnn.predict(x=seed_char, h=zero_state, n=2000)
text = "".join(rnn.word(tmp))
print(text)