# Imports

In [1]:
import numpy as np

# I/O

In [2]:
# Get the database from a list of characters that is found in the input.txt
# should be simple plain text file
data = open('input.txt', 'r').read() 

In [3]:
# get the list of characters by first identifying the set() of characters and place them in a list()
# NOTE: set() function here creates an unorders collection with no duplicate elements
chars = list(set(data))

In [4]:
# obtain the size of the data and the size of the vocabulary we have
data_size, vocab_size = len(data), len(chars)

In [5]:
# enumerate the characters and give indices to them
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# Parameters

## Hyper

In [6]:
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1

## Model

In [7]:
# model parameters
concatHiddenLayerInputLayerSize = vocab_size + hidden_size

Wih=np.random.randn(concatHiddenLayerInputLayerSize, hidden_size) 
Wfh=np.random.randn(concatHiddenLayerInputLayerSize, hidden_size) 
Woh=np.random.randn(concatHiddenLayerInputLayerSize, hidden_size) 
Wgh=np.random.randn(concatHiddenLayerInputLayerSize, hidden_size) 
Wch=np.random.randn(concatHiddenLayerInputLayerSize, hidden_size) 
Wyh=np.random.randn(hidden_size, vocab_size) 

bi=np.zeros((1, hidden_size))
bf=np.zeros((1, hidden_size))
bo=np.zeros((1, hidden_size))
bg=np.zeros((1, hidden_size))
bc=np.zeros((1, hidden_size))
by=np.zeros((1, hidden_size))


# Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
# Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
# Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
# bh = np.zeros((hidden_size, 1)) # hidden bias
# by = np.zeros((vocab_size, 1)) # output bias

# Functions

## Sigmoid

In [8]:
def sigmoid(x):
    sigmoidOut = 1 / (1 + np.exp(-x))    
    return sigmoidOut  

In [9]:
def dsigmoid(x):
    dsigmoidOut = sigmoid(x)*(1 - sigmoid(x))
    return dsigmoidOut  

## Tanh

In [10]:
def tanh(x):
    tanhOut = np.tanh(x)
    return tanhOut

In [11]:
def dtanh(x):
    dtanh = 1 - tanh(x)**2

# RNN 

## init

In [12]:
# keep track of the data index as well as the iteration 
# n = interation counter
# p = data pointer
n, p = 0, 0

In [13]:
# initialize the weights all to zero
mWih = np.zeros_like(Wih)
mWfh = np.zeros_like(Wfh)
mWoh = np.zeros_like(Woh)
mWgh = np.zeros_like(Wgh)
mWch = np.zeros_like(Wch)
mWyh = np.zeros_like(Wyh)

# Bias
mbi = np.zeros_like(bi)
mbf = np.zeros_like(bf)
mbo = np.zeros_like(bo)
mbg = np.zeros_like(bg)
mbc = np.zeros_like(bc)
mby = np.zeros_like(by)

## Training Start

In [14]:
# Step #1 Truncated Backprop through time paradigm
# prepare inputs (we're sweeping from left to right in steps seq_length long)
# Because we are doing truncated backprop thorugh time (by 25 steps) we need to see if we get an overflow.  If so then we need
# to initialize everything back to zero.  this means that the previous hidden state is now 0 and the data pointer is also back
# to zero
if p+seq_length+1 >= len(data) or n == 0: 
    # reset RNN memory
    hprev = np.zeros((1,hidden_size))
    cprev = np.zeros((1,hidden_size))
    # go from start of data
    p = 0 #

In [15]:
# Step #2 - Obtain Inputs
# Get the sequence of inputs in the database with length seq_length
# input is a list of indicies in the char_to_ix
inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]  

In [16]:
# Step #3 - Obtain Outputs
# Get the sequence of outputs in the database with length seq_length
# basically the next character of the input
targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

### Forward prop

In [17]:
# inits for forward pass
# xs: input state
# hs: hidden state
# ys: output state
# ps: propbability state
xs, hs, cs, ys, ps = {}, {}, {}, {},{}
hs[-1] = np.copy(hprev)
cs[-1] = np.copy(cprev)
loss = 0

In [18]:
t = 0

In [19]:
# one-hot-encoder
# get zeros
xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
# place a one where the input index is
xs[t][inputs[t]] = 1

In [20]:
xc = np.concatenate((hprev[0],np.ravel(xs[t])),axis=0)

In [21]:
it = sigmoid(np.dot(xc,Wih) + bi)
ft = sigmoid(np.dot(xc,Wfh) + bf)
ot = sigmoid(np.dot(xc,Woh) + bo)
gt = tanh(np.dot(xc,Wgh) + bg)

In [22]:
ct = ft.T*cprev + it *gt
ht = ot * tanh(ct)

In [23]:
np.shape(ct)

(100L, 100L)

In [24]:
np.shape(ot)

(1L, 100L)

In [25]:
np.shape(ht)

(100L, 100L)