In [72]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

# Setup

In [73]:
# Fetch the Nietzsche corpus (get_file is given the origin URL) and read it into memory.
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
# Read through a context manager so the file handle is closed as soon as the
# text has been loaded instead of being left open.
with open(path) as corpus_file:
    text = corpus_file.read()
print('corpus length:', len(text))

corpus length: 600901


In [74]:
# Character vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
# Count one extra slot for the "\0" entry that the next cell inserts at index 0.
vocab_size = 1 + len(chars)
print('total chars:', vocab_size)

total chars: 86


In [75]:
# Reserve index 0 for the "\0" character.
# Guarded so that re-running this cell does not insert a second "\0", which
# would shift every character index and no longer match vocab_size.
if chars[:1] != ["\0"]:
    chars.insert(0, "\0")

In [76]:
''.join(chars[1:-6])

'\n !"\'(),-.0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz'

In [77]:
# Lookup tables in both directions between a character and its position in `chars`.
char_indices = {ch: pos for pos, ch in enumerate(chars)}
indices_char = {pos: ch for pos, ch in enumerate(chars)}

In [78]:
# convert whole text into (char)indices, a new representation
idx = [char_indices[c] for c in text]

In [79]:
idx[:10]

[40, 42, 29, 30, 25, 27, 29, 1, 1, 1]

In [80]:
''.join(indices_char[i] for i in idx[:70])

'PREFACE\n\n\nSUPPOSING that Truth is a woman--what then? Is there not gro'

# CS=3 model

## Setup

In [10]:
cs = 3
# Walk the text in steps of cs. For every step, cN_dat holds the character at
# offset N-1 from the step start:
#   c1_dat: 0, 3, 6, ...   c2_dat: 1, 4, 7, ...
#   c3_dat: 2, 5, 8, ...   c4_dat: 3, 6, 9, ...
c1_dat, c2_dat, c3_dat, c4_dat = [
    [idx[i + offset] for i in xrange(0, len(idx) - 1 - cs, cs)]
    for offset in range(4)
]

In [11]:
# Model inputs: first, second and third character of every step
# (the last two entries of each series are dropped).
x1, x2, x3 = [np.stack(series[:-2]) for series in (c1_dat, c2_dat, c3_dat)]

In [12]:
y = np.stack(c4_dat[:-2])

In [13]:
x1[:4], x2[:4], x3[:4]

(array([40, 30, 29,  1]), array([42, 25,  1, 43]), array([29, 27,  1, 45]))

In [14]:
y[:3]

array([30, 29,  1])

In [15]:
x1.shape, y.shape

((200297,), (200297,))

In [16]:
n_fac = 42

In [17]:
def embedding_input(name, n_in, n_out):
    """Build a single-integer input together with its flattened embedding.

    Args:
        name: name given to the Input layer.
        n_in: first argument passed to Embedding.
        n_out: second argument passed to Embedding.

    Returns:
        A pair (inp, flat): the Input tensor and the Embedding output after Flatten.
    """
    inp = Input(shape=(1,), dtype='int64', name=name)
    embedded = Embedding(n_in, n_out, input_length=1)(inp)
    flat = Flatten()(embedded)
    return inp, flat

In [18]:
# One (input, flattened embedding) pair for each of the three input characters.
(c1_in, c1), (c2_in, c2), (c3_in, c3) = [
    embedding_input(layer_name, vocab_size, n_fac)
    for layer_name in ('c1', 'c2', 'c3')
]

## Model

In [19]:
# Number of neurons in the hidden layer
n_hidden = 256

In [20]:
# Input-to-hidden layer: n_hidden units with relu activation.
# This is a single layer object; the cells below apply the same object to each
# character's embedding, so all characters go through the same layer.
dense_in = Dense(n_hidden, activation='relu')

In [21]:
# Create the dense layer w/ the c1 embeddings
c1_hidden = dense_in(c1)

In [22]:
dense_hidden = Dense(n_hidden, activation='tanh')

In [23]:
c2_dense = dense_in(c2) # Second character's embedding through the shared input layer
hidden_2 = dense_hidden(c1_hidden) # Hidden state from the first character through the hidden-to-hidden layer
c2_hidden = merge([c2_dense, hidden_2]) # Combine the two (merge is called with its default mode)

In [24]:
c3_dense = dense_in(c3) # Third character's embedding through the shared input layer
hidden_3 = dense_hidden(c2_hidden) # Hidden state after the second character through the hidden-to-hidden layer
c3_hidden = merge([c3_dense, hidden_3]) # Combine the two (merge is called with its default mode)

In [25]:
dense_out = Dense(vocab_size, activation='softmax')

In [26]:
c4_out = dense_out(c3_hidden)

In [27]:
model = Model([c1_in, c2_in, c3_in], c4_out)

In [28]:
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())

In [29]:
# Set the learning rate by writing into the optimizer's existing backend
# variable. Plain attribute assignment (`model.optimizer.lr = 0.001`) rebinds
# `lr` to a Python float instead; a training function that was already built
# would keep using the old variable and silently ignore the new value.
# NOTE(review): assumes `model.optimizer.lr` is a backend variable, as in Keras 1.x.
from keras import backend as K
K.set_value(model.optimizer.lr, 0.001)

In [30]:
model.fit([x1,x2,x3], y, batch_size=64, nb_epoch=1)

Epoch 1/1


<keras.callbacks.History at 0x11142f2d0>

In [31]:
np.array(2)[np.newaxis]

array([2])

In [32]:
np.array([1,2,3])

array([1, 2, 3])

In [33]:
def get_next(inp):
    """Return the character the model scores highest as the successor of `inp`.

    Every character of `inp` is looked up in char_indices and wrapped in a
    length-1 array; the list of arrays is passed to model.predict and the
    argmax of the prediction is mapped back through `chars`.
    """
    model_inputs = [np.array(char_indices[ch])[np.newaxis] for ch in inp]
    scores = model.predict(model_inputs)
    return chars[np.argmax(scores)]

In [34]:
get_next('phi')

's'

In [35]:
get_next(' th')

'e'

In [36]:
get_next(' an')

'd'

# RNN (Recurrent Neural Network)
## char 1 to n-1 -> char n

In [37]:
cs=8

In [38]:
# Split the text into cs series, one per offset n, each taking every cs-th
# character: n=0 -> chars 0,8,16,24,...; n=1 -> chars 1,9,17,25,...; etc.
c_in_dat = []
for n in range(cs):
    c_in_dat.append([idx[i+n] for i in xrange(0, len(idx)-1-cs, cs)])

In [39]:
# Target for each step: the character located cs positions after the step start,
# i.e. the one immediately following the cs input characters.
c_out_dat = [idx[i+cs] for i in xrange(0, len(idx)-1-cs, cs)]

In [40]:
xs = [np.stack(c[:-2]) for c in c_in_dat] # Turn each series (list of ints) into an array, dropping its last two entries

In [41]:
len(xs), xs[0].shape

(8, (75110,))

In [42]:
y = np.stack(c_out_dat[:-2])

In [43]:
[xs[n][:cs] for n in range(cs)]

[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]

In [44]:
y[:cs]

array([ 1, 33,  2, 72, 67, 73,  2, 68])

In [45]:
n_fac = 42 # Embed into 42 latent degrees of freedom

In [46]:
def imbedding_input(name, n_in, n_out):
    """Build a named single-integer input and its flattened, named embedding.

    The Input layer is named `name + '_in'` and the Embedding layer
    `name + '_emb'`.

    NOTE(review): the cell that builds `c_ins` calls `embedding_input`, not
    this function, so the name here looks like a typo - confirm which helper
    is intended before renaming.

    Returns:
        A pair (inp, flat): the Input tensor and the Embedding output after Flatten.
    """
    inp = Input(shape=(1,), dtype='int64', name=name+'_in')
    embedded = Embedding(n_in, n_out, input_length=1, name=name+'_emb')(inp)
    flat = Flatten()(embedded)
    return inp, flat

In [47]:
# Create the input, embedding layer for each char input
# c_ins is the list of (input, embedding) layers
c_ins = [embedding_input('c'+str(n), vocab_size, n_fac) for n in range(cs)]

In [48]:
n_hidden = 256

In [49]:
# Layers reused by the cells below:
#   dense_in     - applied to each character's flattened embedding
#   dense_hidden - applied to the running hidden state (created with init='identity')
#   dense_out    - softmax over vocab_size outputs, applied to the final hidden state
dense_in = Dense(n_hidden, activation='relu')
dense_hidden = Dense(n_hidden, activation='relu', init='identity')
dense_out = Dense(vocab_size, activation='softmax')

In [50]:
# Seed the hidden state: c_ins[0] is the (input, flattened embedding) pair of the
# first character; its embedding (element [1]) goes through dense_in.
hidden = dense_in(c_ins[0][1]) 

In [51]:
# Recurrent part: run last char's embedding through dense layer,
# input all other cs-1 chars in order, embed them, 
# merge this layer with last chars' dense layer
# for i in range(cs):
#     hidden = dense_hidden(hidden)
#     c_dense = dense_in(c_ins[i][1])
#     hidden = merge([c_dense, hidden])

In [54]:
# Recurrent steps: for each character in turn, pass the running hidden state
# through dense_hidden, pass that character's embedding through dense_in,
# and merge the two into the new hidden state.
def rnn(hidden, i, n):
    """Apply one recurrent step for each of c_ins[i] .. c_ins[n-1].

    Args:
        hidden: hidden-state tensor to start from.
        i: index into c_ins of the first character to consume.
        n: one past the index of the last character to consume.

    Returns:
        The hidden-state tensor after all steps; `hidden` itself when i >= n.
    """
    for pos in range(i, n):
        hidden = dense_hidden(hidden)
        c_dense = dense_in(c_ins[pos][1])
        hidden = merge([c_dense, hidden])
    return hidden

In [55]:
# `hidden` was already seeded from the first character (c_ins[0]) above, so the
# recurrence starts at the second one: rnn(hidden, 1, cs) consumes
# c_ins[1] .. c_ins[cs-1]. Starting at 0 would feed the first character into
# the network twice.
hidden = rnn(hidden, 1, cs)

In [56]:
c_out = dense_out(hidden)

In [57]:
model = Model([c[0] for c in c_ins], c_out) # inputs and output
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())

In [58]:
model.fit(xs, y, batch_size=64, nb_epoch=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x115a9b310>

## Test model

In [59]:
def get_next(inp):
    """Return the character the model scores highest as the successor of `inp`.

    Each character of `inp` becomes one length-1 index array; the arrays are
    passed, in order, as the model's inputs.
    """
    model_inputs = []
    for ch in inp:
        char_id = char_indices[ch]
        model_inputs.append(np.array(char_id)[np.newaxis])
    scores = model.predict(model_inputs)
    best = np.argmax(scores)
    return chars[best]

In [60]:
get_next('for thos')

' '

In [61]:
get_next('part of ')

't'

In [63]:
get_next('queens a')

'n'

## Sentiment analysis

In [64]:
from keras.datasets import imdb
idx = imdb.get_word_index()

Downloading data from https://s3.amazonaws.com/text-datasets/imdb_word_index.pkl


In [66]:
idx_arr = sorted(idx, key=idx.get)
idx_arr[:10]

['the', 'and', 'a', 'of', 'to', 'is', 'br', 'in', 'it', 'i']

In [67]:
idx2word = {v: k for k, v in idx.iteritems()}

# RNN 
## char 1 to n-1 -> char 2 to n

In [81]:
cs=8

In [95]:
# Inputs: split the text into cs series, one per offset n, each taking every
# cs-th character (n=0 -> chars 0,8,16,24,...; n=1 -> chars 1,9,17,25,...; etc.)
c_in_dat = [[idx[i+n] for i in xrange(0,len(idx)-1-cs, cs)] 
           for n in range(cs)]

In [101]:
# Targets: the same cs series shifted forward by one character
# (n=0 -> chars 1,9,17,25,...; n=1 -> chars 2,10,18,26,...; etc.),
# so target series n holds the character right after input series n.
# (Original author's note: this setup "memorizes" the context.)
c_out_dat = []
for n in range(cs):
    c_out_dat.append([idx[i+n] for i in xrange(1, len(idx)-cs, cs)])

In [102]:
xs = [np.stack(c[:-2]) for c in c_in_dat]

In [103]:
ys = [np.stack(c[:-2]) for c in c_out_dat]

In [104]:
[xs[n][:cs] for n in range(cs)]

[array([40,  1, 33,  2, 72, 67, 73,  2]),
 array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67])]

In [105]:
[ys[n][:cs] for n in range(cs)]

[array([42,  1, 38, 44,  2,  9, 61, 73]),
 array([29, 43, 31, 71, 54,  9, 58, 61]),
 array([30, 45,  2, 74,  2, 76, 67, 58]),
 array([25, 40, 73, 73, 76, 61, 24, 71]),
 array([27, 40, 61, 61, 68, 54,  2, 58]),
 array([29, 39, 54,  2, 66, 73, 33,  2]),
 array([ 1, 43, 73, 62, 54,  2, 72, 67]),
 array([ 1, 33,  2, 72, 67, 73,  2, 68])]

In [45]:
n_fac = 42 # Embed into 42 latent degrees of freedom

In [46]:
def imbedding_input(name, n_in, n_out):
    """Build a named single-integer input and its flattened, named embedding.

    Layer names are derived from `name`: the Input gets `name + '_in'` and the
    Embedding gets `name + '_emb'`.

    NOTE(review): an identical definition appears earlier in this notebook, and
    the following cell calls `embedding_input` rather than this function -
    confirm whether this cell is still needed.

    Returns:
        A pair (inp, flat): the Input tensor and the Embedding output after Flatten.
    """
    inp = Input(shape=(1,), dtype='int64', name=name+'_in')
    embedding_layer = Embedding(n_in, n_out, input_length=1, name=name+'_emb')
    embedded = embedding_layer(inp)
    return inp, Flatten()(embedded)

In [47]:
# Create the input, embedding layer for each char input
# c_ins is the list of (input, embedding) layers
c_ins = [embedding_input('c'+str(n), vocab_size, n_fac) for n in range(cs)]

In [48]:
n_hidden = 256

In [185]:
# Actual model building blocks to be used
dense_in = Dense(n_hidden, activation='relu')
dense_hidden = Dense(n_hidden, activation='relu', init='identity')
dense_out = Dense(vocab_size, activation='softmax', name = 'output')

In [186]:
# Seed the hidden state from a dedicated input of width n_fac named 'zeros'
# (the fit/predict cells feed it an all-zero array), so that the first
# character can be handled inside the loop like every other character.
inp1 = Input(shape=(n_fac,), name='zeros')
hidden = dense_in(inp1) 

In [187]:
# Recurrent part with a prediction at every step. For each character:
#   1. pass its embedding through dense_in,
#   2. pass the running hidden state through dense_hidden,
#   3. sum the two to get the new hidden state,
#   4. record dense_out of that state as this step's output.

outs = []
for i in range(cs): # starts at 0: the first character is handled in the loop too
    char_emb = c_ins[i][1]
    c_dense = dense_in(char_emb)
    hidden = merge([c_dense, dense_hidden(hidden)], mode = 'sum')
    outs.append(dense_out(hidden)) # one output per character position

In [188]:
model = Model([inp1] + [c[0] for c in c_ins], outs) # inputs and output
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam())

In [189]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
c0 (InputLayer)                  (None, 1)             0                                            
____________________________________________________________________________________________________
embedding_4 (Embedding)          (None, 1, 42)         3612        c0[0][0]                         
____________________________________________________________________________________________________
zeros (InputLayer)               (None, 42)            0                                            
____________________________________________________________________________________________________
dense_10 (Dense)                 (None, 256)           11008       zeros[0][0]                      
                                                                   flatten_4[0][0]         

In [190]:
# All-zero array for the 'zeros' input: one row of n_fac zeros per sample in xs[0].
zeros = np.zeros((len(xs[0]), n_fac))

In [191]:
model.fit([zeros]+xs, ys, batch_size=64, nb_epoch=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x127c6a9d0>

In [192]:
model.fit([zeros]+xs, ys, batch_size=64, nb_epoch=2)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x127c6aad0>

In [193]:
model.fit([zeros]+xs, ys, batch_size=64, nb_epoch=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x127c6ac50>

In [198]:
def get_nexts(inp):
    """Return the model's highest-scoring character for every output position.

    Prints the characters of `inp` as a list, then returns one predicted
    character per model output. The model receives a single row of n_fac
    zeros followed by one length-1 index array per character of `inp`.
    """
    char_inputs = []
    for ch in inp:
        char_inputs.append(np.array(char_indices[ch])[np.newaxis])
    zero_row = np.zeros(n_fac)[np.newaxis,:]
    preds = model.predict([zero_row] + char_inputs)
    print(list(inp))
    return [chars[np.argmax(out)] for out in preds]

In [199]:
get_nexts(' this is')

[' ', 't', 'h', 'i', 's', ' ', 'i', 's']


['t', 'h', 'e', 't', ' ', 's', 'n', ' ']

In [111]:
!git add rnn.ipynb

In [112]:
!git commit -m "Add 0 to n-1 -> n model and the 0 to n-1 -> 1 to n model"

[master 07ffda8] Add 0 to n-1 -> n model and the 0 to n-1 -> 1 to n model
 1 file changed, 831 insertions(+), 109 deletions(-)


In [113]:
!git push -u origin master

Counting objects: 3, done.
Delta compression using up to 8 threads.
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 6.95 KiB | 0 bytes/s, done.
Total 3 (delta 1), reused 0 (delta 0)
remote: Resolving deltas: 100% (1/1), completed with 1 local objects.
To github.com:szalmaf/recurrent_neural_network.git
   5c0fde2..07ffda8  master -> master
Branch master set up to track remote branch master from origin.
