# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Redes-Recorrentes" data-toc-modified-id="Redes-Recorrentes-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Redes Recorrentes</a></div><div class="lev2 toc-item"><a href="#SimpleRNN" data-toc-modified-id="SimpleRNN-11"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>SimpleRNN</a></div><div class="lev3 toc-item"><a href="#modelo-básico" data-toc-modified-id="modelo-básico-111"><span class="toc-item-num">1.1.1&nbsp;&nbsp;</span>modelo básico</a></div><div class="lev3 toc-item"><a href="#retornando-todas-as-saídas" data-toc-modified-id="retornando-todas-as-saídas-112"><span class="toc-item-num">1.1.2&nbsp;&nbsp;</span>retornando todas as saídas</a></div><div class="lev3 toc-item"><a href="#stateful" data-toc-modified-id="stateful-113"><span class="toc-item-num">1.1.3&nbsp;&nbsp;</span>stateful</a></div><div class="lev2 toc-item"><a href="#LSTM" data-toc-modified-id="LSTM-12"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>LSTM</a></div><div class="lev3 toc-item"><a href="#testando..." data-toc-modified-id="testando...-121"><span class="toc-item-num">1.2.1&nbsp;&nbsp;</span>testando...</a></div><div class="lev2 toc-item"><a href="#GRU" data-toc-modified-id="GRU-13"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>GRU</a></div><div class="lev3 toc-item"><a href="#testando..." data-toc-modified-id="testando...-131"><span class="toc-item-num">1.3.1&nbsp;&nbsp;</span>testando...</a></div>

# Redes Recorrentes

In [1]:
%matplotlib inline
import matplotlib.pyplot as plot
from IPython import display
from __future__ import print_function

import os
import sys
import glob
import numpy as np
import numpy.random as nr

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.layers import Dense, Input, Flatten, Dropout
from keras.layers import Conv1D, MaxPooling1D, Embedding
from keras.layers.recurrent import SimpleRNN, LSTM, GRU
from keras.models import Model
from keras.optimizers import (SGD, 
                              RMSprop, 
                              Adam, 
                              Adadelta, 
                              Adagrad)

sys.path.append('../src')
from my_keras_utilities import (get_available_gpus, 
                                load_model_and_history, 
                                save_model_and_history, 
                                TrainingPlotter)

# Make sure the output directory for saved models exists; exist_ok=True makes
# re-running this cell a no-op when the directory is already there.
os.makedirs('../../models',exist_ok=True)
# Compact array printing: 3 decimals, wide lines, no scientific notation.
np.set_printoptions(precision=3, linewidth=120, suppress=True)

Using TensorFlow backend.


In [2]:
import keras.backend as K

# Optional override of the image data format (left disabled):
# K.set_image_data_format('channels_first')
# Use 32-bit floats as the Keras default float type.
K.set_floatx('float32')

# Report the active Keras configuration for this run.
print('Backend:        {}'.format(K.backend()))
print('Data format:    {}'.format(K.image_data_format()))
# get_available_gpus comes from the project-local my_keras_utilities module;
# presumably it lists the GPU devices visible to the backend -- confirm there.
print('Available GPUS:', get_available_gpus())

Backend:        tensorflow
Data format:    channels_first
Available GPUS: []
Encoding:       utf-8


In [3]:
class MyRecurrentNet:
    """Base class for NumPy re-implementations of recurrent layers.

    The class stores the layer configuration and weights, splits prediction
    into batches and manages the recurrent state.  Subclasses provide the
    actual recurrence by implementing ``_predict_one_batch``.

    Parameters
    ----------
    n_units : int
        Size of the hidden state.
    input_shape : tuple
        ``(n_steps, n_inputs)`` of one input sample.  Required, even though
        the keyword has a ``None`` default.
    return_sequences : bool
        If True the output holds the hidden state of every time step,
        otherwise only the last one.
    stateful : bool
        If True the final state of a batch is used as the initial state of
        the next batch instead of zeros.
    """

    def __init__(self, n_units, input_shape=None, return_sequences=False, stateful=False):
        if input_shape is None:
            # Same exception type as unpacking None, but with a usable message.
            raise TypeError("input_shape=(n_steps, n_inputs) is required")
        self.return_sequences = return_sequences
        self.stateful = stateful
        self.n_steps, self.n_inputs = input_shape
        self.n_units = n_units
        # Last hidden state / last cell state, filled in by _get_output.
        self.h0 = None
        self.s0 = None
        self.Wx = None
        self.Ws = None
        self.b = 0

    def set_weights(self, Wx, Ws, b):
        """Store the input weights, recurrent weights and bias."""
        self.Wx = Wx
        self.Ws = Ws
        self.b = b

    def reset_states(self):
        """Forget the carried-over state so the next batch starts from zeros."""
        self.h0 = None
        self.s0 = None

    def predict(self, X, batch_size=None):
        """Run the network over ``X`` in consecutive batches.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_steps, n_inputs)
        batch_size : int or None
            Number of samples per batch; ``None`` processes everything at once.

        Returns
        -------
        ndarray
            Per-batch outputs stacked along the first axis.
        """
        n_samples, n_steps, n_inputs = X.shape
        assert n_inputs == self.n_inputs, "unexpected number of input features"
        assert n_steps == self.n_steps, "unexpected number of time steps"
        batch_size = batch_size or n_samples
        n_batches = int(np.ceil(n_samples / batch_size))
        yb = [self._predict_one_batch(X[i*batch_size:(i+1)*batch_size])
              for i in range(n_batches)]
        return np.vstack(yb)

    def _predict_one_batch(self, X):
        """Compute the layer output for one batch (subclass responsibility)."""
        raise NotImplementedError("This method should be implemented in a derived class.")

    def _init_state(self, n_samples):
        """Return the initial ``(h, s)`` pair for a batch of ``n_samples``.

        In stateful mode, once a previous batch has been processed, the state
        saved by ``_get_output`` is reused; otherwise both are zero arrays of
        shape ``(n_samples, n_units)``.
        """
        if self.stateful and self.h0 is not None:
            h = self.h0
            s = self.s0
        else:
            # Builtin float (float64): the np.float alias was removed in NumPy 1.24.
            h = np.zeros((n_samples, self.n_units), float)
            s = np.zeros((n_samples, self.n_units), float)
        return h, s

    def _get_output(self, h, s=(None,)):
        """Save the final state and build the batch output.

        ``h`` (and optionally ``s``) are per-step sequences whose first entry
        is the initial state.  The last entries are stored in ``h0``/``s0``.
        Returns every step after the initial one, shaped
        ``(n_samples, n_steps, n_units)``, when ``return_sequences`` is set,
        and only the last hidden state otherwise.
        """
        self.h0 = h[-1]
        self.s0 = s[-1]
        if self.return_sequences:
            # Skip h[0] (initial state) and put the time axis second.
            output = np.stack(h[1:], axis=1)
        else:
            output = h[-1]
        return output

    @staticmethod
    def _sigmoid(z):
        """Element-wise logistic function ``1 / (1 + exp(-z))``."""
        z = np.asarray(z)
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def _hard_sigmoid(x):
        """Piecewise-linear sigmoid: 0 below -2.5, 1 above 2.5, 0.2*x + 0.5 between."""
        # Faster than sigmoid.
        z = 0.2*x + 0.5
        z = np.where(x < -2.5, 0.0, z)
        z = np.where(x > 2.5, 1.0, z)
        return z


In [4]:
# Data for the tests: uniform values in [-0.5, 0.5) with shape (2, 3, 5), which
# the cells below feed to networks built for 3 time steps of 5 inputs each.
# NOTE: no random seed is set, so the values change on every run.
a = nr.random((2, 3, 5)) - 0.5
print('Input:')
print(a)

Input:
[[[-0.349 -0.44   0.037 -0.081  0.436]
  [ 0.055  0.294  0.498 -0.374  0.053]
  [ 0.089  0.162  0.134  0.044 -0.097]]

 [[ 0.436 -0.275  0.468  0.238  0.421]
  [ 0.218 -0.122  0.359 -0.164 -0.198]
  [ 0.368 -0.469 -0.287 -0.08   0.15 ]]]


## SimpleRNN

<table align='left'>
<tr><td> <img src="https://drive.google.com/uc?id=0By1KMDFVxsI2eHl3SGdKemlERk0"> </td></tr>
</table>

In [5]:
class MySimpleRNN(MyRecurrentNet):
    """NumPy forward pass of a simple recurrent layer with tanh activation."""

    def _predict_one_batch(self, X):
        """Apply ``h[t+1] = tanh(x[t].Wx + h[t].Ws + b)`` over every time step.

        ``X`` has shape (n_samples, n_steps, n_inputs).  The result is whatever
        ``_get_output`` builds from the list of hidden states (last state or
        the full sequence).
        """
        n_samples, n_steps, _ = X.shape
        h = [None] * (n_steps + 1)
        h[0], _ = self._init_state(n_samples)
        for i in range(n_steps):
            # Use the weights stored by set_weights, not same-named notebook
            # globals, which later cells rebind to LSTM/GRU weights.
            h[i+1] = np.tanh(np.dot(X[:, i], self.Wx) + np.dot(h[i], self.Ws) + self.b)
        return self._get_output(h)
        

### modelo básico

In [6]:
# Reference model: a single Keras SimpleRNN layer with 10 units fed
# sequences of 3 steps x 5 inputs; only the last hidden state is returned.
inp = Input(shape=(3, 5))
out = SimpleRNN(10)(inp)
model = Model(inp, out)

model.summary()
print('Weight shapes:', [w.shape for w in model.get_weights()], end='\n\n')
# Three arrays: input weights, recurrent weights and bias. The next cells
# reuse these same values through set_weights.
Wx, Ws, b = model.get_weights()

# NumPy counterpart configured with the Keras weights.
rnn = MySimpleRNN(10, input_shape=(3, 5))
rnn.set_weights(Wx, Ws, b)

preds_1 = model.predict(a)
preds_2 = rnn.predict(a)
print('Output:')
print(preds_1)

# The check passes when both implementations agree within np.allclose's
# default tolerances.
ok = np.allclose(preds_1, preds_2)
print('--------\nOK:', ok)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 3, 5)              0         
_________________________________________________________________
simple_rnn_1 (SimpleRNN)     (None, 10)                160       
Total params: 160
Trainable params: 160
Non-trainable params: 0
_________________________________________________________________
Weight shapes: [(5, 10), (10, 10), (10,)]

Output:
[[ 0.36  -0.171  0.177 -0.463 -0.245 -0.314  0.347 -0.273 -0.49  -0.382]
 [-0.005  0.253 -0.03  -0.038  0.056  0.283  0.445 -0.054  0.009 -0.472]]
--------
OK: True


### retornando todas as saídas

In [7]:
# Same layer as before, but return_sequences=True makes Keras output the
# hidden state of every time step.
inp = Input(shape=(3, 5))
out = SimpleRNN(10, return_sequences=True)(inp)
model = Model(inp, out)

model.summary()
print('Weight shapes:', [w.shape for w in model.get_weights()], end='\n\n')
# Load the weights extracted in the previous cell (requires that cell to have
# run) so the outputs are comparable with the earlier result.
model.set_weights([Wx, Ws, b])

rnn = MySimpleRNN(10, input_shape=(3, 5), return_sequences=True)
rnn.set_weights(Wx, Ws, b)

preds_1 = model.predict(a)
preds_2 = rnn.predict(a)
print('Output:')
print(preds_1)

# Compare the full per-step outputs of both implementations.
ok = np.allclose(preds_1, preds_2)
print('--------\nOK:', ok)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 3, 5)              0         
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 3, 10)             160       
Total params: 160
Trainable params: 160
Non-trainable params: 0
_________________________________________________________________
Weight shapes: [(5, 10), (10, 10), (10,)]

Output:
[[[ 0.019  0.36  -0.502  0.291 -0.117 -0.111  0.242  0.074 -0.03   0.1  ]
  [ 0.009 -0.382 -0.263 -0.044 -0.219 -0.288 -0.381 -0.597 -0.052  0.453]
  [ 0.36  -0.171  0.177 -0.463 -0.245 -0.314  0.347 -0.273 -0.49  -0.382]]

 [[-0.354  0.427  0.043  0.276  0.095  0.123  0.417 -0.491 -0.091 -0.062]
  [-0.366 -0.495  0.388 -0.261  0.333 -0.184 -0.4   -0.542 -0.257  0.049]
  [-0.005  0.253 -0.03  -0.038  0.056  0.283  0.445 -0.054  0.009 -0.472]]]
--------
OK: True


### stateful

In [8]:
# Stateful variant: the batch size is fixed to 1 through batch_shape, and the
# state left by one batch is the initial state of the next one.
inp = Input(batch_shape=(1, 3, 5))
out = SimpleRNN(10, stateful=True)(inp)
model = Model(inp, out)

model.summary()
print('Weight shapes:', [w.shape for w in model.get_weights()], end='\n\n')
# Reuse the weights extracted in the basic-model cell.
model.set_weights([Wx, Ws, b])

rnn = MySimpleRNN(10, input_shape=(3, 5), stateful=True)
rnn.set_weights(Wx, Ws, b)

# With batch_size=1 each sample is its own batch, so the second sample starts
# from the state the first one ended in.
preds_1 = model.predict(a, batch_size=1)
preds_2 = rnn.predict(a, batch_size=1)
print('Output:')
print(preds_1)

ok = np.allclose(preds_1, preds_2)
print('--------\nOK:', ok)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (1, 3, 5)                 0         
_________________________________________________________________
simple_rnn_3 (SimpleRNN)     (1, 10)                   160       
Total params: 160
Trainable params: 160
Non-trainable params: 0
_________________________________________________________________
Weight shapes: [(5, 10), (10, 10), (10,)]

Output:
[[ 0.36  -0.171  0.177 -0.463 -0.245 -0.314  0.347 -0.273 -0.49  -0.382]
 [-0.058  0.381  0.256 -0.293  0.49   0.242  0.443  0.114  0.345 -0.079]]
--------
OK: True


## LSTM

<table align='left'>
<tr><td> <img src="https://drive.google.com/uc?id=0By1KMDFVxsI2RHJjQ0dKNFJhZlE"> </td></tr>
</table>

In [9]:
class MyLSTM(MyRecurrentNet):
    """NumPy forward pass of an LSTM layer using hard-sigmoid gates."""

    def _predict_one_batch(self, X):
        """Run the LSTM recurrence over one batch.

        The stacked weight matrices and bias are cut into four equal blocks,
        in the order input gate, forget gate, cell candidate, output gate.
        Hidden and cell states are collected per step and handed to
        ``_get_output``, which also records the final pair.
        """
        batch, steps, _ = X.shape

        Wx_i, Wx_f, Wx_c, Wx_o = np.split(self.Wx, 4, axis=1)
        Ws_i, Ws_f, Ws_c, Ws_o = np.split(self.Ws, 4, axis=1)
        b_i, b_f, b_c, b_o = np.split(self.b, 4, axis=0)

        hidden0, cell0 = self._init_state(batch)
        hidden_seq = [hidden0]
        cell_seq = [cell0]
        for t in range(steps):
            x_t = X[:, t]
            h_prev = hidden_seq[-1]
            c_prev = cell_seq[-1]

            forget_gate = self._hard_sigmoid(np.dot(x_t, Wx_f) + np.dot(h_prev, Ws_f) + b_f)
            input_gate = self._hard_sigmoid(np.dot(x_t, Wx_i) + np.dot(h_prev, Ws_i) + b_i)
            output_gate = self._hard_sigmoid(np.dot(x_t, Wx_o) + np.dot(h_prev, Ws_o) + b_o)
            candidate = np.tanh(np.dot(x_t, Wx_c) + np.dot(h_prev, Ws_c) + b_c)

            # New cell state mixes the old one with the gated candidate.
            c_next = forget_gate * c_prev + input_gate * candidate
            cell_seq.append(c_next)
            hidden_seq.append(output_gate * np.tanh(c_next))
        return self._get_output(hidden_seq, cell_seq)


### testando...

In [10]:
# Reference model: a single Keras LSTM layer with 10 units.
inp = Input(shape=(3, 5))
out = LSTM(10)(inp)
model = Model(inp, out)

model.summary()
print('Weight shapes:', [w.shape for w in model.get_weights()], end='\n\n')
# Rebinds Wx, Ws, b: from here on these names hold the LSTM weights, no
# longer the SimpleRNN ones.
Wx, Ws, b = model.get_weights()

rnn = MyLSTM(10, input_shape=(3, 5))
rnn.set_weights(Wx, Ws, b)

preds_1 = model.predict(a)
preds_2 = rnn.predict(a)
print('Output:')
print(preds_1)
# Uncomment to also show the NumPy result:
# print()
# print(preds_2)

# The check passes when both implementations agree within np.allclose's
# default tolerances.
ok = np.allclose(preds_1, preds_2)
print('--------\nOK:', ok)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 3, 5)              0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 10)                640       
Total params: 640
Trainable params: 640
Non-trainable params: 0
_________________________________________________________________
Weight shapes: [(5, 40), (10, 40), (40,)]

Output:
[[ 0.004 -0.028 -0.006 -0.034 -0.035  0.046  0.008 -0.018  0.025 -0.019]
 [-0.008 -0.034 -0.03   0.017  0.011 -0.106 -0.037 -0.015  0.019  0.029]]
--------
OK: True


## GRU

<table align='left'>
<tr><td> <img src="https://drive.google.com/uc?id=0By1KMDFVxsI2dFJjRWM2VmJnM3c"> </td></tr>
</table>

In [11]:
class MyGRU(MyRecurrentNet):
    """NumPy forward pass of a GRU layer using hard-sigmoid gates."""

    def _predict_one_batch(self, X):
        """Run the GRU recurrence over one batch.

        The stacked weight matrices and bias are cut into three equal blocks,
        in the order update gate, reset gate, candidate state.  The hidden
        states of all steps are handed to ``_get_output``.
        """
        batch, steps, _ = X.shape

        Wx_z, Wx_r, Wx_h = np.split(self.Wx, 3, axis=1)
        Ws_z, Ws_r, Ws_h = np.split(self.Ws, 3, axis=1)
        b_z, b_r, b_h = np.split(self.b, 3, axis=0)

        start, _ = self._init_state(batch)
        trajectory = [start]
        for t in range(steps):
            x_t = X[:, t]
            h_prev = trajectory[-1]

            update = self._hard_sigmoid(np.dot(x_t, Wx_z) + np.dot(h_prev, Ws_z) + b_z)
            reset = self._hard_sigmoid(np.dot(x_t, Wx_r) + np.dot(h_prev, Ws_r) + b_r)

            # The reset gate scales the previous state before the recurrent product.
            candidate = np.tanh(np.dot(x_t, Wx_h) + np.dot(reset * h_prev, Ws_h) + b_h)

            # Note: this differs from the equation in the figure above!
            trajectory.append((1 - update) * candidate + update * h_prev)
        return self._get_output(trajectory)
        

### testando...

In [12]:
# Reference model: a single Keras GRU layer with 10 units.
inp = Input(shape=(3, 5))
out = GRU(10)(inp)
model = Model(inp, out)

model.summary()
print('Weight shapes:', [w.shape for w in model.get_weights()], end='\n\n')
# Rebinds Wx, Ws, b once more: they now hold the GRU weights.
Wx, Ws, b = model.get_weights()

rnn = MyGRU(10, input_shape=(3, 5))
rnn.set_weights(Wx, Ws, b)

preds_1 = model.predict(a)
preds_2 = rnn.predict(a)
print('Output:')
print(preds_1)
# Uncomment to also show the NumPy result:
# print()
# print(preds_2)

# The check passes when both implementations agree within np.allclose's
# default tolerances.
ok = np.allclose(preds_1, preds_2)
print('--------\nOK:', ok)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 3, 5)              0         
_________________________________________________________________
gru_1 (GRU)                  (None, 10)                480       
Total params: 480
Trainable params: 480
Non-trainable params: 0
_________________________________________________________________
Weight shapes: [(5, 30), (10, 30), (30,)]

Output:
[[-0.13   0.003  0.032 -0.035  0.077  0.016  0.021  0.098 -0.098 -0.018]
 [-0.053  0.099 -0.045 -0.116 -0.021 -0.08   0.12   0.161 -0.14  -0.041]]
--------
OK: True
