<a href="https://colab.research.google.com/github/luisbeltranc/DeepLearning/blob/main/RNNs_con_TensorFlow_2_y_Keras_(MNIST)_LSTM_y_GRU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Redes Neuronales Recurrentes con TensorFlow 2 y Keras (2)

En este ejercicio vamos a continuar nuestra experimentación con redes neuronales recurrentes usando TensorFlow 2 y Keras.

## 1. Recapitulando

Recapitulemos lo trabajado en el notebook anterior. Primero consolidemos algunas funciones de ayuda y preparemos el conjunto de datos:

In [1]:
# Seleccionar TF 2 (directiva válida sólo en Colab)
# En otros entornos: !pip install tensorflow-gpu==2.0
%tensorflow_version 2.x
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
assert tf.__version__ >= "2.0"

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score

def preprocess_mnist(image, label):
  """Convierte una imagen de: shape=(28, 28, 1), dtype=tf.uint8
  a: shape=(28, 28), dtype=tf.float32
  """
  image = tf.squeeze(image, [-1])
  image = tf.image.convert_image_dtype(image, tf.float32)
  return image, label

def tile_labels(images, labels):
  """Convierte labels de dimensiones (BATCH_SIZE,) a [BATCH_SIZE, 28]"""
  labels2d = tf.expand_dims(labels, 1)
  labels2d_tiled = tf.tile(labels2d, [1, 28])
  return images, labels2d_tiled
  
def evaluar_mnist_many_to_many(modelo, x, y, num_units, num_epochs, resultados, nombre):
  '''Evalúa predicciones de un modelo many-to-many entrenado con MNIST'''
  pred_proba = modelo.predict_proba(x)  # (10000, 28, 10) Probabilidades para cada clase para cada paso de la secuencia
  pred = np.argmax(pred_proba, axis=-1)  # (10000, 28) Clase predecida para cada paso de la secuencia 
  pred_flat = pred.reshape([-1])  # (280000,) pred "flattened" 
  labels_tiled = np.tile(np.expand_dims(y, axis=1), [1,28])  # (10000, 28) Clase "target" para cada paso de la secuencia
  labels_tiled_flat = labels_tiled.reshape([-1])  # (280000,) labels_tiled "flattened" 
  acc_general = accuracy_score(labels_tiled_flat, pred_flat)  # Acc considerando cada paso de la secuencia
  acc_last = accuracy_score(y, pred[:,-1])  # Acc considerando sólo la predicción final de la secuencia
  resultado = {
      'modelo': nombre,
      'num_units': NUM_UNITS,
      'num_epochs': NUM_EPOCHS,
      'exactitud (accuracy)': acc_general,
      'exactitud (accuracy) en último paso': acc_last
  }
  return pred_proba, resultado

def visualize_preds(image, test_pred_proba, dataset='mnist'):
  '''Visualiza predicciones de un modelo many-to-many entrenado con MNIST'''
  fashion_mnist_label_names = ["top", "trouser", "pullover", "dress", "coat",
	"sandal", "shirt", "sneaker", "bag", "ankle boot"]
  ascii_grayscale = "B@%#*+=-:. "
  ascii_image = [[ascii_grayscale[int(round(pixel * (len(ascii_grayscale) - 1)))] for pixel in row] for row in image]
  print("{:31}{:15}{:10}".format(
    "Filas de la imagen", 
    "Predicción",
    "Probabilidad"
      ))
  for image_row, proba in zip(ascii_image, test_pred_proba):
    pred = np.argmax(proba)
    proba = np.max(proba)
    if dataset == 'fashion_mnist':
      label = ' ({})'.format(fashion_mnist_label_names[int(pred)])
    else:
      label = ''
    print("{}{:9d}{:18.7f}{}".format(
      ''.join(image_row), 
      pred,
      proba,
      label
      ))
  print()


NUM_STEPS = IMAGE_HEIGHT = 28
INPUT_SIZE = IMAGE_WIDTH = 28
NUM_UNITS = 128
NUM_CLASSES = 10
NUM_EPOCHS = 3

dataset, info = tfds.load(name="mnist", as_supervised=True, with_info=True, shuffle_files=False)
mnist_train = dataset["train"].map(preprocess_mnist).shuffle(10000).batch(2048).prefetch(1)
for images, labels in dataset["test"].map(preprocess_mnist).batch(10000).take(1):
  test_images = images.numpy()
  test_labels = labels.numpy()

resultados = pd.DataFrame(columns=['modelo', 'num_units', 'num_epochs', 'exactitud (accuracy)'])

[1mDownloading and preparing dataset mnist/3.0.1 (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to /root/tensorflow_datasets/mnist/3.0.1...[0m


local data directory. If you'd instead prefer to read directly from our public
GCS bucket (recommended if you're running on GCP), you can instead pass
`try_gcs=True` to `tfds.load` or set `data_dir=gs://tfds-data/datasets`.



HBox(children=(FloatProgress(value=0.0, description='Dl Completed...', max=4.0, style=ProgressStyle(descriptio…



[1mDataset mnist downloaded and prepared to /root/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.[0m


Probemos con un modelo con dos capas RNN apiladas:

In [2]:
# Modelo many-to-many
#model = tf.keras.models.Sequential([
#      keras.layers.SimpleRNN(NUM_UNITS, input_shape=(NUM_STEPS, INPUT_SIZE), return_sequences=True),
#      keras.layers.SimpleRNN(NUM_UNITS, return_sequences=True),
#      keras.layers.Dense(NUM_CLASSES, activation='softmax')
#])
#print(model.summary())

# Bidireccional sería:
model = tf.keras.models.Sequential([
       keras.layers.Bidirectional(keras.layers.SimpleRNN(NUM_UNITS, input_shape=(NUM_STEPS, INPUT_SIZE), return_sequences=True)),
       keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
history = model.fit(mnist_train.map(tile_labels), epochs=NUM_EPOCHS)
print(model.summary())

test_pred_proba, resultado = evaluar_mnist_many_to_many(model, test_images, test_labels, NUM_UNITS, NUM_EPOCHS, resultados, 
                                                        'SimpleRNN, dos capas, many-to-many')
resultados = resultados.append(resultado, ignore_index=True)
resultados

Epoch 1/3
Epoch 2/3
Epoch 3/3
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 28, 256)           40192     
_________________________________________________________________
dense (Dense)                (None, 28, 10)            2570      
Total params: 42,762
Trainable params: 42,762
Non-trainable params: 0
_________________________________________________________________
None
Instructions for updating:
Please use `model.predict()` instead.


Instructions for updating:
Please use `model.predict()` instead.


Unnamed: 0,modelo,num_units,num_epochs,exactitud (accuracy),exactitud (accuracy) en último paso
0,"SimpleRNN, dos capas, many-to-many",128,3,0.7753,0.6475


Y visualicemos los resultados:

In [3]:
NUM_SAMPLES = 2
for IMAGE_INDEX in np.random.randint(test_images.shape[0], size=NUM_SAMPLES):
  print("Imagen #", IMAGE_INDEX)
  print("Label :", test_labels[IMAGE_INDEX])
  visualize_preds(test_images[IMAGE_INDEX], test_pred_proba[IMAGE_INDEX])


Imagen # 6257
Label : 2
Filas de la imagen             Predicción     Probabilidad
BBBBBBBBBBBBBBBBBBBBBBBBBBBB        2         0.6709101
BBBBBBBBBBBBBBBBBBBBBBBBBBBB        2         0.6500217
BBBBBBBBBBBBBBBBBBBBBBBBBBBB        2         0.6196710
BBBBBBBBBBBBBBBBBBBBBBBBBBBB        2         0.6210701
BBBBBBBBBBBBB@%==+@BBBBBBBBB        2         0.6609697
BBBBBBBBB@#-:      :%BBBBBBB        2         0.6357570
BBBBBBB@=.          .+BBBBBB        2         0.5722681
BBBBBB#.  .=#BBBB%*- .%BBBBB        2         0.6567498
BBBBBB= :#@BBBBBBBBB= .BBBBB        2         0.5936154
BBBBBB++@BBBBBBBBBBB@: BBBBB        2         0.5636172
BBBBBBBBBBBBBBBBBBBBB-.BBBBB        2         0.5591592
BBBBBBBBBBBBBBBBBBBB# *BBBBB        2         0.5755823
BBBBBBBBBBBBBBBBBBBB-.@BBBBB        2         0.6265932
BBBBBBBBBBB@%%%%BBB+ =BBBBBB        2         0.6912652
BBBBBBBBB*-.   .=## =@BBBBBB        2         0.7084684
BBBBBB@*.          :@BBBBBBB        2         0.7935316
BBBBBB=   ::-+:.    *

## 2. LSTM usando `tf.keras.layers.LSTM`

In [4]:
model = tf.keras.models.Sequential([
      keras.layers.LSTM(NUM_UNITS, input_shape=(NUM_STEPS, INPUT_SIZE), return_sequences=True),
      keras.layers.LSTM(NUM_UNITS, return_sequences=True),
      keras.layers.Dense(NUM_CLASSES, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
history = model.fit(mnist_train.map(tile_labels), epochs=NUM_EPOCHS)
print(model.summary())

test_pred_proba, resultado = evaluar_mnist_many_to_many(model, test_images, test_labels, NUM_UNITS, NUM_EPOCHS, resultados, 
                                                        'LSTM, dos capas, many-to-many')
resultados = resultados.append(resultado, ignore_index=True)
resultados

Epoch 1/3
Epoch 2/3
Epoch 3/3
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 28, 128)           80384     
_________________________________________________________________
lstm_1 (LSTM)                (None, 28, 128)           131584    
_________________________________________________________________
dense_1 (Dense)              (None, 28, 10)            1290      
Total params: 213,258
Trainable params: 213,258
Non-trainable params: 0
_________________________________________________________________
None


Unnamed: 0,modelo,num_units,num_epochs,exactitud (accuracy),exactitud (accuracy) en último paso
0,"SimpleRNN, dos capas, many-to-many",128,3,0.7753,0.6475
1,"LSTM, dos capas, many-to-many",128,3,0.596079,0.8602


In [5]:
NUM_SAMPLES = 2
for IMAGE_INDEX in np.random.randint(test_images.shape[0], size=NUM_SAMPLES):
  print("Imagen #", IMAGE_INDEX)
  print("Label :", test_labels[IMAGE_INDEX])
  visualize_preds(test_images[IMAGE_INDEX], test_pred_proba[IMAGE_INDEX])


Imagen # 3902
Label : 6
Filas de la imagen             Predicción     Probabilidad
BBBBBBBBBBBBBBBBBBBBBBBBBBBB        1         0.1060698
BBBBBBBBBBBBBBBBBBBBBBBBBBBB        1         0.1110501
BBBBBBBBBBB .BBBBBBBBBBBBBBB        4         0.1133353
BBBBBBBBBB%  @BBBBBBBBBBBBBB        4         0.1227792
BBBBBBBBBBB= @BBBBBBBBBBBBBB        4         0.1328907
BBBBBBBBBBB -BBBBBBBBBBBBBBB        5         0.1524575
BBBBBBBBBB# -BBBBBBBBBBBBBBB        5         0.1882555
BBBBBBBBBB* -BBBBBBBBBBBBBBB        5         0.2439182
BBBBBBBBBB* -BBBBBBBBBBBBBBB        5         0.3142004
BBBBBBBBBB* -BBBBBBBBBBBBBBB        5         0.3588222
BBBBBBBBBB* -BBBBBBBBBBBBBBB        6         0.4713330
BBBBBBBBBB% .@BBBBBBBBBBBBBB        6         0.6672776
BBBBBBBBBBB  @BBBBBBBBBBBBBB        6         0.8071932
BBBBBBBBBBB. @BBB@##BBBBBBBB        6         0.8816866
BBBBBBBBBBB: @BB*.  +BBBBBBB        6         0.9197451
BBBBBBBBBBB  %B+ :*..BBBBBBB        6         0.9424248
BBBBBBBBBBB= -* -BB- 

## 3. LSTM usando una celda propia

Salida (*output*):
$$ h_t = o_t \odot \tanh(c_t) $$

Unidad de estado (*state unit*):
$$ c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t $$
$$ \tilde{c}_t = \tanh(U_c x_t + W_c h_{t-1} + b_c) $$

Compuertas de salida ($o$), entrada ($i$) y olvido ($f$):
$$ o_t = \sigma(U_o x_t + W_o h_{t-1} + b_o) $$
$$ i_t = \sigma(U_i x_t + W_i h_{t-1} + b_i) $$
$$ f_t = \sigma(U_f x_t + W_f h_{t-1} + b_f) $$

In [6]:
class MiCeldaLSTM(tf.keras.layers.Layer):
  """Cell class for LSTM.
  Arguments:
    units: Positive integer, dimensionality of the output space.
  Call arguments:
    inputs: A 2D tensor.
    states: List of state tensors corresponding to the previous timestep.
  Returns:
    output: Output tensor.
    next_states: List of state tensors corresponding to the next timestep.
  """

  def __init__(self,
               units,
               **kwargs):
    super(MiCeldaLSTM, self).__init__(**kwargs)
    self.units = units
    self.state_size = [units, units]  # LSTM tiene dos estados: h y c
    self.output_size = self.units

  def build(self, input_shape):
    # Parámetros principales
    self.U = self.add_weight( shape=(input_shape[-1], self.units),  name='U', initializer='glorot_uniform' )
    self.W = self.add_weight( shape=(self.units, self.units),       name='W', initializer='glorot_uniform' )
    self.b = self.add_weight( shape=(self.units,),                  name='b', initializer='zeros' )

    # Parámetros del output gate
    self.U_o = self.add_weight( shape=(input_shape[-1], self.units),  name='U_o', initializer='glorot_uniform' )
    self.W_o = self.add_weight( shape=(self.units, self.units),       name='W_o', initializer='glorot_uniform' )
    self.b_o = self.add_weight( shape=(self.units,),                  name='b_o', initializer='zeros' )

    # Parámetros del input gate
    self.U_i = self.add_weight( shape=(input_shape[-1], self.units),  name='U_i', initializer='glorot_uniform' )
    self.W_i = self.add_weight( shape=(self.units, self.units),       name='W_i', initializer='glorot_uniform' )
    self.b_i = self.add_weight( shape=(self.units,),                  name='b_i', initializer='zeros' )

    # Parámetros del forget gate
    self.U_f = self.add_weight( shape=(input_shape[-1], self.units),  name='U_f', initializer='glorot_uniform' )
    self.W_f = self.add_weight( shape=(self.units, self.units),       name='W_f', initializer='glorot_uniform' )
    self.b_f = self.add_weight( shape=(self.units,),                  name='b_f', initializer='zeros' )
    self.built = True


  def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
    if inputs is not None:
      batch_size = tf.shape(inputs)[0]
      dtype = inputs.dtype
    return [tf.zeros([batch_size, self.state_size[0]], dtype=dtype),
            tf.zeros([batch_size, self.state_size[1]], dtype=dtype)] # Valores iniciales de los estados h y c

  # `call` define las operaciones del layer
  def call(self, inputs, states, training=None):
    h_tmenos1 = states[0]
    c_tmenos1 = states[1]

    o = tf.sigmoid(tf.matmul(inputs, self.U_o) + tf.matmul(h_tmenos1, self.W_o) + self.b_o)
    i = tf.sigmoid(tf.matmul(inputs, self.U_i) + tf.matmul(h_tmenos1, self.W_i) + self.b_i)
    f = tf.sigmoid(tf.matmul(inputs, self.U_f) + tf.matmul(h_tmenos1, self.W_f) + self.b_f)
    ctilde = tf.tanh(tf.matmul(inputs, self.U) + tf.matmul(h_tmenos1, self.W) + self.b)
    c_t = tf.multiply(f, c_tmenos1) + tf.multiply(i, ctilde)
    h_t = tf.multiply(o, tf.tanh(c_t))

    output = h_t
    next_states = [h_t, c_t]

    return output, next_states

  # `get_config` es muy fácil de implementar y permite serializar el modelo
  def get_config(self):
    config = {
        'units':
            self.units
    }
    base_config = super(MiCeldaLSTM, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

In [7]:
model = tf.keras.models.Sequential([
      keras.layers.RNN(MiCeldaLSTM(NUM_UNITS), input_shape=(NUM_STEPS, INPUT_SIZE), return_sequences=True),
      keras.layers.RNN(MiCeldaLSTM(NUM_UNITS), return_sequences=True),
      keras.layers.Dense(NUM_CLASSES, activation='softmax')
])
print(model.summary())

model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
history = model.fit(mnist_train.map(tile_labels), epochs=NUM_EPOCHS)

test_pred_proba, resultado = evaluar_mnist_many_to_many(model, test_images, test_labels, NUM_UNITS, NUM_EPOCHS, resultados, 
                                                        'MiCeldaLSTM, dos capas, many-to-many')
resultados = resultados.append(resultado, ignore_index=True)
resultados

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rnn (RNN)                    (None, 28, 128)           80384     
_________________________________________________________________
rnn_1 (RNN)                  (None, 28, 128)           131584    
_________________________________________________________________
dense_2 (Dense)              (None, 28, 10)            1290      
Total params: 213,258
Trainable params: 213,258
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
Epoch 2/3
Epoch 3/3


Unnamed: 0,modelo,num_units,num_epochs,exactitud (accuracy),exactitud (accuracy) en último paso
0,"SimpleRNN, dos capas, many-to-many",128,3,0.7753,0.6475
1,"LSTM, dos capas, many-to-many",128,3,0.596079,0.8602
2,"MiCeldaLSTM, dos capas, many-to-many",128,3,0.603718,0.8692


## 4. Usando una celda GRU propia

Estado (*state*):
$$ h_t = (1 - z_t) \odot h_{t-1}  + z_t \odot \tilde{h}_t $$
$$ \tilde{h}_t = \tanh(U x_t + W (r_t \odot h_{t-1}) + b) $$

Compuertas de actualización ($u$) y restablecimiento ($r$):
$$ z_t = \sigma(U_z x_t + W_z h_{t-1} + b_z) $$
$$ r_t = \sigma(U_r x_t + W_r h_{t-1} + b_r) $$

In [26]:
class MiCeldaGRU(tf.keras.layers.Layer):
  """Cell class for GRU.
  Arguments:
    units: Positive integer, dimensionality of the output space.
  Call arguments:
    inputs: A 2D tensor.
    states: List of state tensors corresponding to the previous timestep.
  Returns:
    output: Output tensor.
    next_states: List of state tensors corresponding to the next timestep.
  """

  def __init__(self,
               units,
               **kwargs):
    super(MiCeldaGRU, self).__init__(**kwargs)
    self.units = units
    self.state_size = [units]  # GRU tiene un estado: h
    self.output_size = self.units

  def build(self, input_shape):
    # Parámetros principales
    self.U = self.add_weight( shape=(input_shape[-1], self.units),  name='U', initializer='glorot_uniform' )
    self.W = self.add_weight( shape=(self.units, self.units),       name='W', initializer='glorot_uniform' )
    self.b = self.add_weight( shape=(self.units,),                  name='b', initializer='zeros' )

    # Parámetros de compuerta de actualización z
    self.U_z = self.add_weight( shape=(input_shape[-1], self.units),  name='U_z', initializer='glorot_uniform' )
    self.W_z = self.add_weight( shape=(self.units, self.units),       name='W_z', initializer='glorot_uniform' )
    self.b_z = self.add_weight( shape=(self.units,),                  name='b_z', initializer='zeros' )

    # Parámetros de compuerta de restablecimiento r
    self.U_r = self.add_weight( shape=(input_shape[-1], self.units),  name='U_r', initializer='glorot_uniform' )
    self.W_r = self.add_weight( shape=(self.units, self.units),       name='W_r', initializer='glorot_uniform' )
    self.b_r = self.add_weight( shape=(self.units,),                  name='b_r', initializer='zeros' )
    self.built = True

  def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
    if inputs is not None:
      batch_size = tf.shape(inputs)[0]
      dtype = inputs.dtype
    return [tf.zeros([batch_size, self.state_size[0]], dtype=dtype),
            ] # Valores iniciales del estado h

  # `call` define las operaciones del layer
  def call(self, inputs, states, training=None):
    h_tmenos1 = states[0]
    
    z = tf.sigmoid(tf.matmul(inputs, self.U_z) + tf.matmul(h_tmenos1, self.W_z) + self.b_z)
    r = tf.sigmoid(tf.matmul(inputs, self.U_r) + tf.matmul(h_tmenos1, self.W_r) + self.b_z)
    htilde = tf.tanh(tf.matmul(inputs, self.U) + tf.matmul(tf.multiply(r,h_tmenos1), self.W) + self.b)
    h_t = tf.multiply((1-z), h_tmenos1) + tf.multiply(z, htilde)
    #h_t = tf.multiply(o, tf.tanh(c_t))

    output = h_t
    next_states = [h_t]

    return output, next_states

  # `get_config` es muy fácil de implementar y permite serializar el modelo
  def get_config(self):
    config = {
        'units':
            self.units
    }
    base_config = super(MiCeldaGRU, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))

In [27]:
model = tf.keras.models.Sequential([
      keras.layers.RNN(MiCeldaGRU(NUM_UNITS), input_shape=(NUM_STEPS, INPUT_SIZE), return_sequences=True),
      keras.layers.RNN(MiCeldaGRU(NUM_UNITS), return_sequences=True),
      keras.layers.Dense(NUM_CLASSES, activation='softmax')
])
print(model.summary())

model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
history = model.fit(mnist_train.map(tile_labels), epochs=NUM_EPOCHS)

test_pred_proba, resultado = evaluar_mnist_many_to_many(model, test_images, test_labels, NUM_UNITS, NUM_EPOCHS, resultados, 
                                                        'MiCeldaGRU, dos capas, many-to-many')
resultados = resultados.append(resultado, ignore_index=True)
resultados

Model: "sequential_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rnn_16 (RNN)                 (None, 28, 128)           60288     
_________________________________________________________________
rnn_17 (RNN)                 (None, 28, 128)           98688     
_________________________________________________________________
dense_10 (Dense)             (None, 28, 10)            1290      
Total params: 160,266
Trainable params: 160,266
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
















Epoch 2/3
Epoch 3/3


Unnamed: 0,modelo,num_units,num_epochs,exactitud (accuracy),exactitud (accuracy) en último paso
0,"SimpleRNN, dos capas, many-to-many",128,3,0.7753,0.6475
1,"LSTM, dos capas, many-to-many",128,3,0.596079,0.8602
2,"MiCeldaLSTM, dos capas, many-to-many",128,3,0.603718,0.8692
3,"MiCeldaGRU, dos capas, many-to-many",128,3,0.656743,0.9121


### 5. Usando una celda LSTM propia con conexiones desde el estado interior $c$ a las compuertas ("peepholes")

Prueba a configurar una celda LSTM con conexiones "peephole":

Salida (*output*):
$$ h_t = o_t \odot \tanh(c_t) $$

Unidad de estado (*state unit*):
$$ c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t $$
$$ \tilde{c}_t = \tanh(U_c x_t + W_c h_{t-1} + b_c) $$

Compuertas de salida ($o$), entrada ($i$) y olvido ($f$):
$$ o_t = \sigma(U_o x_t + W_o h_{t-1} + Q_o c_t + b_o) $$
$$ i_t = \sigma(U_i x_t + W_i h_{t-1} + Q_i c_{t-1} + b_i) $$
$$ f_t = \sigma(U_f x_t + W_f h_{t-1} + Q_f c_{t-1} + b_f) $$

In [35]:
class MiCeldaLSTMconPeepholes(tf.keras.layers.Layer):
  """Cell class for LSTM w/peepholes.
  """

  def __init__(self,
               units,
               **kwargs):
    super(MiCeldaLSTMconPeepholes, self).__init__(**kwargs)
    self.units = units
    self.state_size = [units, units]  # LSTM tiene dos estados: h y c
    self.output_size = self.units

  def build(self, input_shape):
    # Parámetros principales
    self.U = self.add_weight( shape=(input_shape[-1], self.units),  name='U', initializer='glorot_uniform' )
    self.W = self.add_weight( shape=(self.units, self.units),       name='W', initializer='glorot_uniform' )
    self.Q = self.add_weight( shape=(self.units, self.units),       name='Q', initializer='glorot_uniform' )
    self.b = self.add_weight( shape=(self.units,),                  name='b', initializer='zeros' )

    # Parámetros del output gate
    self.U_o = self.add_weight( shape=(input_shape[-1], self.units),  name='U_o', initializer='glorot_uniform' )
    self.W_o = self.add_weight( shape=(self.units, self.units),       name='W_o', initializer='glorot_uniform' )
    self.Q_o = self.add_weight( shape=(self.units, self.units),       name='Q_o', initializer='glorot_uniform' )
    self.b_o = self.add_weight( shape=(self.units,),                  name='b_o', initializer='zeros' )

    # Parámetros del input gate
    self.U_i = self.add_weight( shape=(input_shape[-1], self.units),  name='U_i', initializer='glorot_uniform' )
    self.W_i = self.add_weight( shape=(self.units, self.units),       name='W_i', initializer='glorot_uniform' )
    self.Q_i = self.add_weight( shape=(self.units, self.units),       name='Q_i', initializer='glorot_uniform' )
    self.b_i = self.add_weight( shape=(self.units,),                  name='b_i', initializer='zeros' )

    # Parámetros del forget gate
    self.U_f = self.add_weight( shape=(input_shape[-1], self.units),  name='U_f', initializer='glorot_uniform' )
    self.W_f = self.add_weight( shape=(self.units, self.units),       name='W_f', initializer='glorot_uniform' )
    self.Q_f = self.add_weight( shape=(self.units, self.units),       name='Q_f', initializer='glorot_uniform' )
    self.b_f = self.add_weight( shape=(self.units,),                  name='b_f', initializer='zeros' )
    self.built = True


  def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
    if inputs is not None:
      batch_size = tf.shape(inputs)[0]
      dtype = inputs.dtype
    return [tf.zeros([batch_size, self.state_size[0]], dtype=dtype),
            tf.zeros([batch_size, self.state_size[1]], dtype=dtype)] # Valores iniciales de los estados h y c

  # `call` define las operaciones del layer
  def call(self, inputs, states, training=None):
    h_tmenos1 = states[0]
    c_tmenos1 = states[1]
    
    i = tf.sigmoid(tf.matmul(inputs, self.U_i) + tf.matmul(h_tmenos1, self.W_i) + tf.matmul(c_tmenos1, self.Q_i) + self.b_i)
    f = tf.sigmoid(tf.matmul(inputs, self.U_f) + tf.matmul(h_tmenos1, self.W_f) + tf.matmul(c_tmenos1, self.Q_f) + self.b_f)
    ctilde = tf.tanh(tf.matmul(inputs, self.U) + tf.matmul(h_tmenos1, self.W) + self.b)
    c_t = tf.multiply(f, c_tmenos1) + tf.multiply(i, ctilde)
    o = tf.sigmoid(tf.matmul(inputs, self.U_o) + tf.matmul(h_tmenos1, self.W_o) + tf.matmul(c_t, self.Q_o) + self.b_o)
    h_t = tf.multiply(o, tf.tanh(c_t))

    output = h_t
    next_states = [h_t, c_t]

    return output, next_states

  # `get_config` es muy fácil de implementar y permite serializar el modelo
  def get_config(self):
    config = {
        'units':
            self.units
    }
    base_config = super(MiCeldaLSTMconPeepholes, self).get_config()
    return dict(list(base_config.items()) + list(config.items()))




In [36]:
model = tf.keras.models.Sequential([
      keras.layers.RNN(MiCeldaLSTMconPeepholes(NUM_UNITS), input_shape=(NUM_STEPS, INPUT_SIZE), return_sequences=True),
      keras.layers.RNN(MiCeldaLSTMconPeepholes(NUM_UNITS), return_sequences=True),
      keras.layers.Dense(NUM_CLASSES, activation='softmax')
])
print(model.summary())

model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
history = model.fit(mnist_train.map(tile_labels), epochs=NUM_EPOCHS)

test_pred_proba, resultado = evaluar_mnist_many_to_many(model, test_images, test_labels, NUM_UNITS, NUM_EPOCHS, resultados, 
                                                        'MiCeldaLSTMconPeepholes, dos capas, many-to-many')
resultados = resultados.append(resultado, ignore_index=True)
resultados

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rnn_22 (RNN)                 (None, 28, 128)           145920    
_________________________________________________________________
rnn_23 (RNN)                 (None, 28, 128)           197120    
_________________________________________________________________
dense_13 (Dense)             (None, 28, 10)            1290      
Total params: 344,330
Trainable params: 344,330
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/3
















Epoch 2/3
Epoch 3/3


Unnamed: 0,modelo,num_units,num_epochs,exactitud (accuracy),exactitud (accuracy) en último paso
0,"SimpleRNN, dos capas, many-to-many",128,3,0.7753,0.6475
1,"LSTM, dos capas, many-to-many",128,3,0.596079,0.8602
2,"MiCeldaLSTM, dos capas, many-to-many",128,3,0.603718,0.8692
3,"MiCeldaGRU, dos capas, many-to-many",128,3,0.656743,0.9121
4,"MiCeldaLSTMconPeepholes, dos capas, many-to-many",128,3,0.642639,0.9285
