In [1]:
# https://github.com/aladdinpersson/Machine-Learning-Collection/blob/master/ML/TensorFlow/Basics/tutorial6-rnn-gru-lstm.py

import os

# TensorFlow runtime switches are set BEFORE importing tensorflow: setting them
# after the import (as this cell did previously) can be too late for them to
# take effect, because the library may already have read its configuration.
# os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "1"
# os.environ["MKL_DEBUG_CPU_TYPE"] = "5"
# os.environ["KMP_BLOCKTIME"] = "1"
# os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
# os.environ["OMP_NUM_THREADS"] = "<num_threads>"

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist

# Ask TensorFlow to grow GPU memory on demand rather than reserving it all at
# start-up. Memory growth is enabled once for every visible GPU; the previous
# version re-listed the devices and re-configured GPU 0 on every iteration.
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
else:
    print("No GPU device found")

No GPU device found


In [2]:
# Load MNIST, then apply the same preprocessing to both image arrays:
# reshape to (N, 28, 28), cast to float32 and divide by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = (
    images.reshape([-1, 28, 28]).astype("float32") / 255.0
    for images in (x_train, x_test)
)

In [3]:
# THESE ARE ALL SEPARATE MODELS
#
# SimpleRNN is a basic recurrent layer for sequential data. Each 28x28 image is
# fed in as a sequence of 28 time steps with 28 features per step.
# return_sequences=True makes the first layer emit its output at every time
# step, which is what lets a second recurrent layer be stacked on top of it.
model1 = keras.Sequential(
    [
        keras.Input(shape=(None, 28)),
        layers.SimpleRNN(512, return_sequences=True, activation="relu"),
        layers.SimpleRNN(512, activation="relu"),
        layers.Dense(10),
    ]
)
model1.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, None, 512)         276992    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 512)               524800    
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 806,922
Trainable params: 806,922
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Smaller two-layer SimpleRNN stack: tanh in the first layer, relu in the second.
model2 = keras.Sequential(
    [
        keras.Input(shape=(None, 28)),
        layers.SimpleRNN(256, return_sequences=True, activation="tanh"),
        layers.SimpleRNN(256, activation="relu"),
        layers.Dense(10),
    ]
)
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn_2 (SimpleRNN)    (None, None, 256)         72960     
                                                                 
 simple_rnn_3 (SimpleRNN)    (None, 256)               131328    
                                                                 
 dense_1 (Dense)             (None, 10)                2570      
                                                                 
Total params: 206,858
Trainable params: 206,858
Non-trainable params: 0
_________________________________________________________________


In [5]:
# A gated recurrent unit (GRU) is a gated RNN cell that is lighter than a long
# short-term memory (LSTM) cell: it generally uses less memory and runs faster,
# while LSTMs are often reported to be more accurate on long sequences.
model3 = keras.Sequential(
    [
        keras.Input(shape=(None, 28)),
        layers.GRU(256, return_sequences=True, activation="relu"),
        layers.GRU(256, activation="relu"),
        layers.Dense(10),
    ]
)
model3.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru (GRU)                   (None, None, 256)         219648    
                                                                 
 gru_1 (GRU)                 (None, 256)               394752    
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 616,970
Trainable params: 616,970
Non-trainable params: 0
_________________________________________________________________


In [6]:
# A bidirectional RNN runs the wrapped layer over the sequence in both
# directions and combines the two results into a single output. With the
# default merge mode the forward and backward outputs are concatenated, so a
# 256-unit LSTM yields 512 output features.
model4 = keras.Sequential(
    [
        keras.Input(shape=(None, 28)),
        layers.Bidirectional(
            layers.LSTM(256, return_sequences=True, activation="relu")
        ),
        layers.Bidirectional(layers.LSTM(256, name="lstm_layer2")),
        layers.Dense(10),
    ]
)
model4.summary()

In [7]:
def do_something(model, batch_size=64, epochs=10):
    """Compile, train, and evaluate ``model`` on the notebook's MNIST arrays.

    The model is compiled with sparse categorical cross-entropy on logits and
    the Adam optimizer (learning rate 0.001), fitted on ``x_train``/``y_train``
    and then evaluated on ``x_test``/``y_test``. Those four arrays are read
    from the notebook's global scope.

    Args:
        model: The Keras model to train. It is compiled and fitted in place.
        batch_size: Batch size used for both training and evaluation.
        epochs: Number of training epochs.
    """
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (that only added a stray "None" line to the output).
    model.summary()
    # Operate on the model that was passed in, not on the global model1.
    model.compile(
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=keras.optimizers.Adam(learning_rate=0.001),
        metrics=["accuracy"],
    )
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
    model.evaluate(x_test, y_test, batch_size=batch_size, verbose=1)

In [8]:
# Compile, train, and evaluate model1; the summary and per-epoch logs are printed below.
do_something(model1)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 simple_rnn (SimpleRNN)      (None, None, 512)         276992    
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 512)               524800    
                                                                 
 dense (Dense)               (None, 10)                5130      
                                                                 
Total params: 806,922
Trainable params: 806,922
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
