# RNNs GRUs LSTMs Bidirectionality

In [38]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers
from tensorflow.keras.datasets import mnist

In [13]:
(x_train,y_train),(x_test,y_test)=mnist.load_data()

In [14]:
x_train = x_train.astype("float32")/255.0
x_test = x_test.astype("float32")/255.0

In [19]:
model = keras.Sequential()
model.add(keras.Input(shape=(None,28)))#we dont a aspecific dimnension for this
model.add(
    layers.SimpleRNN(512,return_sequences=True,activation='relu')#512 nodes, True, becuase return each time
)
model.add(layers.SimpleRNN(512,activation='relu'))#we can use tanh activation for GRU
model.add(layers.Dense(10))

print(model.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_5 (SimpleRNN)     (None, None, 512)         276992    
_________________________________________________________________
simple_rnn_6 (SimpleRNN)     (None, 512)               524800    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                5130      
Total params: 806,922
Trainable params: 806,922
Non-trainable params: 0
_________________________________________________________________
None


In [16]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [17]:
model.fit(x_train,y_train,batch_size=64,epochs=10,verbose=2)

Epoch 1/10
938/938 - 130s - loss: 960.7050 - accuracy: 0.6666
Epoch 2/10
938/938 - 118s - loss: 2.5937 - accuracy: 0.5447
Epoch 3/10
938/938 - 117s - loss: 1.4074 - accuracy: 0.5028
Epoch 4/10
938/938 - 143s - loss: 0.9022 - accuracy: 0.6881
Epoch 5/10
938/938 - 161s - loss: 0.7650 - accuracy: 0.7348
Epoch 6/10
938/938 - 151s - loss: 0.7184 - accuracy: 0.7534
Epoch 7/10
938/938 - 158s - loss: 0.6840 - accuracy: 0.7748
Epoch 8/10
938/938 - 154s - loss: 0.6963 - accuracy: 0.7661
Epoch 9/10
938/938 - 154s - loss: 0.6214 - accuracy: 0.7996
Epoch 10/10
938/938 - 156s - loss: 1590449536.0000 - accuracy: 0.4949


<keras.callbacks.History at 0x1357e152a60>

In [18]:
model.evaluate(x_test,y_test,batch_size=64,verbose=2)

157/157 - 3s - loss: 110.7675 - accuracy: 0.1144


[110.76746368408203, 0.1143999993801117]

# GRUs

In [23]:
#################################### another model for GRU
model = keras.Sequential()
model.add(keras.Input(shape=(None,28)))#we dont a aspecific dimnension for this
model.add(
    layers.GRU(256,return_sequences=True,activation='tanh')#512 nodes, True, becuase return each time
)
model.add(layers.GRU(256,activation='tanh'))#we can use tanh activation
model.add(layers.Dense(10))

print(model.summary())

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru (GRU)                    (None, None, 256)         219648    
_________________________________________________________________
gru_1 (GRU)                  (None, 256)               394752    
_________________________________________________________________
dense_3 (Dense)              (None, 10)                2570      
Total params: 616,970
Trainable params: 616,970
Non-trainable params: 0
_________________________________________________________________
None


In [24]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train,y_train,batch_size=64,epochs=5,verbose=2)

In [None]:
model.evaluate(x_test,y_test,batch_size=64,verbose=2)

# LSTM

In [25]:
#################################### another model for LSTM
model = keras.Sequential()
model.add(keras.Input(shape=(None,28)))#we dont a aspecific dimnension for this
model.add(
    layers.LSTM(256,return_sequences=True,activation='tanh')#512 nodes, True, becuase return each time
)
model.add(layers.LSTM(256,activation='tanh'))#we can use tanh activation
model.add(layers.Dense(10))

print(model.summary())

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, None, 256)         291840    
_________________________________________________________________
lstm_1 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dense_4 (Dense)              (None, 10)                2570      
Total params: 819,722
Trainable params: 819,722
Non-trainable params: 0
_________________________________________________________________
None


In [26]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train,y_train,batch_size=64,epochs=5,verbose=2)

In [None]:
model.evaluate(x_test,y_test,batch_size=64,verbose=2)

# LSTM Bidirectional

In [28]:
#################################### another model for LSTM
model = keras.Sequential()
model.add(keras.Input(shape=(None,28)))#we dont a aspecific dimnension for this
model.add(
    layers.Bidirectional(
        layers.LSTM(256,return_sequences=True,activation='tanh')#512 nodes due to bidirectional, True, becuase return each time 
    )
)
model.add(
    layers.Bidirectional(
        layers.LSTM(256,activation='tanh'))#we can use tanh activation
    )
model.add(layers.Dense(10))

print(model.summary())

Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_1 (Bidirection (None, None, 512)         583680    
_________________________________________________________________
bidirectional_2 (Bidirection (None, 512)               1574912   
_________________________________________________________________
dense_5 (Dense)              (None, 10)                5130      
Total params: 2,163,722
Trainable params: 2,163,722
Non-trainable params: 0
_________________________________________________________________
None


In [29]:
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=['accuracy']
)

In [None]:
model.fit(x_train,y_train,batch_size=64,epochs=5,verbose=2)

In [None]:
model.evaluate(x_test,y_test,batch_size=64,verbose=2)

# InDepth Example for Functional API

In [34]:
import pandas as pd
import os

In [35]:
# Hyper parameters
BATCH_SIZE =64
WEIGHT_DECAY = 0.001
LEARNING_RATE = 0.01

In [50]:
train_df = pd.read_csv("./train.csv")
test_df = pd.read_csv("./test.csv")
train_images = os.getcwd() + "/train_images/" + train_df.iloc[:, 0].values
test_images = os.getcwd() + "/test_images/" + test_df.iloc[:, 0].values

train_labels = train_df.iloc[:, 1:].values
test_labels = test_df.iloc[:, 1:].values

In [51]:
def read_image(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_image(image, channels=1, dtype=tf.float32)

    # In older versions you need to set shape in order to avoid error
    # on newer (2.3.0+) the following 3 lines can safely be removed
    image.set_shape((64, 64, 1))
    label[0].set_shape([])
    label[1].set_shape([])

    labels = {"first_num": label[0], "second_num": label[1]}
    return image, labels

In [52]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_dataset = (
    train_dataset.shuffle(buffer_size=len(train_labels))
    .map(read_image)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = (
    test_dataset.map(read_image)
    .batch(batch_size=BATCH_SIZE)
    .prefetch(buffer_size=AUTOTUNE)
)

In [53]:
inputs = keras.Input(shape=(64, 64, 1))
x = layers.Conv2D(
    filters=32,
    kernel_size=3,
    padding="same",
    kernel_regularizer=regularizers.l2(WEIGHT_DECAY),
)(inputs)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.Conv2D(64, 3, kernel_regularizer=regularizers.l2(WEIGHT_DECAY),)(x)
x = layers.BatchNormalization()(x)
x = keras.activations.relu(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(
    64, 3, activation="relu", kernel_regularizer=regularizers.l2(WEIGHT_DECAY),
)(x)
x = layers.Conv2D(128, 3, activation="relu")(x)
x = layers.MaxPooling2D()(x)
x = layers.Flatten()(x)
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(64, activation="relu")(x)
output1 = layers.Dense(10, activation="softmax", name="first_num")(x)
output2 = layers.Dense(10, activation="softmax", name="second_num")(x)
model = keras.Model(inputs=inputs, outputs=[output1, output2])

In [54]:
model.compile(
    optimizer=keras.optimizers.Adam(LEARNING_RATE),
    loss=[
        keras.losses.SparseCategoricalCrossentropy(),
        keras.losses.SparseCategoricalCrossentropy(),
    ],
    metrics=["accuracy",]
)

In [85]:
model.fit(train_dataset, epochs=5, verbose=2)
model.evaluate(test_dataset, verbose=2)

Epoch 1/5


ValueError: in user code:

    D:\Anaconda\envs\DL-Env\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    D:\Anaconda\envs\DL-Env\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    D:\Anaconda\envs\DL-Env\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    D:\Anaconda\envs\DL-Env\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    D:\Anaconda\envs\DL-Env\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    D:\Anaconda\envs\DL-Env\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    D:\Anaconda\envs\DL-Env\lib\site-packages\keras\engine\training.py:787 train_step
        y_pred = self(x, training=True)
    D:\Anaconda\envs\DL-Env\lib\site-packages\keras\engine\base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    D:\Anaconda\envs\DL-Env\lib\site-packages\keras\engine\input_spec.py:266 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) +

    ValueError: Input 0 is incompatible with layer model_3: expected shape=(None, 28, 28, 1), found shape=(None, 64, 64, 1)


# Model Subclassing with Keras

In [57]:
(x_train,y_train),(x_test,y_test)=mnist.load_data()
x_train = x_train.reshape(-1,28,28,1).astype('float32')/255.0
x_test = x_test.reshape(-1,28,28,1).astype('float32')/255.0

In [59]:
# CNN ->BatchNorm->ReLU (Common structures)-->General Pattern

In [60]:
class CNNBlock(layers.Layer):
    def __init__(self,out_channels,kernel_size=3):
        super(CNNBlock,self).__init__()
        self.conv = layers.Conv2D(out_channels,kernel_size,padding='same')
        self.bn = layers.BatchNormalization()
    def call(self,input_tensor,training=False):
        x = self.conv(input_tensor)
        x = self.bn(x,training=training)
        x = tf.nn.relu(x)
        return x
        

In [61]:
model =keras.Sequential(
    [
        CNNBlock(32),
        CNNBlock(64),
        CNNBlock(128),
        layers.Flatten(),
        layers.Dense(10),
    ]
)

In [64]:
class ResBlock(layers.Layer):
    def __init__(self, channels):
        super(ResBlock, self).__init__()
        self.channels = channels
        self.cnn1 = CNNBlock(channels[0], 3)
        self.cnn2 = CNNBlock(channels[1], 3)
        self.cnn3 = CNNBlock(channels[2], 3)
        self.pooling = layers.MaxPooling2D()
        self.identity_mapping = layers.Conv2D(channels[1], 3, padding="same")

    def call(self, input_tensor, training=False):
        x = self.cnn1(input_tensor, training=training)
        x = self.cnn2(x, training=training)
        x = self.cnn3(x + self.identity_mapping(input_tensor), training=training,)
        x = self.pooling(x)
        return x

In [65]:
class ResNet_Like(keras.Model):
    def __init__(self, num_classes=10):
        super(ResNet_Like, self).__init__()
        self.block1 = ResBlock([32, 32, 64])
        self.block2 = ResBlock([128, 128, 256])
        self.block3 = ResBlock([128, 256, 512])
        self.pool = layers.GlobalAveragePooling2D()
        self.classifier = layers.Dense(num_classes)

    def call(self, input_tensor, training=False):
        x = self.block1(input_tensor, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.pool(x, training=training)
        x = self.classifier(x)
        return x

    def model(self):
        x = keras.Input(shape=(28, 28, 1))
        return keras.Model(inputs=[x], outputs=self.call(x))

In [66]:
model = ResNet_Like().model()
base_input = model.layers[0].input
base_output = model.layers[2].output
output = layers.Dense(10)(layers.Flatten()(base_output))
model = keras.Model(base_input, output)

In [67]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
)

In [68]:
model.fit(x_train, y_train, batch_size=64, epochs=1, verbose=2)
model.evaluate(x_test, y_test, batch_size=64, verbose=2)
model.save("pretrained")

938/938 - 70s - loss: 0.1025 - accuracy: 0.9682
157/157 - 3s - loss: 0.0479 - accuracy: 0.9844




INFO:tensorflow:Assets written to: pretrained\assets


INFO:tensorflow:Assets written to: pretrained\assets


# Custom Layers

In [69]:
x_train = x_train.reshape(-1,28*28).astype('float32')/255.0
x_test = x_test.reshape(-1,28*28).astype('float32')/255.0

In [72]:
class Dense(layers.Layer):
    def __init__(self, units, input_dim):
        super(Dense, self).__init__()
        self.w = self.add_weight(
            name="w",
            shape=(input_dim, units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="b", shape=(units,), initializer="zeros", trainable=True
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

In [73]:
class Dense(layers.Layer):
    def __init__(self, units):
        super(Dense, self).__init__()
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(
            name="w",
            shape=(input_shape[-1], self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            name="b", shape=(self.units,), initializer="random_normal", trainable=True,
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

In [74]:
class MyReLU(layers.Layer):
    def __init__(self):
        super(MyReLU, self).__init__()

    def call(self, x):
        return tf.math.maximum(x, 0)

In [76]:
class MyModel(keras.Model):  # model.fit, model.evalute, model.predict
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__()
        self.dense1 = Dense(64)
        self.dense2 = Dense(num_classes)
        self.relu = MyReLU()

        # self.dense1 = layers.Dense(64)
        # self.dense3 = layers.Dense(num_classes)

    def call(self, x):
        x = self.relu(self.dense1(x))
        return self.dense2(x)

In [77]:
model = MyModel()
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(),
    metrics=["accuracy"],
)

In [78]:
model.fit(x_train, y_train, batch_size=32, epochs=2, verbose=2)
model.evaluate(x_test, y_test, batch_size=32, verbose=2)

Epoch 1/2
1875/1875 - 7s - loss: 1.6072 - accuracy: 0.5361
Epoch 2/2
1875/1875 - 6s - loss: 0.7858 - accuracy: 0.7952
313/313 - 1s - loss: 0.6286 - accuracy: 0.8354


[0.6285871863365173, 0.8353999853134155]

# Save and Load Models

In [79]:
# we can save model with model.save("model_name_here")

In [80]:
#Load model
# model = keras.models.load_model('saved_models_here')# you can use the path here

In [82]:
#example
model = keras.models.load_model("./pretrained")

In [84]:
print(model.summary())# we got the model here

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 28, 28, 1)]       0         
_________________________________________________________________
res_block (ResBlock)         (None, 14, 14, 64)        28896     
_________________________________________________________________
res_block_1 (ResBlock)       (None, 7, 7, 256)         592512    
_________________________________________________________________
flatten_3 (Flatten)          (None, 12544)             0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                125450    
Total params: 746,858
Trainable params: 745,578
Non-trainable params: 1,280
_________________________________________________________________
None


In [None]:
#save load_weights
# model.save_weights('checkpoint_folder/')
#load_wights
# model.load_weights('checkpoint_folder/')