In [19]:
import os

# Must be set before TensorFlow is imported: the native logger is configured
# while the library loads, so setting it afterwards lets start-up messages
# through. '2' hides INFO and WARNING output and keeps errors.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras import layers, activations, regularizers
from tensorflow.keras.datasets import mnist

In [2]:
# Enable on-demand GPU memory allocation for every visible GPU.
# Looping instead of indexing [0] keeps this cell working on CPU-only machines
# (the list is empty there) and applies the same setting to all GPUs.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [4]:
# Load MNIST, then give both image arrays an explicit trailing channel axis
# (N, 28, 28, 1) and scale them by 1/255 as float32.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = (
    images.reshape(-1, 28, 28, 1).astype('float32') / 255.0
    for images in (x_train, x_test)
)

In [6]:
class CNNBlock(layers.Layer):
    """Conv2D -> BatchNormalization -> ReLU.

    Args:
        out_channels: Number of filters of the convolution.
        kernel_size: Kernel size passed to ``Conv2D`` (default 3).
        **kwargs: Standard ``Layer`` options (e.g. ``name``, ``dtype``),
            forwarded to the base class.
    """

    def __init__(self, out_channels, kernel_size=3, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can report the constructor arguments.
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.conv1 = layers.Conv2D(out_channels, kernel_size, padding='same')
        self.bn = layers.BatchNormalization()

    def call(self, input_tensor, training=False):
        """Apply the block; ``training`` is forwarded to batch normalization."""
        x = self.conv1(input_tensor)
        x = self.bn(x, training=training)
        return tf.nn.relu(x)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'out_channels': self.out_channels,
            'kernel_size': self.kernel_size,
        })
        return config

In [17]:
# Functional-API model: three CNNBlocks (32 -> 64 -> 128 filters), flatten,
# then a 10-unit Dense layer with no activation.
inp = tf.keras.Input(shape=(28, 28, 1))
conv1 = CNNBlock(32)(inp)
conv2 = CNNBlock(64)(conv1)
conv3 = CNNBlock(128)(conv2)
flatten = layers.Flatten()(conv3)
dense = layers.Dense(10)(flatten)

model = Model(inputs=[inp], outputs=[dense])
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 cnn_block_11 (CNNBlock)     (None, 28, 28, 32)        448       
                                                                 
 cnn_block_12 (CNNBlock)     (None, 28, 28, 64)        18752     
                                                                 
 cnn_block_13 (CNNBlock)     (None, 28, 28, 128)       74368     
                                                                 
 flatten_3 (Flatten)         (None, 100352)            0         
                                                                 
 dense_2 (Dense)             (None, 10)                1003530   
                                                                 
Total params: 1,097,098
Trainable params: 1,096,650
Non-tra

In [20]:
# Adam with default settings; the loss is told it receives raw scores
# (from_logits=True) because the final Dense layer has no activation.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [23]:
# Train for three epochs, then report loss/accuracy on the test split.
model.fit(x=x_train, y=y_train, epochs=3, batch_size=64, verbose=2)
model.evaluate(x=x_test, y=y_test, batch_size=64, verbose=2)

Epoch 1/3
938/938 - 16s - loss: 0.0277 - accuracy: 0.9912 - 16s/epoch - 17ms/step
Epoch 2/3
938/938 - 15s - loss: 0.0242 - accuracy: 0.9923 - 15s/epoch - 16ms/step
Epoch 3/3
938/938 - 15s - loss: 0.0222 - accuracy: 0.9923 - 15s/epoch - 16ms/step
157/157 - 1s - loss: 0.0423 - accuracy: 0.9873 - 1s/epoch - 8ms/step


[0.04232215881347656, 0.9872999787330627]

In [29]:
class ResBlock(layers.Layer):
    """Three CNNBlocks with a projected skip connection, then max pooling.

    The block input goes through a 1x1 convolution with ``channels[1]``
    filters and is added to the output of the second CNNBlock before the
    third CNNBlock runs.

    Args:
        channels: Indexable of three filter counts, one per CNNBlock.
        **kwargs: Standard ``Layer`` options (e.g. ``name``, ``dtype``),
            forwarded to the base class.
    """

    def __init__(self, channels, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can report the constructor argument.
        self._channels = (channels[0], channels[1], channels[2])
        self.conv1 = CNNBlock(channels[0])
        self.conv2 = CNNBlock(channels[1])
        self.conv3 = CNNBlock(channels[2])
        self.pool1 = layers.MaxPooling2D()
        # 1x1 convolution that gives the input channels[1] filters so it can
        # be added to the output of conv2.
        self.identity = layers.Conv2D(channels[1], 1, padding='same')

    def call(self, input_tensor, training=False):
        """Apply the block; ``training`` is forwarded to every CNNBlock."""
        op1 = self.conv1(input_tensor, training=training)
        op2 = self.conv2(op1, training=training)
        skip = self.identity(input_tensor)
        op3 = self.conv3(op2 + skip, training=training)
        return self.pool1(op3)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({'channels': list(self._channels)})
        return config

In [32]:
# Filter counts for the three CNNBlocks inside each ResBlock.
channels1 = [32, 32, 128]
channels2 = [128, 128, 256]
channels3 = [128, 256, 512]

# Three ResBlocks, global average pooling, then a 10-unit Dense layer with
# no activation.
inp = keras.Input(shape=(28, 28, 1))
res1 = ResBlock(channels1)(inp)
res2 = ResBlock(channels2)(res1)
res3 = ResBlock(channels3)(res2)
gap = layers.GlobalAveragePooling2D()(res3)
dense = layers.Dense(10)(gap)

model = Model(inputs=[inp], outputs=[dense])
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 28, 28, 1)]       0         
                                                                 
 res_block_7 (ResBlock)      (None, 14, 14, 128)       47392     
                                                                 
 res_block_8 (ResBlock)      (None, 7, 7, 256)         608896    
                                                                 
 res_block_9 (ResBlock)      (None, 3, 3, 512)         1839744   
                                                                 
 global_average_pooling2d (G  (None, 512)              0         
 lobalAveragePooling2D)                                          
                                                                 
 dense_3 (Dense)             (None, 10)                5130      
                                                           

In [36]:
# Same training configuration as before: Adam with defaults and a sparse
# cross-entropy loss that receives raw scores (from_logits=True).
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [37]:
# One training epoch, then report loss/accuracy on the test split.
model.fit(x=x_train, y=y_train, batch_size=64, epochs=1, verbose=2)
model.evaluate(x=x_test, y=y_test, batch_size=64, verbose=2)

938/938 - 42s - loss: 0.0851 - accuracy: 0.9747 - 42s/epoch - 45ms/step
157/157 - 3s - loss: 0.0599 - accuracy: 0.9818 - 3s/epoch - 19ms/step


[0.059908293187618256, 0.9818000197410583]

In [47]:
# Reload MNIST as flat 784-element float32 rows scaled by 1/255.
# Note: this rebinds x_train / x_test, replacing the (N, 28, 28, 1) arrays
# created earlier in the notebook.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = (
    images.reshape(-1, 28 * 28).astype('float32') / 255.0
    for images in (x_train, x_test)
)

In [58]:
class CustomLayer(layers.Layer):
    """Hand-written affine layer computing ``input @ w + b``.

    Args:
        output_dim: Size of the last axis of the output.
        input_dim: Size of the last axis of the input.
        **kwargs: Standard ``Layer`` options (e.g. ``name``, ``dtype``),
            forwarded to the base class.
    """

    def __init__(self, output_dim, input_dim, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can report the constructor arguments.
        self.output_dim = output_dim
        self.input_dim = input_dim
        self.w = self.add_weight(
            name='weight-x',
            shape=(input_dim, output_dim),
            initializer='random_normal',
            trainable=True,
        )
        self.b = self.add_weight(
            name='bias-x',
            shape=(output_dim,),
            initializer='zeros',
            trainable=True,
        )

    def call(self, input_tensor):
        """Return ``tf.matmul(input_tensor, w) + b``."""
        return tf.matmul(input_tensor, self.w) + self.b

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'output_dim': self.output_dim,
            'input_dim': self.input_dim,
        })
        return config

In [60]:
class CustomModel(keras.Model):
    """Two stacked CustomLayers (784 -> 64 -> num_classes) with a ReLU between.

    Args:
        num_classes: Output size of the second layer (default 10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.dense1 = CustomLayer(64, 784)
        self.dense2 = CustomLayer(num_classes, 64)

    def call(self, input_tensor):
        """Run the input through dense1, ReLU, then dense2 (no final activation)."""
        hidden = self.dense1(input_tensor)
        hidden = tf.nn.relu(hidden)
        return self.dense2(hidden)

In [61]:
# Build the subclassed model and configure it like the earlier models:
# Adam with defaults and sparse cross-entropy on raw scores.
model_custom = CustomModel()
model_custom.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# One training epoch, then report loss/accuracy on the test split.
model_custom.fit(x=x_train, y=y_train, batch_size=32, epochs=1, verbose=2)
model_custom.evaluate(x=x_test, y=y_test, batch_size=32, verbose=2)

1875/1875 - 4s - loss: 0.3425 - accuracy: 0.9062 - 4s/epoch - 2ms/step
313/313 - 1s - loss: 0.1904 - accuracy: 0.9425 - 716ms/epoch - 2ms/step


[0.19037021696567535, 0.9424999952316284]

Saving and Loading Model (Serialization of Model - saved as a data structure) and Model weights

Saving the model also saves the:
1. Model Weights
2. Model Architecture
3. Training Configuration (The configs the model was compiled with)
4. Optimizer and states

In [63]:
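# Saved weights must be loaded into a model built the same way:
# Sequential saves load into Sequential models, Functional saves into Functional models.

# model.save_weights('saved_model/', save_format = 'h5')
# model.load_weights('saved_model/')  # loads into `model` in place; the return value is not a model, so do not reassign `model`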

In [64]:
#Saving the entire model with all configs: 
# (Works with Sequential, Functional and Subclassing)

# model.save('entire_saved_model/')
# model = keras.models.load_model('entire_saved_model/')