In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np

## Sequential model

In [2]:
model = tf.keras.Sequential()
# Densely connected layer with 64 units to the model
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
# Softmax layer
model.add(layers.Dense(10, activation='softmax'))


## Model subclassing

We can build a fully customizable model by sublclassing <b>tf.keras.Model</b> and defining our own forward pass. Here we set the attributes in the $__init__$ method and the forward pass in the call method

In [3]:
data = np.random.random((1000, 32))
labels = np.random.random((1000, 10))

In [4]:
class MyModel(tf.keras.Model):
    def __init__(self, num_classes=10):
        super(MyModel, self).__init__(name='my_model')
        self.num_classes = num_classes
        # Define the layers
        self.dense_1 = layers.Dense(32, activation='relu')
        self.dense_2 = layers.Dense(num_classes, activation='sigmoid')
    
    def call(self, inputs):
        # Forward pass
        x = self.dense_1(inputs)
        return self.dense_2(x)

In [5]:
# Instantiate the model class
model = MyModel(num_classes=10)
model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001), loss='categorical_crossentropy', metrics=['accuracy'])

In [6]:
model.fit(data, labels, batch_size=32, epochs=5)

W0802 23:31:24.444645 139713071793984 deprecation.py:323] From /home/soumyajit/.local/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f112f6f1c50>

## Custom layers

We create custom layers by sublclassing <b>tf.keras.layers.Layer</b> and<br>
- \__init__ : Optionally define sublayers to be used by this layer
- build : Create the weights of the layer. And add weights with the <b>add_weight</b> method
- call : Define the forward pass
- A layer can be serialized by implementing the get_config method and the from_config class method

In [7]:
data = np.random.random((1000, 32))
data2 = np.random.random((1000, 32))
labels = np.random.random((1000, 10))

In [8]:
dataset = [data, data2]

In [9]:
data.shape

(1000, 32)

In [10]:
class MyLayer(tf.keras.layers.Layer):
    
    def __init__(self, output_dim, **kwargs):
        print("We are in __init__")
        self.output_dim = output_dim
        super(MyLayer, self).__init__(**kwargs)
        
    def build(self, input_shape):
        print("We are in  build")
        print(input_shape)
        # create a trainable weight variable
        self.kernel = self.add_weight(name='kernel',
                                     shape=(input_shape[1], self.output_dim),
                                     initializer = 'uniform',
                                     trainable = True)
        #print("Kernel: ", self.kernel)
        
    def call(self, inputs):
        print("We are in call")
        #input_1, input_2 = inputs
        #return tf.add(tf.matmul(input_1, self.kernel), input_2)
        return tf.matmul(inputs, self.kernel)
    
    def get_config(self):
        print("We are in get_config")
        base_config = super(MyLayer, self).get_config()
        base_config['output_dim'] = self.output_dim
        return base_config
    

In [11]:
model = tf.keras.Sequential([MyLayer(10), layers.Activation('softmax')])

We are in __init__


In [12]:
model.compile(optimizer=tf.keras.optimizers.RMSprop(0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [13]:
model.fit(data, labels, batch_size=32, epochs=5)


We are in  build
(None, 32)
We are in call
Train on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f11143c62b0>

In [14]:
tf.keras.backend.clear_session()

# The layer class

In [15]:
class Linear(tf.keras.layers.Layer):
    
    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(initial_value=w_init(shape=(input_dim, units),
                                              dtype='float32'),trainable=True)
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(initial_value=b_init(shape=(units,), dtype='float32'), trainable=True)
        
    def call(self, inputs):
        print("I am in call")
        return tf.matmul(inputs, self.w) + self.b
        

In [16]:
x =  tf.ones((2,2))
linear_layer = Linear(4,2)
y = linear_layer(x)
print(y)

I am in call
tf.Tensor(
[[ 0.07348098 -0.00939143 -0.02649076  0.03276831]
 [ 0.07348098 -0.00939143 -0.02649076  0.03276831]], shape=(2, 4), dtype=float32)


In [17]:
assert linear_layer.weights == [linear_layer.w, linear_layer.b]

In [18]:
linear_layer.weights

[<tf.Variable 'Variable:0' shape=(2, 4) dtype=float32, numpy=
 array([[ 6.2038124e-02,  3.3474594e-06, -4.8606031e-02,  4.1684832e-02],
        [ 1.1442853e-02, -9.3947789e-03,  2.2115273e-02, -8.9165196e-03]],
       dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(4,) dtype=float32, numpy=array([0., 0., 0., 0.], dtype=float32)>]

A layer can have non-trainable weights. Meaning the weights are not taken into account during backpropagation when we are training the layer.

In [19]:
class Sum(tf.keras.layers.Layer):
    def __init__(self, input_dim):
        super().__init__()
        self.total = tf.Variable(initial_value=tf.zeros((input_dim, )), 
                                 trainable= False)
    def call(self, inputs):
        self.total.assign_add(tf.reduce_sum(inputs, axis=0))
        return self.total

In [20]:
x = tf.ones((2,2))
my_sum = Sum(2)
y = my_sum(x)
y = my_sum(x)
y = my_sum(x)
print(y)

<tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([6., 6.], dtype=float32)>


In [21]:
class Linear(tf.keras.layers.Layer):
    
    def __init__(self, units=64):
        super().__init__()
        self.units = units
    
    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units), 
                                 initializer='random_normal', 
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='random_normal',
                                trainable=True)
        
    def call(self, inputs):
        print(self.b)
        return tf.matmul(inputs, self.w) + self.b

In [22]:
x = tf.ones((2,2))
linear_layer = Linear(10)
y = linear_layer(x)
y

<tf.Variable 'linear_1/Variable:0' shape=(10,) dtype=float32, numpy=
array([-0.08670553,  0.00480035, -0.05248139,  0.00826279, -0.09604456,
        0.00295295,  0.03094005, -0.02024541, -0.08639894, -0.08661783],
      dtype=float32)>


<tf.Tensor: id=3493, shape=(2, 10), dtype=float32, numpy=
array([[-0.19240245,  0.03941366, -0.02431128, -0.02553701, -0.1108198 ,
         0.09344528, -0.03169997, -0.03902419, -0.06124165, -0.14791548],
       [-0.19240245,  0.03941366, -0.02431128, -0.02553701, -0.1108198 ,
         0.09344528, -0.03169997, -0.03902419, -0.06124165, -0.14791548]],
      dtype=float32)>

### Layers are recursively composable

In [23]:
class MLPBlock(tf.keras.layers.Layer):
    
    def __init__(self):
        super().__init__()
        self.linear1 = Linear(32)
        self.linear2 = Linear(32)
        self.linear3 = Linear(32)
        self.linear4 = Linear(1)
    
    def call(self, inputs):
        x = self.linear1(inputs)
        x = tf.nn.relu(x)
        x = self.linear2(x)
        x = tf.nn.relu(x)
        x = self.linear3(x)
        x = tf.nn.relu(x)
        return self.linear4(x)

In [24]:
mlp = MLPBlock()
y = mlp(tf.ones(shape=(3,64)))
print("Weights: ", len(mlp.weights))
print("trainable weights: ", len(mlp.trainable_weights))

<tf.Variable 'mlp_block/linear_2/Variable:0' shape=(32,) dtype=float32, numpy=
array([ 0.03481292, -0.00940918,  0.02307169, -0.00797744, -0.03739671,
        0.11979245, -0.00796023,  0.07728885, -0.1614969 ,  0.02622102,
        0.02817338, -0.05258849, -0.00651452,  0.02734741, -0.01881116,
        0.0220915 ,  0.0023178 , -0.05221058,  0.00597975,  0.02553415,
        0.07105091,  0.03704478, -0.00628842,  0.02715683, -0.0654978 ,
        0.06027733, -0.00441276, -0.03020672,  0.06210121, -0.00742736,
       -0.00546706, -0.00863805], dtype=float32)>
<tf.Variable 'mlp_block/linear_3/Variable:0' shape=(32,) dtype=float32, numpy=
array([ 0.06679098, -0.07779744,  0.00693042, -0.0392703 ,  0.02995011,
       -0.02553695,  0.10237887,  0.01981111, -0.09270662,  0.02814372,
       -0.01603961,  0.08201616,  0.01768069, -0.055011  , -0.05020701,
       -0.02050759, -0.02410841, -0.01638143,  0.03707058,  0.07861806,
       -0.02848534,  0.01321686, -0.02443574, -0.07694656,  0.00072243,


## Custom ConvLayer

In [25]:
import tensorflow as tf

In [26]:
class ConvLayer(tf.keras.layers.Layer):
    
    def __init__(self):
        super().__init__()
        self.w_conv1 = tf.Variable(tf.random.normal([5,5,1,32],
                                                       stddev=0.1))
        self.b_conv1 = tf.Variable(tf.constant(0.1, shape=[32]))
        
    def call(self, inputs):
        self.convolve1 = tf.nn.conv2d(inputs, self.w_conv1, 
                                     strides=[1,1,1,1], padding='SAME') + self.b_conv1
        self.h_conv1 = tf.nn.relu(self.convolve1)
        self.conv1 = tf.nn.max_pool(self.h_conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
        return self.conv1.shape
    
        
        

In [27]:
layer = ConvLayer()

In [28]:
x = tf.ones((1, 28,28,1))

In [29]:
layer(x)

TensorShape([1, 14, 14, 32])