In [1]:
import tensorflow as tf

In [2]:
tf.__version__

'2.1.0'

In [3]:
import numpy as np

#### Batch Normalization

In [4]:
a = tf.reshape(tf.range(1,7, dtype = tf.float32), shape=(2,3))
a

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [5]:
class custom_bn(tf.keras.layers.Layer):
    
    def __init__(self, scale=1, offset=0, axes=[0], **kwargs):
        super(custom_bn, self).__init__(**kwargs)
        """
        A simple batch normalization layer. 
        Check Keras BatchNormalization for advanced customization
        
        Also, the $/sigma$ in the equation in the tf API referring to variance
        is the standard deviation of the variance, not the variance itself
        
        Parameters
        ==========
        axes: list; passed as argument to tf.nn.moments for calculating the mean and variance
        """
        
        self.scale = scale
        self.offset = offset
        self.axes = axes
        
    def batch_norm_fn(self, inputs):
        
        self.mean, self.variance = tf.nn.moments(inputs, self.axes, keepdims=True)
        # If keepdims = False, resulting in (for axes = [1])
        # "InvalidArgumentError: Incompatible shapes: [2,3] vs. [2] [Op:Mul] name: custom_bn_1/cond/batchnorm/mul/"
        # This is because the output shapes returned from tf.nn.moments are not compatible for broadcasting
        # depending on which axes is used. 
        # Changing keepdims = True solves this   

        
        
        return tf.nn.batch_normalization(inputs, mean = self.mean, variance= self.variance,
                                                                   scale = self.scale, offset = self.offset, 
                                                                  variance_epsilon=0.001)
        
    def call(self, inputs, training = None):
        # If proper arguments are not passed to the batch_norm function below, then the following error will be output
        # "TypeError: batch_normalization() missing 5 required positional arguments: 
        #    'mean', 'variance', 'offset', 'scale', and 'variance_epsilon'"
        
        return tf.cond(training, lambda: self.batch_norm_fn(inputs), lambda: inputs)
            

### Testing on 2D data

In [6]:
# Axis = 0
custom_bn_layer = custom_bn(scale =2, offset = 1, axes=[0])

In [7]:
custom_bn_layer(a) # defaulting training = None

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [8]:
custom_bn_layer(a, training = True)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-0.9995558, -0.9995556, -0.9995558],
       [ 2.9995556,  2.999556 ,  2.9995556]], dtype=float32)>

In [9]:
# Axis = 1
custom_bn_layer2 = custom_bn(scale =2, offset = 1, axes=[1])

In [10]:
custom_bn_layer2(a, training = True)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-1.4476547,  1.       ,  3.4476547],
       [-1.4476547,  1.       ,  3.4476547]], dtype=float32)>

##### *Testing: Dismantling the batch normalization process*

In [11]:
a

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [12]:
mean, variance = tf.nn.moments(a, axes = [0], keepdims=False)
print(mean, variance)

tf.Tensor([2.5 3.5 4.5], shape=(3,), dtype=float32) tf.Tensor([2.25 2.25 2.25], shape=(3,), dtype=float32)


In [13]:
tf.nn.moments(a, axes = [0], keepdims=True)

(<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[2.5, 3.5, 4.5]], dtype=float32)>,
 <tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[2.25, 2.25, 2.25]], dtype=float32)>)

    * Axes = [0] calculates mean across the batch

In [14]:
a - mean

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-1.5, -1.5, -1.5],
       [ 1.5,  1.5,  1.5]], dtype=float32)>

In [15]:
a - tf.nn.moments(a, axes = [0], keepdims=True)[0]

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-1.5, -1.5, -1.5],
       [ 1.5,  1.5,  1.5]], dtype=float32)>

In [16]:
scale, offset = 2, 1

In [17]:
(a-mean)*scale

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-3., -3., -3.],
       [ 3.,  3.,  3.]], dtype=float32)>

In [18]:
(a-mean)/(tf.math.sqrt(variance) + 0.001)*scale + offset

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-0.9986675, -0.9986675, -0.9986675],
       [ 2.9986675,  2.9986675,  2.9986675]], dtype=float32)>

In [19]:
# using tf functions
tf.add(tf.multiply(tf.divide(tf.subtract(a, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-0.9986675, -0.9986675, -0.9986675],
       [ 2.9986675,  2.9986675,  2.9986675]], dtype=float32)>

    * Axes = [1] should calculate the moments along the columns

In [20]:
mean, variance = tf.nn.moments(a, axes = [1], keepdims=True)
print(mean, variance)

tf.Tensor(
[[2.]
 [5.]], shape=(2, 1), dtype=float32) tf.Tensor(
[[0.6666667]
 [0.6666667]], shape=(2, 1), dtype=float32)


In [21]:
a

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)>

In [22]:
a.numpy().mean(axis = 1)

array([2., 5.], dtype=float32)

In [23]:
a.numpy().var(axis = 1)

array([0.6666667, 0.6666667], dtype=float32)

In [24]:
# using tf functions
tf.add(tf.multiply(tf.divide(tf.subtract(a, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[-1.4464934,  1.       ,  3.4464934],
       [-1.4464934,  1.       ,  3.4464934]], dtype=float32)>

    * Both axes = [0] and [1] confirms

### Testing on 4D data such as in images or timeseries

In [25]:
test_data = tf.reshape(tf.range(0,24, dtype = np.float32), (2,2,3,2)) # reshaping in the format of an image [batch, height, width, channels]
test_data

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[ 0.,  1.],
         [ 2.,  3.],
         [ 4.,  5.]],

        [[ 6.,  7.],
         [ 8.,  9.],
         [10., 11.]]],


       [[[12., 13.],
         [14., 15.],
         [16., 17.]],

        [[18., 19.],
         [20., 21.],
         [22., 23.]]]], dtype=float32)>

#### Axes = [0]

In [26]:
custom_bn_layer = custom_bn(scale=2, offset=1, axes=[0])
x = custom_bn_layer(test_data, training = True)
x

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-0.9999721 , -0.9999721 ],
         [-0.99997216, -0.9999722 ],
         [-0.9999722 , -0.9999722 ]],

        [[-0.9999721 , -0.9999721 ],
         [-0.9999721 , -0.9999721 ],
         [-0.9999721 , -0.9999721 ]]],


       [[[ 2.999972  ,  2.999972  ],
         [ 2.999972  ,  2.999972  ],
         [ 2.999972  ,  2.999972  ]],

        [[ 2.9999723 ,  2.9999723 ],
         [ 2.9999723 ,  2.9999723 ],
         [ 2.9999723 ,  2.9999723 ]]]], dtype=float32)>

##### *Testing: Dismantling the batch normalization process*

In [27]:
# mean, var should have the shape [1,d2,d3,d4] where d2,d3,d4 are respective dimensions of the 4D input
mean, variance = tf.nn.moments(test_data, axes = [0], keepdims=True)
print(mean, variance)

tf.Tensor(
[[[[ 6.  7.]
   [ 8.  9.]
   [10. 11.]]

  [[12. 13.]
   [14. 15.]
   [16. 17.]]]], shape=(1, 2, 3, 2), dtype=float32) tf.Tensor(
[[[[36. 36.]
   [36. 36.]
   [36. 36.]]

  [[36. 36.]
   [36. 36.]
   [36. 36.]]]], shape=(1, 2, 3, 2), dtype=float32)


In [28]:
# using tf functions
xhat = tf.add(tf.multiply(tf.divide(tf.subtract(test_data, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)
xhat

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-0.9996669, -0.9996669],
         [-0.9996669, -0.9996669],
         [-0.9996669, -0.9996669]],

        [[-0.9996669, -0.9996669],
         [-0.9996667, -0.9996667],
         [-0.9996667, -0.9996667]]],


       [[[ 2.999667 ,  2.999667 ],
         [ 2.999667 ,  2.999667 ],
         [ 2.999667 ,  2.999667 ]],

        [[ 2.999667 ,  2.999667 ],
         [ 2.9996667,  2.9996667],
         [ 2.9996667,  2.9996667]]]], dtype=float32)>

In [29]:
tf.round(x-xhat)

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]]], dtype=float32)>

    * Confirms

#### Axes = [1]

In [30]:
custom_bn_layer = custom_bn(scale=2, offset=1, axes=[1])
x = custom_bn_layer(test_data, training = True)
x

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-0.9998889 , -0.99988884],
         [-0.9998888 , -0.9998889 ],
         [-0.99988866, -0.9998889 ]],

        [[ 2.999889  ,  2.9998887 ],
         [ 2.999889  ,  2.999889  ],
         [ 2.999889  ,  2.999889  ]]],


       [[[-0.9998884 , -0.9998884 ],
         [-0.9998894 , -0.9998894 ],
         [-0.9998884 , -0.9998884 ]],

        [[ 2.9998894 ,  2.9998884 ],
         [ 2.9998884 ,  2.9998884 ],
         [ 2.9998894 ,  2.9998894 ]]]], dtype=float32)>

##### *Testing: Dismantling the batch normalization process*

In [31]:
# mean, var should have the shape [d1,1,d3,d4] where d1,d3,d4 are respective dimensions of the 4D input
mean, variance = tf.nn.moments(test_data, axes = [1], keepdims=True)
print(mean, variance)

tf.Tensor(
[[[[ 3.  4.]
   [ 5.  6.]
   [ 7.  8.]]]


 [[[15. 16.]
   [17. 18.]
   [19. 20.]]]], shape=(2, 1, 3, 2), dtype=float32) tf.Tensor(
[[[[9. 9.]
   [9. 9.]
   [9. 9.]]]


 [[[9. 9.]
   [9. 9.]
   [9. 9.]]]], shape=(2, 1, 3, 2), dtype=float32)


In [32]:
# using tf functions
xhat = tf.add(tf.multiply(tf.divide(tf.subtract(test_data, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)
xhat

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-0.99933374, -0.99933374],
         [-0.99933374, -0.99933374],
         [-0.99933374, -0.99933374]],

        [[ 2.9993339 ,  2.9993339 ],
         [ 2.9993339 ,  2.9993339 ],
         [ 2.9993339 ,  2.9993339 ]]],


       [[[-0.99933374, -0.99933374],
         [-0.9993336 , -0.9993336 ],
         [-0.9993336 , -0.9993336 ]],

        [[ 2.9993339 ,  2.9993339 ],
         [ 2.9993336 ,  2.9993336 ],
         [ 2.9993336 ,  2.9993336 ]]]], dtype=float32)>

In [33]:
tf.round(x-xhat)

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]]], dtype=float32)>

    * Confirms

#### Axes = [2]

In [34]:
custom_bn_layer = custom_bn(scale=2, offset=1, axes=[2])
x = custom_bn_layer(test_data, training = True)
x

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-1.4490306, -1.4490305],
         [ 1.       ,  1.       ],
         [ 3.4490306,  3.4490309]],

        [[-1.4490309, -1.4490299],
         [ 1.       ,  1.       ],
         [ 3.4490309,  3.4490309]]],


       [[[-1.4490318, -1.4490299],
         [ 1.       ,  1.       ],
         [ 3.44903  ,  3.4490318]],

        [[-1.4490318, -1.4490299],
         [ 1.       ,  1.       ],
         [ 3.44903  ,  3.4490318]]]], dtype=float32)>

##### *Testing: Dismantling the batch normalization process*

In [35]:
# mean, var should have the shape [d1,d2,1,d4] where d1,d2,d4 are respective dimensions of the 4D input
mean, variance = tf.nn.moments(test_data, axes = [2], keepdims=True)
print(mean, variance)

tf.Tensor(
[[[[ 2.  3.]]

  [[ 8.  9.]]]


 [[[14. 15.]]

  [[20. 21.]]]], shape=(2, 2, 1, 2), dtype=float32) tf.Tensor(
[[[[2.6666667 2.6666667]]

  [[2.6666667 2.6666667]]]


 [[[2.6666667 2.6666667]]

  [[2.6666667 2.6666667]]]], shape=(2, 2, 1, 2), dtype=float32)


In [36]:
# using tf functions
xhat = tf.add(tf.multiply(tf.divide(tf.subtract(test_data, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)
xhat

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-1.4479907, -1.4479907],
         [ 1.       ,  1.       ],
         [ 3.4479907,  3.4479907]],

        [[-1.4479907, -1.4479907],
         [ 1.       ,  1.       ],
         [ 3.4479907,  3.4479907]]],


       [[[-1.4479907, -1.4479907],
         [ 1.       ,  1.       ],
         [ 3.4479907,  3.4479907]],

        [[-1.4479907, -1.4479907],
         [ 1.       ,  1.       ],
         [ 3.4479907,  3.4479907]]]], dtype=float32)>

In [37]:
tf.round(x-xhat)

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]]], dtype=float32)>

    * Confirms

#### Axes = [3]

In [38]:
custom_bn_layer = custom_bn(scale=2, offset=1, axes=[3])
x = custom_bn_layer(test_data, training = True)
x

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-0.99601173,  2.9960117 ],
         [-0.99601173,  2.9960117 ],
         [-0.99601173,  2.9960117 ]],

        [[-0.99601173,  2.9960117 ],
         [-0.9960098 ,  2.9960136 ],
         [-0.9960098 ,  2.9960136 ]]],


       [[[-0.9960098 ,  2.9960136 ],
         [-0.9960098 ,  2.9960136 ],
         [-0.99601364,  2.9960098 ]],

        [[-0.9960098 ,  2.9960098 ],
         [-0.9960098 ,  2.9960098 ],
         [-0.9960098 ,  2.9960175 ]]]], dtype=float32)>

##### *Testing: Dismantling the batch normalization process*

In [39]:
# mean, var should have the shape [d1,d2,d3,1] where d1,d2,d3 are respective dimensions of the 4D input
mean, variance = tf.nn.moments(test_data, axes = [3], keepdims=True)
print(mean, variance)

tf.Tensor(
[[[[ 0.5]
   [ 2.5]
   [ 4.5]]

  [[ 6.5]
   [ 8.5]
   [10.5]]]


 [[[12.5]
   [14.5]
   [16.5]]

  [[18.5]
   [20.5]
   [22.5]]]], shape=(2, 2, 3, 1), dtype=float32) tf.Tensor(
[[[[0.25]
   [0.25]
   [0.25]]

  [[0.25]
   [0.25]
   [0.25]]]


 [[[0.25]
   [0.25]
   [0.25]]

  [[0.25]
   [0.25]
   [0.25]]]], shape=(2, 2, 3, 1), dtype=float32)


In [40]:
# using tf functions
xhat = tf.add(tf.multiply(tf.divide(tf.subtract(test_data, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)
xhat

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ]],

        [[-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ]]],


       [[[-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ]],

        [[-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ],
         [-0.99600804,  2.996008  ]]]], dtype=float32)>

In [41]:
tf.round(x-xhat)

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]]], dtype=float32)>

    * Confirms

#### Similarly a list of axes can be passed.
    * E.g. if axes = [0,2], first the moments are applied along 0, and then along 2

In [42]:
custom_bn_layer = custom_bn(scale=2, offset=1, axes=[0,2])
x = custom_bn_layer(test_data, training = True)
x

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-1.5730367 , -1.5730367 ],
         [-0.9297775 , -0.92977744],
         [-0.28651834, -0.28651834]],

        [[-1.5730368 , -1.5730367 ],
         [-0.9297776 , -0.9297776 ],
         [-0.28651857, -0.28651834]]],


       [[[ 2.2865183 ,  2.2865186 ],
         [ 2.9297776 ,  2.9297776 ],
         [ 3.5730367 ,  3.5730367 ]],

        [[ 2.286518  ,  2.286518  ],
         [ 2.9297771 ,  2.9297776 ],
         [ 3.5730367 ,  3.5730367 ]]]], dtype=float32)>

##### *Testing: Dismantling the batch normalization process*

In [43]:
# Applying along 0 first resulting in shape [1,d2,d3,d4] 
mean, variance = tf.nn.moments(test_data, axes = [0], keepdims=True)
print(mean, variance)

tf.Tensor(
[[[[ 6.  7.]
   [ 8.  9.]
   [10. 11.]]

  [[12. 13.]
   [14. 15.]
   [16. 17.]]]], shape=(1, 2, 3, 2), dtype=float32) tf.Tensor(
[[[[36. 36.]
   [36. 36.]
   [36. 36.]]

  [[36. 36.]
   [36. 36.]
   [36. 36.]]]], shape=(1, 2, 3, 2), dtype=float32)


In [44]:
# Applying along 2 to the mean from above 
mean_mean, variance_mean = tf.nn.moments(mean, axes = [2], keepdims=True)
print(mean_mean, variance_mean)

tf.Tensor(
[[[[ 8.  9.]]

  [[14. 15.]]]], shape=(1, 2, 1, 2), dtype=float32) tf.Tensor(
[[[[2.6666667 2.6666667]]

  [[2.6666667 2.6666667]]]], shape=(1, 2, 1, 2), dtype=float32)


In [45]:
# Applying along 2 to the variance from above 
mean_var, var_var = tf.nn.moments(variance, axes = [2], keepdims=True)
print(mean_var, var_var)

tf.Tensor(
[[[[36. 36.]]

  [[36. 36.]]]], shape=(1, 2, 1, 2), dtype=float32) tf.Tensor(
[[[[0. 0.]]

  [[0. 0.]]]], shape=(1, 2, 1, 2), dtype=float32)


In [46]:
# total variance is the variance from axes = 0 + var from axes = 2
variance = mean_var + variance_mean
variance

<tf.Tensor: shape=(1, 2, 1, 2), dtype=float32, numpy=
array([[[[38.666668, 38.666668]],

        [[38.666668, 38.666668]]]], dtype=float32)>

In [47]:
mean = mean_mean
mean

<tf.Tensor: shape=(1, 2, 1, 2), dtype=float32, numpy=
array([[[[ 8.,  9.]],

        [[14., 15.]]]], dtype=float32)>

In [48]:
# Applyling both together  
mean, variance = tf.nn.moments(test_data, axes = [0,2], keepdims=True)
print(mean, variance)

tf.Tensor(
[[[[ 8.  9.]]

  [[14. 15.]]]], shape=(1, 2, 1, 2), dtype=float32) tf.Tensor(
[[[[38.666668 38.666668]]

  [[38.666668 38.666668]]]], shape=(1, 2, 1, 2), dtype=float32)


In [49]:
# using tf functions
xhat = tf.add(tf.multiply(tf.divide(tf.subtract(test_data, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)
xhat

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-1.5726564 , -1.5726564 ],
         [-0.92949235, -0.92949235],
         [-0.2863282 , -0.2863282 ]],

        [[-1.5726564 , -1.5726564 ],
         [-0.92949235, -0.92949235],
         [-0.2863282 , -0.2863282 ]]],


       [[[ 2.2863283 ,  2.2863283 ],
         [ 2.9294925 ,  2.9294925 ],
         [ 3.5726564 ,  3.5726564 ]],

        [[ 2.2863283 ,  2.2863283 ],
         [ 2.9294925 ,  2.9294925 ],
         [ 3.5726564 ,  3.5726564 ]]]], dtype=float32)>

In [50]:
tf.round(x-xhat)

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]]], dtype=float32)>

    * Confirms

### Global normalization typically used for convolutional networks
    * This is done by passing axes = [0,1,2] leaving out the last channel dimension

In [51]:
custom_bn_layer = custom_bn(scale=2, offset=1, axes=[0,1,2])
x = custom_bn_layer(test_data, training = True)
x

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-2.1864765 , -2.1864765 ],
         [-1.6071172 , -1.6071172 ],
         [-1.0277578 , -1.0277579 ]],

        [[-0.44839835, -0.4483986 ],
         [ 0.13096094,  0.13096094],
         [ 0.71032023,  0.71032023]]],


       [[[ 1.2896798 ,  1.2896795 ],
         [ 1.8690388 ,  1.869039  ],
         [ 2.4483984 ,  2.448398  ]],

        [[ 3.027758  ,  3.0277576 ],
         [ 3.607117  ,  3.6071172 ],
         [ 4.1864767 ,  4.186476  ]]]], dtype=float32)>

##### *Testing: Dismantling the batch normalization process*

In [52]:
# Applying along 0 first resulting in shape [1,d2,d3,d4] 
mean0, var0 = tf.nn.moments(test_data, axes = [0], keepdims=True)
print(mean0, var0)

tf.Tensor(
[[[[ 6.  7.]
   [ 8.  9.]
   [10. 11.]]

  [[12. 13.]
   [14. 15.]
   [16. 17.]]]], shape=(1, 2, 3, 2), dtype=float32) tf.Tensor(
[[[[36. 36.]
   [36. 36.]
   [36. 36.]]

  [[36. 36.]
   [36. 36.]
   [36. 36.]]]], shape=(1, 2, 3, 2), dtype=float32)


In [53]:
# Applying along 1 to mean0 from above 
mean1, var1 = tf.nn.moments(mean0, axes = [1], keepdims=True)
print(mean1, var1)

tf.Tensor(
[[[[ 9. 10.]
   [11. 12.]
   [13. 14.]]]], shape=(1, 1, 3, 2), dtype=float32) tf.Tensor(
[[[[9. 9.]
   [9. 9.]
   [9. 9.]]]], shape=(1, 1, 3, 2), dtype=float32)


In [54]:
# Applying along 2 to the mean1 from above 
mean2, var2 = tf.nn.moments(mean1, axes = [2], keepdims=True)
print(mean2, var2)

tf.Tensor([[[[11. 12.]]]], shape=(1, 1, 1, 2), dtype=float32) tf.Tensor([[[[2.6666667 2.6666667]]]], shape=(1, 1, 1, 2), dtype=float32)


In [55]:
mean = mean2
mean

<tf.Tensor: shape=(1, 1, 1, 2), dtype=float32, numpy=array([[[[11., 12.]]]], dtype=float32)>

In [56]:
# total variance is sum of all variances 
variance = var0+var1+var2
variance

<tf.Tensor: shape=(1, 2, 3, 2), dtype=float32, numpy=
array([[[[47.666668, 47.666668],
         [47.666668, 47.666668],
         [47.666668, 47.666668]],

        [[47.666668, 47.666668],
         [47.666668, 47.666668],
         [47.666668, 47.666668]]]], dtype=float32)>

In [57]:
variance = tf.reduce_mean(variance, axis = [0,1,2])
variance

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([47.666668, 47.666668], dtype=float32)>

In [58]:
# Applyling both together  
mean, variance = tf.nn.moments(test_data, axes = [0,1,2], keepdims=True)
print(mean, variance)

tf.Tensor([[[[11. 12.]]]], shape=(1, 1, 1, 2), dtype=float32) tf.Tensor([[[[47.666668 47.666668]]]], shape=(1, 1, 1, 2), dtype=float32)


In [59]:
# using tf functions
xhat = tf.add(tf.multiply(tf.divide(tf.subtract(test_data, mean), tf.add(tf.math.sqrt(variance), 0.001)), scale), offset)
xhat

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[-2.1860485 , -2.1860485 ],
         [-1.6067669 , -1.6067669 ],
         [-1.0274854 , -1.0274854 ]],

        [[-0.44820392, -0.44820392],
         [ 0.13107765,  0.13107765],
         [ 0.7103592 ,  0.7103592 ]]],


       [[[ 1.2896408 ,  1.2896408 ],
         [ 1.8689224 ,  1.8689224 ],
         [ 2.448204  ,  2.448204  ]],

        [[ 3.0274854 ,  3.0274854 ],
         [ 3.606767  ,  3.606767  ],
         [ 4.1860485 ,  4.1860485 ]]]], dtype=float32)>

In [60]:
tf.round(x-xhat)

<tf.Tensor: shape=(2, 2, 3, 2), dtype=float32, numpy=
array([[[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]],


       [[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]]], dtype=float32)>

    * Confirms

#### Testing against Keras BatchNormalization Layer

In [76]:
def kerasbn(scale = 2, offset = 1, axis= 0):
    return tf.keras.layers.BatchNormalization(axis = axis, epsilon = 0.001, center =True, scale =True)

In [78]:
kerasbnlayer = kerasbn()

In [86]:
kerasbnlayer(a)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 1.9990008,  3.9980016,  5.9970026],
       [ 7.996003 ,  9.995004 , 11.994005 ]], dtype=float32)>

In [87]:
kerasbnlayer.weights

[<tf.Variable 'batch_normalization_4/beta:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>,
 <tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([2., 2.], dtype=float32)>,
 <tf.Variable 'batch_normalization_4/moving_mean:0' shape=(2,) dtype=float32, numpy=array([0., 0.], dtype=float32)>,
 <tf.Variable 'batch_normalization_4/moving_variance:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>]

In [81]:
kerasbnlayer.gamma

<tf.Variable 'batch_normalization_4/gamma:0' shape=(2,) dtype=float32, numpy=array([1., 1.], dtype=float32)>

In [84]:
kerasbnlayer.gamma = tf.Variable([2,2], dtype = tf.float32)

In [85]:
kerasbnlayer.gamma

<tf.Variable 'Variable:0' shape=(2,) dtype=float32, numpy=array([2., 2.], dtype=float32)>