In [1]:
import numpy as np
import tensorflow as tf

# Eager execution is opt-in on TF 1.x; enabling it lets the layer outputs
# below be printed directly as concrete values.
tf.enable_eager_execution()
# Last expression of the cell: shows the TensorFlow version the outputs were recorded with.
tf.__version__

'1.14.0'

$x$ shape = (B, H, W, C), i.e. (batch, height, width, channels)

* batchnorm: $\mu$ shape = (C, )

$$\mu_{\cdot} = \frac{1}{BHW} \sum_{b,h,w} x_{b,h,w,\cdot}$$

* layernorm: $\mu$ shape = (B, )

$$\mu_{\cdot} = \frac{1}{HWC} \sum_{h,w,c} x_{\cdot,h,w,c}$$

* instancenorm: $\mu$ shape = (B, C)

$$\mu_{\cdot,\cdot} = \frac{1}{HW} \sum_{h,w} x_{\cdot,h,w,\cdot}$$

In [2]:
# Toy input of shape (B, H, W, C) = (2, 2, 2, 2) holding the values 0..15.
x = np.arange(16, dtype='float32').reshape(2, 2, 2, 2)


def print_samples(x):
    """Print a channels-last batch sample by sample, one 2-D slice per channel.

    Args:
        x: iterable of samples (first axis is the batch); each sample must
           expose ``.shape`` and support ``sample[:, :, k]`` indexing.
    """
    for sample_idx, sample in enumerate(x):
        print('Sample {}:'.format(sample_idx))
        # The last axis of every sample is the channel axis.
        for channel_idx in range(sample.shape[-1]):
            channel_plane = sample[:, :, channel_idx]
            print(' channel {}: \n {}'.format(channel_idx, channel_plane))


print_samples(x)

Sample 0:
 channel 0: 
 [[0. 2.]
 [4. 6.]]
 channel 1: 
 [[1. 3.]
 [5. 7.]]
Sample 1:
 channel 0: 
 [[ 8. 10.]
 [12. 14.]]
 channel 1: 
 [[ 9. 11.]
 [13. 15.]]


## Batch Normalization

In [3]:
# Batch-norm layer with default arguments; the recorded output below matches
# per-channel statistics (reduction over B, H, W).
batch_norm = tf.keras.layers.BatchNormalization()

In [4]:
# training=True: normalize with the statistics of this batch instead of the
# layer's moving averages.
y = batch_norm(x, training=True)
# Alternative using the TF 1.x contrib API:
# y = tf.contrib.layers.batch_norm(x)
print('After Batch Normalization:')
print_samples(y)

After Batch Normalization:
Sample 0:
 channel 0: 
 [[-1.527489   -1.0910635 ]
 [-0.6546381  -0.21821271]]
 channel 1: 
 [[-1.527489   -1.0910635 ]
 [-0.6546381  -0.21821271]]
Sample 1:
 channel 0: 
 [[0.21821271 0.6546381 ]
 [1.0910635  1.527489  ]]
 channel 1: 
 [[0.21821271 0.6546381 ]
 [1.0910635  1.527489  ]]


In [5]:
# Per-channel statistics: reduce over batch and spatial axes (0, 1, 2);
# keepdims=True leaves them as size-1 axes so the result broadcasts against x.
# No epsilon is added here, which presumably explains the small differences
# from the Keras layer output.
y = (x - x.mean(axis=(0, 1, 2), keepdims=True)) / x.std(axis=(0, 1, 2), keepdims=True)
print('Compute manually:')
print_samples(y)

Compute manually:
Sample 0:
 channel 0: 
 [[-1.5275252  -1.0910894 ]
 [-0.65465367 -0.21821788]]
 channel 1: 
 [[-1.5275252  -1.0910894 ]
 [-0.65465367 -0.21821788]]
Sample 1:
 channel 0: 
 [[0.21821788 0.65465367]
 [1.0910894  1.5275252 ]]
 channel 1: 
 [[0.21821788 0.65465367]
 [1.0910894  1.5275252 ]]


## Layer Normalization

In [6]:
# Normalize over H, W and C (axes 1, 2, 3): one mean/variance per sample.
layer_norm = tf.keras.layers.LayerNormalization(axis=[1,2,3])

In [7]:
# Layer norm uses only per-sample statistics, so no training flag is passed.
y = layer_norm(x)
# Alternative using the TF 1.x contrib API:
# y = tf.contrib.layers.layer_norm(x)
print('After Layer Normalization:')
print_samples(y)

After Layer Normalization:
Sample 0:
 channel 0: 
 [[-1.5273798 -0.6545913]
 [ 0.2181971  1.0909855]]
 channel 1: 
 [[-1.0909855 -0.2181971]
 [ 0.6545913  1.5273798]]
Sample 1:
 channel 0: 
 [[-1.52738    -0.65459156]
 [ 0.21819687  1.0909853 ]]
 channel 1: 
 [[-1.0909858  -0.21819735]
 [ 0.6545911   1.5273795 ]]


In [8]:
# Per-sample statistics: reduce over H, W, C (axes 1, 2, 3); keepdims=True
# leaves them as size-1 axes so the result broadcasts against x.
# No epsilon is added here, which presumably explains the small differences
# from the Keras layer output.
y = (x - x.mean(axis=(1, 2, 3), keepdims=True)) / x.std(axis=(1, 2, 3), keepdims=True)
print('Compute manually:')
print_samples(y)

Compute manually:
Sample 0:
 channel 0: 
 [[-1.5275252  -0.65465367]
 [ 0.21821788  1.0910894 ]]
 channel 1: 
 [[-1.0910894  -0.21821788]
 [ 0.65465367  1.5275252 ]]
Sample 1:
 channel 0: 
 [[-1.5275252  -0.65465367]
 [ 0.21821788  1.0910894 ]]
 channel 1: 
 [[-1.0910894  -0.21821788]
 [ 0.65465367  1.5275252 ]]


## Instance Normalization

In [9]:
# Instance norm expressed through LayerNormalization: restricting the
# normalized axes to H and W (1, 2) yields one mean/variance per (sample, channel).
# NOTE(review): the layer's learned scale/offset are presumably shaped over the
# normalized axes (H, W) rather than per channel — confirm before training with it.
instance_norm = tf.keras.layers.LayerNormalization(axis=[1,2])

In [10]:
# Apply the (H, W)-axis normalization defined above to the toy batch.
y = instance_norm(x)
# Alternative using the TF 1.x contrib API:
# y = tf.contrib.layers.instance_norm(x)
print('After Instance Normalization:')
print_samples(y)

After Instance Normalization:
Sample 0:
 channel 0: 
 [[-1.3415066  -0.4471689 ]
 [ 0.44716883  1.3415066 ]]
 channel 1: 
 [[-1.3415066  -0.44716883]
 [ 0.44716895  1.3415066 ]]
Sample 1:
 channel 0: 
 [[-1.3415067  -0.44716883]
 [ 0.44716883  1.3415065 ]]
 channel 1: 
 [[-1.3415065  -0.44716883]
 [ 0.44716883  1.3415065 ]]


In [11]:
# Per-(sample, channel) statistics: reduce over the spatial axes H, W (1, 2).
# keepdims=True keeps the reduced axes as size 1, giving shape (B, 1, 1, C),
# which broadcasts correctly against x of shape (B, H, W, C).
# Fix: the std was previously indexed with `[:,None,None:]` (missing comma),
# which produced shape (B, 1, C) and aligned the batch axis of the std with
# the H axis of x; it only ran because B == H for this toy input.
# No epsilon is added here, which presumably explains the small differences
# from the Keras layer output.
y = (x - np.mean(x, axis=(1, 2), keepdims=True)) / np.std(x, axis=(1, 2), keepdims=True)
print('Compute manually:')
print_samples(y)

Compute manually:
Sample 0:
 channel 0: 
 [[-1.3416407 -0.4472136]
 [ 0.4472136  1.3416407]]
 channel 1: 
 [[-1.3416407 -0.4472136]
 [ 0.4472136  1.3416407]]
Sample 1:
 channel 0: 
 [[-1.3416407 -0.4472136]
 [ 0.4472136  1.3416407]]
 channel 1: 
 [[-1.3416407 -0.4472136]
 [ 0.4472136  1.3416407]]
