In [None]:
import torch
import torch.nn as nn

\begin{align}
    y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} \cdot \gamma + \beta
\end{align}

# 1. Batch Normalization

In [None]:
# 2-D batch normalisation over a single feature channel, with the affine
# parameters (gamma, beta) and running-statistics tracking both enabled.
batch_norm = nn.BatchNorm2d(
    1,  # num_features
    eps=1e-5,
    momentum=0.1,
    affine=True,
    track_running_stats=True,
)


# 2. Layer Normalization

In [None]:
layer_norm = nn.LayerNorm(normalized_shape=1,
                          eps=1e-05,
                          elementwise_affine=True, bias=True)


# 3. Instance Normalization

## 3.1 One sample - One channel

In [None]:
# Settings for the single-channel example.
eps = 1e-5      # added to the variance before taking the square root
num_chanel = 1  # channel count handed to nn.InstanceNorm2d

In [None]:
# Input of shape (1, 1, 2, 2): one sample holding a single 2x2 channel.
X = torch.tensor(
    [[[[1, 5],
       [4, 0]]]],
    dtype=torch.float32,
)

X.shape

torch.Size([1, 1, 2, 2])

In [None]:
# Manual instance normalisation: statistics are taken over the last two axes
# of X separately for every (sample, channel) pair. keepdim=True leaves them
# as size-1 axes so the results broadcast against X.
X_mean = X.mean(dim=(2, 3), keepdim=True)
X_var = X.var(dim=(2, 3), correction=0, keepdim=True)  # correction=0 -> biased variance

X_normalized = (X - X_mean) / (X_var + eps).sqrt()
X_normalized

tensor([[[[-0.7276,  1.2127],
          [ 0.7276, -1.2127]]]])

In [None]:
# Reference result from PyTorch's built-in layer. affine=False means no
# learnable scale/shift is applied, so the output is the plain normalisation.
instance_norm = nn.InstanceNorm2d(
    num_chanel,
    eps=eps,
    momentum=0.1,
    affine=False,
    track_running_stats=False,
)
with torch.no_grad():
    normalized_torch = instance_norm(X)

In [None]:
# Show the nn.InstanceNorm2d output for comparison with the manual X_normalized.
normalized_torch

tensor([[[[-0.7276,  1.2127],
          [ 0.7276, -1.2127]]]])

## 3.2 One sample - Two channels

In [None]:
# Settings for the two-channel, single-sample example.
eps = 1e-5      # added to the variance before taking the square root
num_chanel = 2  # number of channels in the input built in the next cell

In [None]:
# Input of shape (1, 2, 2, 2): one sample holding two 2x2 channels.
X = torch.tensor(
    [[[[1, 5],
       [4, 0]],
      [[5, 8],
       [5, 1]]]],
    dtype=torch.float32,
)

X.shape

torch.Size([1, 2, 2, 2])

In [None]:
# Manual instance normalisation: each channel is normalised with its own
# mean and biased variance, reduced over the last two axes of X.
# keepdim=True keeps size-1 axes so the statistics broadcast against X.
X_mean = X.mean(dim=(2, 3), keepdim=True)
X_var = X.var(dim=(2, 3), correction=0, keepdim=True)

X_normalized = (X - X_mean) / (X_var + eps).sqrt()
X_normalized

tensor([[[[-0.7276,  1.2127],
          [ 0.7276, -1.2127]],

         [[ 0.1005,  1.3065],
          [ 0.1005, -1.5076]]]])

In [None]:
# Reference result from PyTorch's built-in layer.
# The layer is configured from this section's `num_chanel` and `eps` settings
# (rather than repeating the literals) so it always uses the same values as
# the manual computation it is compared against.
# affine=False: no learnable scale/shift is applied.
with torch.no_grad():
    instance_norm = nn.InstanceNorm2d(num_features=num_chanel,
                                      eps=eps,
                                      momentum=0.1,
                                      affine=False,
                                      track_running_stats=False)
    normalized_torch = instance_norm(X)

In [None]:
# Show the nn.InstanceNorm2d output for comparison with the manual X_normalized.
normalized_torch

tensor([[[[-0.7276,  1.2127],
          [ 0.7276, -1.2127]],

         [[ 0.1005,  1.3065],
          [ 0.1005, -1.5076]]]])

## 3.3 Two samples - Two channels

In [None]:
# Settings for the two-sample, two-channel example.
eps = 1e-5      # added to the variance before taking the square root
num_chanel = 2  # channel count handed to nn.InstanceNorm2d

In [None]:
# Input of shape (2, 2, 2, 2): two samples, each holding two 2x2 channels.
X = torch.tensor(
    [
        # first sample
        [[[1, 5],
          [4, 0]],
         [[9, 2],
          [6, 3]]],
        # second sample
        [[[6, 3],
          [1, 7]],
         [[0, 2],
          [1, 4]]],
    ],
    dtype=torch.float32,
)

X.shape

torch.Size([2, 2, 2, 2])

In [None]:
# Manual instance normalisation: every (sample, channel) pair gets its own
# mean and biased variance, reduced over the last two axes of X.
# keepdim=True keeps size-1 axes so the statistics broadcast against X.
X_mean = X.mean(dim=(2, 3), keepdim=True)
X_var = X.var(dim=(2, 3), correction=0, keepdim=True)

X_normalized = (X - X_mean) / (X_var + eps).sqrt()
X_normalized

tensor([[[[-0.7276,  1.2127],
          [ 0.7276, -1.2127]],

         [[ 1.4606, -1.0954],
          [ 0.3651, -0.7303]]],


        [[[ 0.7338, -0.5241],
          [-1.3628,  1.1531]],

         [[-1.1832,  0.1690],
          [-0.5071,  1.5213]]]])

In [None]:
# Reference result from PyTorch's built-in layer. affine=False means no
# learnable scale/shift is applied, so the output is the plain normalisation.
instance_norm = nn.InstanceNorm2d(
    num_chanel,
    eps=eps,
    momentum=0.1,
    affine=False,
    track_running_stats=False,
)
with torch.no_grad():
    normalized_torch = instance_norm(X)

In [None]:
# Show the nn.InstanceNorm2d output for comparison with the manual X_normalized.
normalized_torch

tensor([[[[-0.7276,  1.2127],
          [ 0.7276, -1.2127]],

         [[ 1.4606, -1.0954],
          [ 0.3651, -0.7303]]],


        [[[ 0.7338, -0.5241],
          [-1.3628,  1.1531]],

         [[-1.1832,  0.1690],
          [-0.5071,  1.5213]]]])

# 4. Group Normalization

In [None]:
# Settings for the group-normalisation example.
eps = 1e-5        # added to the variance before taking the square root
num_groups = 2    # number of groups the channels are split into
num_channels = 4  # total number of channels in the input
# Channels are split into equally sized groups, so the split must be exact.
assert num_channels % num_groups == 0, "num_channels must be divisible by num_groups"

In [None]:
# Input of shape (2, 4, 2, 2): two samples, each holding four 2x2 channels.
X = torch.tensor(
    [
        # first sample
        [[[1, 5], [4, 7]],
         [[1, 2], [4, 0]],
         [[9, 2], [0, 3]],
         [[6, 3], [1, 8]]],
        # second sample
        [[[5, 2], [6, 3]],
         [[1, 7], [0, 7]],
         [[0, 2], [3, 3]],
         [[1, 4], [2, 5]]],
    ],
    dtype=torch.float32,
)

X.shape

torch.Size([2, 4, 2, 2])

In [None]:
# Manual group normalisation: for every sample, consecutive blocks of
# `chanel_per_gr` channels form one group, and each group is normalised with
# a single mean and biased variance computed over all of its elements.
chanel_per_gr = num_channels // num_groups

# Work on a copy so the input X stays untouched.
X_normalized = X.clone()

for i in range(X_normalized.shape[0]):
    for c in range(0, num_channels, chanel_per_gr):
        X_gr = X_normalized[i, c:c + chanel_per_gr]  # view of the current group
        X_gr_mean = X_gr.mean()
        X_gr_var = X_gr.var(correction=0)
        # The right-hand side is fully evaluated before being written back
        # into the same slice.
        X_normalized[i, c:c + chanel_per_gr] = (X_gr - X_gr_mean) / (X_gr_var + eps).sqrt()


X_normalized

tensor([[[[-0.8944,  0.8944],
          [ 0.4472,  1.7889]],

         [[-0.8944, -0.4472],
          [ 0.4472, -1.3416]],

         [[ 1.6222, -0.6489],
          [-1.2978, -0.3244]],

         [[ 0.6489, -0.3244],
          [-0.9733,  1.2978]]],


        [[[ 0.4376, -0.7293],
          [ 0.8266, -0.3404]],

         [[-1.1183,  1.2155],
          [-1.5073,  1.2155]],

         [[-1.6667, -0.3333],
          [ 0.3333,  0.3333]],

         [[-1.0000,  1.0000],
          [-0.3333,  1.6667]]]])

In [None]:
# Reference result from PyTorch's built-in layer.
# The layer is configured from this section's `num_groups`, `num_channels`
# and `eps` settings (rather than repeating the literals) so it always uses
# the same values as the manual computation it is compared against.
# affine=True adds a per-channel scale and shift; per the PyTorch docs a
# freshly constructed layer initialises them to ones and zeros, so the output
# here is still the plain normalisation.
with torch.no_grad():
    group_norm = nn.GroupNorm(num_groups=num_groups,
                              num_channels=num_channels,
                              eps=eps,
                              affine=True)
    normalized_torch = group_norm(X)

In [None]:
# Show the nn.GroupNorm output for comparison with the manual X_normalized.
normalized_torch

tensor([[[[-0.8944,  0.8944],
          [ 0.4472,  1.7889]],

         [[-0.8944, -0.4472],
          [ 0.4472, -1.3416]],

         [[ 1.6222, -0.6489],
          [-1.2978, -0.3244]],

         [[ 0.6489, -0.3244],
          [-0.9733,  1.2978]]],


        [[[ 0.4376, -0.7293],
          [ 0.8266, -0.3404]],

         [[-1.1183,  1.2155],
          [-1.5073,  1.2155]],

         [[-1.6667, -0.3333],
          [ 0.3333,  0.3333]],

         [[-1.0000,  1.0000],
          [-0.3333,  1.6667]]]])