In [1]:
import torch.nn as nn
import torch

# The layers below are fed inputs of shape (batch=2, channels=4, height=2, width=2).
# momentum=0.001: each batch moves the running statistics by only 0.1%.
BN = nn.BatchNorm2d(num_features=4, momentum=0.001)
# Normalizes over the whole (C, H, W) = (4, 2, 2) extent of each sample.
LN = nn.LayerNorm(normalized_shape=[4, 2, 2])
# Running statistics are opt-in for InstanceNorm, hence track_running_stats=True.
IN = nn.InstanceNorm2d(num_features=4, momentum=0.001, track_running_stats=True)
# 4 channels split into 2 groups of 2 channels each.
GN = nn.GroupNorm(num_groups=2, num_channels=4)

In [20]:
with torch.no_grad():
    # Outputs of each normalization layer, one entry per sampled batch.
    bns = []
    lns = []
    ins = []
    gns = []
    # Loop-invariant transforms, built once:
    #   channel j is scaled by (j + 1), sample i is shifted by 0.25 (i=0) or 0.75 (i=1),
    #   so X[i, j, _, _] ~ N(mean = 0.25 or 0.75, std = j + 1).
    channel_scale = torch.tensor([1.0, 2.0, 3.0, 4.0]).view(1, 4, 1, 1)
    sample_offset = torch.tensor([0.25, 0.75]).view(2, 1, 1, 1)
    # Input: batch size=2, channel=4
    Xs = []
    # NOTE: nothing in this notebook switches the layers to eval(), so each call
    # below also updates the running statistics of BN and IN; re-running this
    # cell keeps accumulating into them.
    for _ in range(1000):
        X = torch.randn(2, 4, 2, 2) * channel_scale + sample_offset
        Xs.append(X)
        bns.append(BN(X))
        lns.append(LN(X))
        ins.append(IN(X))
        gns.append(GN(X))

    # Stack the sampled batches along a new leading axis -> (N, B, C, H, W).
    Xs = torch.stack(Xs)
    print(Xs.shape)
    # N(0, 1) * a + b = N(b, a): sanity-check the empirical mean per sample index
    # and the empirical std per channel. The per-channel std pools both samples,
    # so their different offsets inflate it slightly.
    print(torch.mean(Xs, dim=(0, 2, 3, 4)))
    print(torch.std(Xs, dim=(0, 1, 3, 4)))

torch.Size([1000, 2, 4, 2, 2])
tensor([0.2167, 0.7709])
tensor([1.0249, 2.0181, 2.9922, 3.9332])


#### Group Normalization
> [Group Normalization, ECCV 2018](https://arxiv.org/abs/1803.08494)

![](../report_imgs/group_normalization.png)

Normalization: A technique that makes model training faster and more stable by re-centering and re-scaling the inputs.
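* Batch normalization: Normalization for each channel, over the batch and spatial dimensions.
* Layer normalization: Normalization for each sample, over all channels and spatial dimensions.
* Instance normalization: Normalization for each sample and each channel, over the spatial dimensions.
* Group normalization: Normalization for each sample and each group of channels, over the spatial dimensions.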

> Note: If the batch size is large enough, the performance ranking is BN > GN > LN > IN.
> However, GPU memory limits sometimes prevent using a batch size large enough for BN.
> Thus, we use GN in this task.


In [21]:
# BN keeps one running mean and one running variance per channel (4 values each).
# Expected running mean: (0.25 + 0.75) / 2 = 0.5 for every channel.
# Expected running std: roughly [1, 2, 3, 4], the per-channel scales.
bn_running_std = BN.running_var ** 0.5
print("BN:", BN.running_mean, bn_running_std)
def result(x):
    """Summarize a collection of layer outputs across runs.

    Args:
        x: iterable of equally shaped 4-D tensors, one per run
           (here each is a normalization output of shape (B, C, H, W)).

    Returns:
        A pair (mean, std) of (B, C) tensors: the mean and the standard
        deviation taken over the runs, read at spatial position [0, 0].
    """
    # New leading "run" axis: (num_runs, B, C, H, W).
    runs = torch.stack(list(x))
    mean_over_runs = runs.mean(dim=0)
    std_over_runs = runs.std(dim=0)
    # Keep a single spatial location so the summary is one value per (sample, channel).
    return mean_over_runs[:, :, 0, 0], std_over_runs[:, :, 0, 0]

# Mean and std of the BN outputs across runs, one value per (sample, channel).
# BN normalizes with statistics shared by both samples of a batch, so the
# samples' different offsets are not removed individually.
bn_mean, bn_std = result(bns)
print("BN result:\n", bn_mean, bn_std, sep='\n')

BN: tensor([0.5023, 0.4710, 0.4848, 0.5362]) tensor([1.0352, 2.0148, 2.9973, 3.9475])
BN result:

tensor([[-0.2155, -0.2184, -0.0668, -0.0943],
        [ 0.2823,  0.1797,  0.0206,  0.0361]])
tensor([[0.9713, 0.9612, 0.9779, 0.9821],
        [0.9870, 0.9552, 1.0062, 0.9939]])


In [4]:
# IN computes its statistics per (sample, channel) pair, i.e. 2 x 4 of them per batch,
# so every output entry should have mean 0 and std 1 across runs.
# The tracked running statistics hold one value per channel.
in_running_std = IN.running_var ** 0.5
print("IN:", IN.running_mean, in_running_std)
in_mean, in_std = result(ins)
print("IN result:\n", in_mean, in_std, sep='\n')

IN: tensor([0.3266, 0.3093, 0.3428, 0.2990]) tensor([0.9990, 1.6975, 2.4882, 3.2176])
IN result:

tensor([[-0.0158,  0.0414, -0.0042, -0.0302],
        [ 0.0101,  0.0496, -0.0134,  0.0412]])
tensor([[1.0015, 0.9890, 0.9876, 1.0147],
        [1.0100, 1.0073, 1.0017, 1.0115]])


In [22]:
# LN computes one mean/std per sample (2 per batch) and keeps no running statistics.
# Output mean: should be ~0 everywhere, because each sample's own offset
# (0.25 for sample 1, 0.75 for sample 2) is subtracted out.
# Output std: both samples show the same pattern, growing with the channel scale,
# because all channels of a sample share a single mean/std.
ln_mean, ln_std = result(lns)
print("LN result:\n", ln_mean, ln_std, sep='\n')

LN result:

tensor([[ 0.0315, -0.0710,  0.0216,  0.0093],
        [ 0.0028,  0.0141, -0.0813, -0.0391]])
tensor([[0.4771, 0.7857, 1.0952, 1.3753],
        [0.4890, 0.7932, 1.1020, 1.3691]])


In [6]:
# With 4 channels, the number of groups decides what GroupNorm reduces to:
#   groups = 1 -> Layer Norm (one group holding all channels)
#   groups = 4 -> Instance Norm (one channel per group)
#   groups = 2 -> channels split into 2 blocks, each normalized like Layer Norm
gn_mean, gn_std = result(gns)
print("GN result:\n", gn_mean, gn_std, sep='\n')

GN result:

tensor([[-0.0160,  0.0487, -0.0040, -0.0190],
        [ 0.0212,  0.0353,  0.0173,  0.0181]])
tensor([[0.7633, 1.1811, 0.8882, 1.0921],
        [0.7640, 1.2026, 0.8888, 1.1119]])
