📝 **Author:** Amirhossein Heydari - 📧 **Email:** <amirhosseinheydari78@gmail.com> - 📍 **Origin:** [mr-pylin/pytorch-workshop](https://github.com/mr-pylin/pytorch-workshop)

---


**Table of contents**<a id='toc0_'></a>    
- [Dependencies](#toc1_)    
- [Dataset Normalization](#toc2_)    
  - [Min-Max](#toc2_1_)    
  - [Z-score](#toc2_2_)    
- [Network Normalization](#toc3_)    
  - [Batch Normalization](#toc3_1_)    
  - [Layer Normalization](#toc3_2_)    
  - [Instance Normalization](#toc3_3_)    
  - [Group Normalization](#toc3_4_)    

<!-- vscode-jupyter-toc-config
	numbering=false
	anchor=true
	flat=false
	minLevel=1
	maxLevel=6
	/vscode-jupyter-toc-config -->
<!-- THIS CELL WILL BE REPLACED ON TOC UPDATE. DO NOT WRITE YOUR TEXT IN THIS CELL -->

# <a id='toc1_'></a>[Dependencies](#toc0_)


In [None]:
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from torchvision.transforms import v2

# <a id='toc2_'></a>[Dataset Normalization](#toc0_)

- Min-Max normalization
- Z-score normalization


In [None]:
# load the CIFAR-10 training split from the local datasets folder (no transform, no download)
trainset = CIFAR10(root="../../datasets", train=True, transform=None, download=False)

# log: inspect the raw container behind the dataset (shape, element type, python type)
print(f"trainset.data.shape : {trainset.data.shape}")
print(f"trainset.data.dtype : {trainset.data.dtype}")
print(f"type(trainset.data) : {type(trainset.data)}")

## <a id='toc2_1_'></a>[Min-Max](#toc0_)

- PyTorch has no built-in feature for this type of normalization, so it is computed manually below


In [None]:
# per-channel extrema: reduce over axes 0, 1 and 2 so that only the last axis survives
# (assumes the last axis of trainset.data is the channel axis — see the shape logged earlier)
min_value, max_value = (
    trainset.data.min(axis=(0, 1, 2)),
    trainset.data.max(axis=(0, 1, 2)),
)

# log
print(f"Minimum values per channel : {min_value}")
print(f"Maximum values per channel : {max_value}")

In [None]:
# rescale every channel to the range (0, 1):
# subtract the channel minimum, then divide by the channel span (max - min)
minmax_trainset_1 = (trainset.data - min_value) / (max_value - min_value)

# stretch the range (0, 1) to (-1, 1): double the width, then shift down by one
minmax_trainset_2 = 2 * minmax_trainset_1 - 1

# log: the per-channel extrema should now sit exactly on the target range boundaries
print(f"Minimum values for minmax_trainset_1 : {minmax_trainset_1.min(axis=(0, 1, 2))}")
print(f"Maximum values for minmax_trainset_1 : {minmax_trainset_1.max(axis=(0, 1, 2))}")
print("-" * 50)
print(f"Minimum values for minmax_trainset_2 : {minmax_trainset_2.min(axis=(0, 1, 2))}")
print(f"Maximum values for minmax_trainset_2 : {minmax_trainset_2.max(axis=(0, 1, 2))}")

## <a id='toc2_2_'></a>[Z-score](#toc0_)

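- PyTorch has no built-in feature that computes the dataset statistics for this type of normalization, but once the per-channel mean and std are known, `torchvision.transforms.v2.Normalize(mean, std)` applies it as a transform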


In [None]:
# per-channel statistics of the whole training set: axes 0, 1 and 2 are reduced,
# so one mean and one standard deviation remain per entry of the last axis
# NOTE(review): presumably trainset.data is a NumPy array (the `axis=` keyword suggests so),
# in which case `.std` uses the population estimator (ddof=0) by default — confirm
mean_value = trainset.data.mean(axis=(0, 1, 2))
std_value = trainset.data.std(axis=(0, 1, 2))

# log
print(f"Mean values per channel : {mean_value}")
print(f"STD values per channel  : {std_value}")

In [None]:
# standardize with mean:0 and std:1
# (subtract the per-channel mean, then divide by the per-channel standard deviation)
zscore_trainset_1 = (trainset.data - mean_value) / std_value

# standardize with mean:2 and std:5
# (scaling a standardized variable by 5 sets its std to 5; adding 2 then sets its mean to 2)
zscore_trainset_2 = zscore_trainset_1 * 5 + 2

# log: labels name the z-score arrays that are actually being summarized
print(f"Mean values for zscore_trainset_1 : {zscore_trainset_1.mean(axis=(0, 1, 2))}")
print(f"STD values for zscore_trainset_1  : {zscore_trainset_1.std(axis=(0, 1, 2))}")
print("-" * 50)
print(f"Mean values for zscore_trainset_2 : {zscore_trainset_2.mean(axis=(0, 1, 2))}")
print(f"STD values for zscore_trainset_2  : {zscore_trainset_2.std(axis=(0, 1, 2))}")

# <a id='toc3_'></a>[Network Normalization](#toc0_)

- Batch normalization
- Layer normalization
- Instance normalization
- Group normalization


In [None]:
# preprocessing pipeline: wrap each sample as an image tensor, then cast it to float32
# with value scaling enabled (scale=True)
transform = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])

# reload the training split so that every sample passes through the pipeline above
trainset = CIFAR10(root="../../datasets", train=True, transform=transform, download=False)

In [None]:
batch_size = 8
# shuffle=False keeps the sample order fixed, so the same first batch is produced on every run
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=False)

# take only the first element of the first batch (index 0); the rest of the batch is ignored here
first_batch = next(iter(trainloader))[0]

# log
print(f"first_batch.shape : {first_batch.shape}")
print(f"first_batch.dtype : {first_batch.dtype}")
print(f"type(first_batch) : {type(first_batch)}")

In [None]:
# the number of input channels is read from dimension 1 of the batch itself
in_channels = first_batch.size(1)
out_channels = 16

# a single 3x3 convolution is enough to produce feature maps for the normalization demos
model = torch.nn.Sequential(
    torch.nn.Conv2d(in_channels, out_channels, kernel_size=3),
)

# forward the batch once; the result is what the normalization layers operate on
features_maps = model(first_batch)

# log
print(f"features_maps.shape : {features_maps.shape}")

## <a id='toc3_1_'></a>[Batch Normalization](#toc0_)


In [None]:
# manual batch normalization: one mean/std per channel, computed over the batch dimension (0)
# and the two spatial dimensions (2, 3) of the convolution output
bn_mean = features_maps.mean(dim=(0, 2, 3), keepdim=True)
# correction=0 selects the biased (population) estimator, which is what BatchNorm2d uses to
# normalize a batch; torch's default unbiased estimator differs by a factor of sqrt(n / (n - 1))
bn_std = features_maps.std(dim=(0, 2, 3), correction=0, keepdim=True)

bn_result_1 = (features_maps - bn_mean) / bn_std
# a freshly created module is in training mode, so it normalizes with the statistics of this batch;
# affine=False and eps=0 reduce the layer to the plain (x - mean) / std formula above
bn_result_2 = torch.nn.BatchNorm2d(out_channels, affine=False, eps=0)(features_maps)

# log
print(f"bn_mean.shape : {bn_mean.shape}")
print(f"bn_std.shape  : {bn_std.shape}")
# with matching estimators only float32 round-off remains, so a tight tolerance is enough
print(torch.allclose(bn_result_1, bn_result_2, atol=1e-04))

## <a id='toc3_2_'></a>[Layer Normalization](#toc0_)


In [None]:
# manual layer normalization: one mean/std per sample, computed over every non-batch
# dimension (1, 2, 3) of the convolution output
ln_mean = features_maps.mean(dim=(1, 2, 3), keepdim=True)
# correction=0 selects the biased (population) estimator, which is what LayerNorm uses;
# torch's default unbiased estimator differs by a factor of sqrt(n / (n - 1))
ln_std = features_maps.std(dim=(1, 2, 3), correction=0, keepdim=True)

ln_result_1 = (features_maps - ln_mean) / ln_std
# normalized_shape = all dimensions after the batch dimension, matching dim=(1, 2, 3) above;
# elementwise_affine=False and eps=0 reduce the layer to the plain (x - mean) / std formula
ln_result_2 = torch.nn.LayerNorm(features_maps.shape[1:], elementwise_affine=False, eps=0)(features_maps)

# log
print(f"ln_mean.shape : {ln_mean.shape}")
print(f"ln_std.shape  : {ln_std.shape}")
# with matching estimators only float32 round-off remains, so a tight tolerance is enough
print(torch.allclose(ln_result_1, ln_result_2, atol=1e-04))

## <a id='toc3_3_'></a>[Instance Normalization](#toc0_)


In [None]:
# manual instance normalization: one mean/std per (sample, channel) pair, computed over the
# two spatial dimensions (2, 3) of the convolution output
in_mean = features_maps.mean(dim=(2, 3), keepdim=True)
# correction=0 selects the biased (population) estimator, which is what InstanceNorm2d uses;
# the default unbiased estimator is noticeably off here because each statistic is computed
# from a single feature map only
in_std = features_maps.std(dim=(2, 3), correction=0, keepdim=True)

in_result_1 = (features_maps - in_mean) / in_std
# affine=False and eps=0 reduce the layer to the plain (x - mean) / std formula above
in_result_2 = torch.nn.InstanceNorm2d(out_channels, affine=False, eps=0)(features_maps)

# log
print(f"in_mean.shape : {in_mean.shape}")
print(f"in_std.shape  : {in_std.shape}")
# with matching estimators only float32 round-off remains, so a tight tolerance is enough
print(torch.allclose(in_result_1, in_result_2, atol=1e-04))

## <a id='toc3_4_'></a>[Group Normalization](#toc0_)


In [None]:
# split the channel dimension into two groups: channels [0, 8) and channels [8, end)
groups = [features_maps[:, :8, :, :], features_maps[:, 8:, :, :]]

# manual group normalization: within each group, one mean/std per sample computed over the
# group's channels and the spatial dimensions; correction=0 selects the biased (population)
# estimator, which is what GroupNorm uses
gn_mean_1 = groups[0].mean(dim=(1, 2, 3), keepdim=True)
gn_std_1 = groups[0].std(dim=(1, 2, 3), correction=0, keepdim=True)
result_1 = (groups[0] - gn_mean_1) / gn_std_1

gn_mean_2 = groups[1].mean(dim=(1, 2, 3), keepdim=True)
gn_std_2 = groups[1].std(dim=(1, 2, 3), correction=0, keepdim=True)
result_2 = (groups[1] - gn_mean_2) / gn_std_2

# stitch the normalized groups back together along the channel dimension
gn_result_1 = torch.concatenate([result_1, result_2], dim=1)
# affine=False and eps=0 reduce the layer to the plain (x - mean) / std formula above
# (eps=0 also keeps this cell consistent with the other normalization cells)
gn_result_2 = torch.nn.GroupNorm(num_groups=2, num_channels=out_channels, affine=False, eps=0)(features_maps)

# log
print(f"gn_mean_1.shape : {gn_mean_1.shape}")
print(f"gn_std_1.shape  : {gn_std_1.shape}")
print(f"gn_mean_2.shape : {gn_mean_2.shape}")
print(f"gn_std_2.shape  : {gn_std_2.shape}")
# with matching estimators only float32 round-off remains, so a tight tolerance is enough
print(torch.allclose(gn_result_1, gn_result_2, atol=1e-04))