## Implementation of BatchNorm1D and LayerNorm

In [2]:
import torch
import torch.nn as nn



In [3]:
# Seed the global RNG so the sample tensor is identical on every run.
torch.manual_seed(42)
B, T, C = 2, 3, 2  # batch size, time steps, channels
# Integers drawn from [1, 10), stored as float32 so they can be normalised.
x = torch.randint(low=1, high=10, size=(B, T, C)).to(torch.float32)
x

tensor([[[7., 6.],
         [8., 5.],
         [1., 3.]],

        [[8., 6.],
         [5., 3.],
         [5., 5.]]])

### *BatchNorm1D*

In [22]:
import numpy as np

# NumPy copy of the sample batch, shape (batch, time, channels). It is bound to
# its own name so the torch tensor `x` used by the torch cells is not replaced
# by a plain Python list.
x_np = np.array([[[7., 6.],
                  [8., 5.],
                  [1., 3.]],

                 [[8., 6.],
                  [5., 3.],
                  [5., 5.]]])

# Batch-norm statistics are per channel: reduce over the batch and time axes.
# Channel 0 pools the values [7, 8, 1, 8, 5, 5]; channel 1 pools [6, 5, 3, 6, 3, 5].
means = x_np.mean(axis=(0, 1), keepdims=True)
# np.var defaults to the population variance (ddof=0). Named `variances`
# rather than `vars` so the built-in `vars()` is not shadowed.
variances = x_np.var(axis=(0, 1), keepdims=True)

# Standardise without a learnable scale/shift; 1e-5 keeps the division finite
# when a channel has zero variance.
y_nonscaled = (x_np - means) / np.sqrt(variances + 1e-5)
y_nonscaled


array([[[ 0.54944179,  1.06904153],
        [ 0.96152313,  0.26726038],
        [-1.92304626, -1.33630191]],

       [[ 0.96152313,  1.06904153],
        [-0.27472089, -1.33630191],
        [-0.27472089,  0.26726038]]])

feature_1 = [7,8,1,8,5,5]
feature_2 = [6,5,3,6,3,5]

feature_1_mean = 5.67
feature_2_mean = 4.67

feature_1_std = 2.43
feature_2_std = 1.25



In [4]:
# Reference result from PyTorch's BatchNorm1d, one feature per channel of x.
batch_norm = nn.BatchNorm1d(x.shape[2])
# Swap the time and channel axes before the module call, then swap them back
# afterwards so the result lines up with x again.
x_transposed = torch.transpose(x, 1, 2)
y_normalized = batch_norm(x_transposed)
torch_y = torch.transpose(y_normalized, 1, 2)
torch_y

tensor([[[ 0.5494,  1.0690],
         [ 0.9615,  0.2673],
         [-1.9230, -1.3363]],

        [[ 0.9615,  1.0690],
         [-0.2747, -1.3363],
         [-0.2747,  0.2673]]], grad_fn=<TransposeBackward0>)

In [5]:
# Hand-written batch norm with an identity affine transform (scale 1, shift 0).
epsilon = 1e-5
gamma = torch.ones(x.size(2))   # per-channel scale
beta = torch.zeros(x.size(2))   # per-channel shift

# Per-channel statistics, pooled over the batch (0) and time (1) axes.
# unbiased=False selects the population variance.
mean = x.mean(dim=(0, 1), keepdim=True)
variance = x.var(dim=(0, 1), unbiased=False, keepdim=True)

# Standardise first, then apply scale and shift.
y = gamma * ((x - mean) / (variance + epsilon).sqrt()) + beta
y

tensor([[[ 0.5494,  1.0690],
         [ 0.9615,  0.2673],
         [-1.9230, -1.3363]],

        [[ 0.9615,  1.0690],
         [-0.2747, -1.3363],
         [-0.2747,  0.2673]]])

In [26]:
# The hand-written batch norm (y) should match the nn.BatchNorm1d result (torch_y).
torch.allclose(y, torch_y)

True

In [27]:
# The NumPy result, converted to a float tensor, should match torch_y as well.
torch.allclose(torch.tensor(y_nonscaled, dtype=torch.float), torch_y)

True

### *LayerNorm*

In [10]:
# Reference result from PyTorch's built-in LayerNorm, normalising over the
# last (channel) dimension of x. The module is bound to `torch_layer_norm`
# so it does not share a name with the hand-written `layer_norm` function,
# and the normalised size is read from x instead of being hard-coded.
torch_layer_norm = torch.nn.LayerNorm(normalized_shape=x.size(-1))
torch_normalized = torch_layer_norm(x)
torch_normalized

tensor([[[ 1.0000, -1.0000],
         [ 1.0000, -1.0000],
         [-1.0000,  1.0000]],

        [[ 1.0000, -1.0000],
         [ 1.0000, -1.0000],
         [ 0.0000,  0.0000]]], grad_fn=<NativeLayerNormBackward0>)

In [11]:
def layer_norm(x, eps=1e-5):
    """Normalise ``x`` over its last dimension, with no learnable scale or shift.

    Each slice along the last dimension is shifted to zero mean and scaled to
    unit (population) variance.

    Args:
        x: Floating-point tensor with at least one dimension.
        eps: Small constant added to the variance before the square root so
            that a constant slice does not cause a division by zero.

    Returns:
        A tensor with the same shape as ``x``.
    """
    # dim=-1 rather than a fixed index, so inputs of any rank are supported.
    mu = x.mean(dim=-1, keepdim=True)
    # unbiased=False selects the population variance (divide by N, not N - 1).
    var = x.var(dim=-1, unbiased=False, keepdim=True)
    return (x - mu) / torch.sqrt(var + eps)

# Apply the hand-written layer norm to x and display the result.
x_normalized = layer_norm(x)
x_normalized

tensor([[[ 1.0000, -1.0000],
         [ 1.0000, -1.0000],
         [-1.0000,  1.0000]],

        [[ 1.0000, -1.0000],
         [ 1.0000, -1.0000],
         [ 0.0000,  0.0000]]])

In [50]:
import numpy as np

# NumPy copy of the sample batch, shape (batch, time, channels). It is bound to
# its own name so the torch tensor `x` is not replaced by a plain Python list.
x_np = np.array([[[7., 6.],
                  [8., 5.],
                  [1., 3.]],

                 [[8., 6.],
                  [5., 3.],
                  [5., 5.]]])

# Layer-norm statistics are per position: reduce over the channel axis only.
means = x_np.mean(axis=2, keepdims=True)
# np.var defaults to the population variance (ddof=0). Named `variances`
# rather than `vars` so the built-in `vars()` is not shadowed.
variances = x_np.var(axis=2, keepdims=True)

# Standardise without a learnable scale/shift; 1e-5 keeps the division finite
# for positions whose channels are all equal (zero variance).
y_nonscaled = (x_np - means) / np.sqrt(variances + 1e-5)
y_nonscaled

array([[[ 0.99998   , -0.99998   ],
        [ 0.99999778, -0.99999778],
        [-0.999995  ,  0.999995  ]],

       [[ 0.999995  , -0.999995  ],
        [ 0.999995  , -0.999995  ],
        [ 0.        ,  0.        ]]])

In [56]:
# Both hand-written results (torch and NumPy) must agree with the
# torch.nn.LayerNorm reference stored in torch_normalized.
assert all(
    torch.allclose(result, torch_normalized)
    for result in (x_normalized, torch.tensor(y_nonscaled, dtype=torch.float))
)
print('all equal!')

all equal!
