In [2]:
import math
import inspect
from dataclasses import dataclass

import torch
import torch.nn as nn
from torch.nn import functional as F

In [None]:
class LayerNorm(nn.Module):
    """Layer normalization over the last dimension, with an optional bias.

    The module owns a learnable per-feature scale (``weight``, initialized to
    ones) and, when ``bias`` is truthy, a learnable per-feature shift
    (``bias``, initialized to zeros). With ``bias`` falsy the shift is omitted.

    NOTE(review): recent PyTorch releases expose a ``bias`` flag on
    ``nn.LayerNorm`` directly; confirm the minimum supported version before
    replacing this class with the built-in.

    Args:
        ndim: Number of features in the last dimension (the embedding dim).
        bias: Whether to create the learnable shift parameter.
        eps: Constant added to the variance for numerical stability.
            Defaults to ``1e-5``, the value this class previously hard-coded.
    """

    def __init__(self, ndim, bias, eps=1e-5):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(ndim))
        # Leaving `bias` as None makes F.layer_norm skip the shift entirely.
        self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
        # Plain attribute (not a buffer/parameter) so state_dict keys are unchanged.
        self.eps = eps

    def forward(self, input):
        """Normalize ``input`` over its trailing ``ndim`` features.

        forward() is what runs when the module is called, e.g. ``ln(x)``.
        """
        # normalized_shape is taken from the weight, i.e. (ndim,), so only the
        # last dimension of `input` is normalized.
        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, self.eps)

In [4]:
import torch
from model import LayerNorm

# NOTE(review): LayerNorm is imported from model.py here rather than taken
# from the class cell above — confirm the two definitions are kept in sync.

# 1. Initialize LayerNorm for an embedding size of 4
# bias=True keeps the learnable shift, i.e. the standard behavior
ln = LayerNorm(ndim=4, bias=True)

# 2. Create Example Input
# Shape: (Batch Size=1, Sequence Length=1, Embedding Dim=4)
# The values are deliberately on very different scales:
x = torch.tensor([[[10.0, -5.0, 50.0, 25.0]]])

# 3. Standardize!
output = ln(x)

# Sanity checks. Layer norm divides by the *population* standard deviation
# (biased estimator), so that is the statistic that must come out as 1.
# These assume a freshly initialized layer (unit scale, zero shift).
assert torch.allclose(output.mean(), torch.tensor(0.0), atol=1e-5)
assert torch.allclose(output.std(unbiased=False), torch.tensor(1.0), atol=1e-4)

# Tensor.std() defaults to the *sample* standard deviation (Bessel's
# correction, divide by N-1). For N=4 normalized values that prints
# sqrt(4/3) ~= 1.15 rather than 1.00 — expected, not a normalization failure.
print("--- INPUT ---")
print(x)
print(f"Mean: {x.mean().item():.2f}, Std: {x.std().item():.2f}")

print("\n--- OUTPUT ---")
print(output)
print(f"Mean: {output.mean().item():.2f}, Std: {output.std().item():.2f}")

--- INPUT ---
tensor([[[10., -5., 50., 25.]]])
Mean: 20.00, Std: 23.45

--- OUTPUT ---
tensor([[[-0.4924, -1.2309,  1.4771,  0.2462]]],
       grad_fn=<NativeLayerNormBackward0>)
Mean: 0.00, Std: 1.15
