# 4. RMSNorm (Root Mean Square Normalization)

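RMSNorm normalizes the activations of a neural network by rescaling them according to their root mean square.
It is simpler than LayerNorm (it does not subtract the mean), yet it is very effective.
Keeping activation magnitudes in a consistent range makes training more stable.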


In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt


## 1. What is RMSNorm?

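RMSNorm divides each activation by the root mean square (RMS) of the activation vector, then multiplies by a scale.

Formula: $\text{output} = \dfrac{x}{\text{RMS}(x)} \cdot \text{scale}$

where $\text{RMS}(x) = \sqrt{\text{mean}(x^2)} = \sqrt{\tfrac{1}{n}\sum_{i=1}^{n} x_i^2}$.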


In [None]:
# Manual RMSNorm calculation, one step at a time, on a small example vector.
x = torch.tensor([1.0, 2.0, 3.0, 4.0])

# Step 1: Calculate RMS (Root Mean Square)
# RMS = sqrt(mean(x²))
# Note: library implementations such as torch.nn.RMSNorm add a small epsilon
# under the square root so an all-zero input does not divide by zero. It is
# left out here so the code matches the formula RMS = sqrt(mean(x²)) exactly.
x_squared = x ** 2
mean_x_squared = x_squared.mean()
rms = torch.sqrt(mean_x_squared)

# Step 2: Normalize (after this step the vector has an RMS of 1)
normalized = x / rms

# Step 3: Scale (optional, learnable parameter)
scale = 1.0  # Usually a learnable parameter
output = normalized * scale

# RMS of the result, computed once so it can be both printed and checked.
output_rms = torch.sqrt((output ** 2).mean())

print("RMSNorm Calculation:")
print(f"Input: {x}")
print()
print(f"Step 1: x² = {x_squared}")
print(f"        mean(x²) = {mean_x_squared:.4f}")
print(f"        RMS = √{mean_x_squared:.4f} = {rms:.4f}")
print()
print("Step 2: Normalize = x / RMS")
print(f"        = {x} / {rms:.4f}")
print(f"        = {normalized}")
print()
print(f"Step 3: Scale = {normalized} × {scale}")
print(f"        = {output}")
print()

print(f"Verification: RMS of output = {output_rms:.4f}")
print("(Should be close to 1.0 if scale=1.0)")

# Enforce the verification instead of only printing it: the normalized vector
# has RMS 1, so the scaled output must have RMS |scale| (up to float32 error).
assert abs(output_rms.item() - abs(scale)) < 1e-5, (
    f"RMS of output is {output_rms.item():.6f}, expected {abs(scale):.6f}"
)
