In [1]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import numpy as np



In [29]:
# ============================================================================
# Visualize ReLU or GELU Curve
# ============================================================================

# Create a range of input values from -5 to 5
import torch.nn.functional as F
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

x = torch.random(-5, 5, (1000,1)).to(device)
print(x.shape)

weight = nn.Parameter(torch.ones(1)).to(device)
bias = nn.Parameter(torch.zeros(1)).to(device)

y = F.layer_norm(x, weight.shape, 
weight,
bias, eps=1e-5).to_device().to(device)

# Convert to numpy for plotting
x_np = x.numpy()
y_np = y.detach().numpy()

# Create the plot
plt.figure(figsize=(10, 6))
plt.plot(x_np, y_np, 'b-', linewidth=2, label='ReLU: f(x) = max(0, x)')
plt.axhline(y=0, color='k', linestyle='--', linewidth=0.5, alpha=0.5)
plt.axvline(x=0, color='k', linestyle='--', linewidth=0.5, alpha=0.5)
plt.grid(True, alpha=0.3)
plt.xlabel('Input (x)', fontsize=12)
plt.ylabel('Output f(x)', fontsize=12)
plt.title('ReLU Activation Function', fontsize=14, fontweight='bold')
plt.legend(fontsize=11)
plt.xlim(-5, 5)
plt.ylim(-0.5, 5)

# Add annotations
plt.annotate('Linear region\n(slope = 1)', 
             xy=(3, 3), xytext=(3.5, 2),
             arrowprops=dict(arrowstyle='->', color='green', lw=1.5),
             fontsize=10, color='green', fontweight='bold')

plt.annotate('Zero region\n(slope = 0)', 
             xy=(-3, 0), xytext=(-4, 1.5),
             arrowprops=dict(arrowstyle='->', color='red', lw=1.5),
             fontsize=10, color='red', fontweight='bold')

plt.tight_layout()
plt.show()

print("\nReLU Characteristics:")
print("- For x > 0: Linear with slope = 1")
print("- For x <= 0: Output = 0 (dead neurons)")
print("- Non-linear activation function")
print("- Helps with vanishing gradient problem")


torch.Size([1000, 1])


NotImplementedError: "LayerNormKernelImpl" not implemented for 'Long'

In [None]:
# ============================================================================
# Practical Example: Freezing Model Parameters
# ============================================================================

print("=" * 70)
print("Practical Example: Freezing Model Parameters")
print("=" * 70)

# Create a simple model
class SimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(5, 10)
        self.layer2 = nn.Linear(10, 3)
    
    def forward(self, x):
        return self.layer2(torch.relu(self.layer1(x)))

model = SimpleModel()

print("Before freezing:")
for name, param in model.named_parameters():
    print(f"  {name}: requires_grad = {param.requires_grad}")

# Freeze layer1 parameters
print("\nFreezing layer1...")
for param in model.layer1.parameters():
    param.requires_grad_(False)

print("\nAfter freezing layer1:")
for name, param in model.named_parameters():
    print(f"  {name}: requires_grad = {param.requires_grad}")

# Test forward and backward
x = torch.randn(2, 5, requires_grad=True)
output = model(x)
loss = output.sum()

print(f"\nForward pass successful: output shape = {output.shape}")

# Backward pass - only layer2 will have gradients
loss.backward()

print("\nGradients after backward:")
for name, param in model.named_parameters():
    if param.grad is not None:
        print(f"  {name}: gradient exists (shape: {param.grad.shape})")
    else:
        print(f"  {name}: no gradient (frozen)")
