In [1]:
import opnorm_grad as og
import torch
import torch.nn as nn

class TestModel(nn.Module):
    """Two stacked 1-D convolutions followed by a softmax over the last axis.

    Channel progression is 1 -> 16 -> 32, both layers with ``kernel_size=3``
    and no padding.

    Args:
        opnorm: When truthy, both layers are built from ``og.OpNormConv1d``;
            otherwise from ``nn.Conv1d``. The constructor keyword arguments
            are the same in both cases.
    """

    def __init__(self, opnorm=False):
        super().__init__()
        # Choose the layer type once; the channel/kernel settings are shared.
        conv_cls = og.OpNormConv1d if opnorm else nn.Conv1d
        self.conv1 = conv_cls(in_channels=1, out_channels=16, kernel_size=3)
        self.conv2 = conv_cls(in_channels=16, out_channels=32, kernel_size=3)

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Run ``conv1`` then ``conv2`` on ``x`` and softmax the result.

        The softmax uses ``dim=-1``, i.e. the last axis of the conv output
        (the sequence/length axis for an ``nn.Conv1d`` output).
        """
        return self.softmax(self.conv2(self.conv1(x)))

In [2]:
# Function to compute gradients and return their norms
def get_gradient_norms(model, input_data):
    """Backpropagate a summed-output loss and report per-parameter gradient norms.

    Existing gradients on ``model`` are cleared first, so repeated calls do
    not accumulate. The loss is simply ``model(input_data).sum()``.

    Args:
        model: Module to evaluate; its parameters' ``.grad`` fields are
            overwritten as a side effect.
        input_data: Input passed directly to ``model``.

    Returns:
        dict mapping each parameter name (as yielded by
        ``model.named_parameters()``) to the norm of its gradient as a Python
        float. Parameters whose ``.grad`` is ``None`` after the backward pass
        are omitted.
    """
    model.zero_grad()
    model(input_data).sum().backward()

    return {
        param_name: tensor.grad.norm().item()
        for param_name, tensor in model.named_parameters()
        if tensor.grad is not None
    }


In [5]:
# Demonstrate OpNormConv2d with different image sizes: this cell compares the
# conv1.weight gradient norm of an OpNorm model against an nn.Conv2d baseline
# on a 32x32 and a 128x128 input.
print("=== 2D Convolution Demo ===")

class TestModel2D(nn.Module):
    """Single 3x3 2-D convolution mapping 3 input channels to 16 output channels.

    ``padding=1`` is used together with ``kernel_size=3``.

    Args:
        opnorm: When truthy, the layer is an ``og.OpNormConv2d``; otherwise an
            ``nn.Conv2d``. Both are constructed with identical keyword
            arguments.
    """

    def __init__(self, opnorm=False):
        super().__init__()
        # Only the layer class differs between the two variants.
        conv_cls = og.OpNormConv2d if opnorm else nn.Conv2d
        self.conv1 = conv_cls(
            in_channels=3,
            out_channels=16,
            kernel_size=3,
            padding=1,
        )

    def forward(self, x):
        """Return ``conv1`` applied to ``x``; no activation is applied."""
        features = self.conv1(x)
        return features

# Random inputs at two spatial resolutions (batch of 1, 3 channels each).
small_image = torch.randn(1, 3, 32, 32)    # 32x32
large_image = torch.randn(1, 3, 128, 128)  # 128x128

# Re-seed before constructing each model so both start from the same RNG state.
torch.manual_seed(42)
opnorm_model_2d = TestModel2D(opnorm=True)
torch.manual_seed(42)
baseline_model_2d = TestModel2D(opnorm=False)


def _report_weight_grad_norms(header, image):
    """Print ``header`` and both models' conv1.weight gradient norms for ``image``.

    Returns the full gradient-norm dicts as ``(opnorm_grads, baseline_grads)``.
    """
    print(header)
    opnorm_grads = get_gradient_norms(opnorm_model_2d, image)
    baseline_grads = get_gradient_norms(baseline_model_2d, image)
    print(f"  OpNorm grad norm: {opnorm_grads['conv1.weight']:.4f}")
    print(f"  Baseline grad norm: {baseline_grads['conv1.weight']:.4f}")
    return opnorm_grads, baseline_grads


# Small image first, then the large one (the leading newline separates the two reports).
small_opnorm_grads, small_baseline_grads = _report_weight_grad_norms(
    "Small Image (32x32):", small_image
)
large_opnorm_grads, large_baseline_grads = _report_weight_grad_norms(
    "\nLarge Image (128x128):", large_image
)

# How much does the weight-gradient norm grow when the image gets larger?
print(f"\nBaseline ratio (large/small): {large_baseline_grads['conv1.weight'] / small_baseline_grads['conv1.weight']:.2f}")
print(f"OpNorm ratio (large/small): {large_opnorm_grads['conv1.weight'] / small_opnorm_grads['conv1.weight']:.2f}")

print("\nOpNorm2D normalizes by H_out × W_out, keeping gradients consistent across image sizes!")


=== 2D Convolution Demo ===
Small Image (32x32):
  OpNorm grad norm: 0.2607
  Baseline grad norm: 266.9248

Large Image (128x128):
  OpNorm grad norm: 0.1656
  Baseline grad norm: 2713.1899

Baseline ratio (large/small): 10.16
OpNorm ratio (large/small): 0.64

OpNorm2D normalizes by H_out × W_out, keeping gradients consistent across image sizes!
