# Sequential vs Sequential2D Comparison

This notebook compares PyTorch's built-in Sequential container with our custom Sequential2D container, demonstrating:

1. **Functional Equivalence**: Cases where both produce identical results
2. **Architectural Differences**: Unique capabilities of Sequential2D

## Learning Objectives

- Understand the differences between linear sequential architectures and 2D block architectures
- Learn when Sequential2D provides advantages over standard Sequential
- Analyze performance trade-offs in different scenarios
- Gain insights into advanced neural network architecture design

In [1]:
# Import required libraries
import torch
import torch.nn as nn
import numpy as np
import time
import matplotlib.pyplot as plt
import pandas as pd
from iterativennsimple.Sequential2D import Sequential2D, Identity
from iterativennsimple.Sequential1D import Sequential1D

# Seed NumPy's and PyTorch's random number generators with the same fixed
# value so repeated runs of the notebook draw the same random numbers.
np.random.seed(42)
torch.manual_seed(42)

# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
# To force CPU for this example, use instead: device = torch.device("cpu")
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


## Part 1: Functional Equivalence

Let's start by showing how Sequential2D can replicate the behavior of PyTorch's Sequential container.

In [2]:
def create_equivalent_networks(input_size=784, hidden_size=128, output_size=10):
    """
    Create functionally equivalent networks using Sequential and Sequential2D.

    Both networks are built around the same three nn.Linear modules, so they
    share one set of weights: a three-layer MLP with ReLU activations between
    the linear layers.

    Args:
        input_size: Number of input features (default 784, MNIST-like input).
        hidden_size: Width of the two hidden layers (default 128).
        output_size: Number of output features (default 10).

    Returns:
        A tuple (sequential_net, sequential2d_net): the nn.Sequential chain and
        the Sequential2D block network wrapping the same linear layers.
    """
    # The linear layers are created once and reused by both containers.
    f1 = nn.Linear(input_size, hidden_size)
    f2 = nn.Linear(hidden_size, hidden_size)
    f3 = nn.Linear(hidden_size, output_size)

    # PyTorch Sequential Network
    sequential_net = nn.Sequential(
        f1,
        nn.ReLU(),
        f2,
        nn.ReLU(),
        f3,
    )

    # One slot per stage of the chain: input, hidden 1, hidden 2, output.
    # The same sizes are used for the input side and the output side.
    in_features_list = [input_size, hidden_size, hidden_size, output_size]
    out_features_list = [input_size, hidden_size, hidden_size, output_size]

    # Create blocks matrix for linear chain.
    # NOTE(review): the identity block presumably carries slot 0 through
    # unchanged on every application -- confirm against Identity.
    identity_block = Identity(in_features=input_size, out_features=input_size)
    # Note, the ReLU activations are included in the Sequential1D blocks, but can go different places
    # in the Sequential2D structure.
    F1 = Sequential1D(nn.Sequential(f1),            in_features=input_size,  out_features=hidden_size)
    F2 = Sequential1D(nn.Sequential(nn.ReLU(), f2), in_features=hidden_size, out_features=hidden_size)
    F3 = Sequential1D(nn.Sequential(nn.ReLU(), f3), in_features=hidden_size, out_features=output_size)

    # NOTE: Mind the transposed structure of the blocks in the matrix!
    # Each block's in_features matches in_features_list[row] and its
    # out_features matches out_features_list[column].
    blocks = [[identity_block, F1,   None, None],
              [None,           None, F2,   None],
              [None,           None, None, F3],
              [None,           None, None, None]]

    # Expected parameter count of the three linear layers (weights + biases),
    # printed so it can be compared with the counts reported for the networks.
    W_parameters = input_size * hidden_size + hidden_size * hidden_size + hidden_size * output_size
    b_parameters = hidden_size + hidden_size + output_size
    print(f"Theoretical number of parameters:{W_parameters + b_parameters}")

    sequential2d_net = Sequential2D(in_features_list, out_features_list, blocks)
    return sequential_net, sequential2d_net

# Build both networks once; test_equivalence() reads these module-level names.
seq_net, seq2d_net = create_equivalent_networks()

print("Network architectures created successfully!")
# Each count adds up the element count of every parameter tensor in the network.
print(f"Sequential parameters: {sum(map(torch.numel, seq_net.parameters()))}")
print(f"Sequential2D parameters: {sum(map(torch.numel, seq2d_net.parameters()))}")

Theoretical number of parameters:118282
Network architectures created successfully!
Sequential parameters: 118282
Sequential2D parameters: 118282


In [3]:
def test_equivalence():
    """
    Compare the outputs of seq_net and seq2d_net on one random batch.

    Uses the module-level seq_net and seq2d_net. Prints the largest absolute
    element-wise difference and whether it is below 1e-6.

    Returns:
        A tuple (seq_output, seq2d_output): the Sequential output tensor and the
        list returned by the last Sequential2D application.
    """
    # Random test batch: 32 samples with 784 features each.
    test_input = torch.randn(32, 784)

    # Only forward passes are needed, so gradient tracking is switched off.
    with torch.no_grad():
        # Sequential network forward pass
        seq_output = seq_net(test_input)

        # The Sequential2D network works on a list with one entry per slot;
        # initially only the first slot holds data.
        seq2d_output = [test_input, None, None, None]

        # Here is the magic! You iterate a *fixed* function (seq2d_net) on the input x. Through the magic of
        # linear algebra, this is equivalent to the sequential network.
        for _ in range(3):
            seq2d_output = seq2d_net(seq2d_output)

    # Check if outputs are identical.
    # Note: the "output" of the Sequential2D is a list, where the last element
    # is the output for this particular structure.
    max_diff = (seq_output - seq2d_output[3]).abs().max().item()
    verdict = 'identical' if max_diff < 1e-6 else 'different'

    print(f"Maximum difference between outputs: {max_diff:.2e}")
    print(f"Outputs are {verdict}")

    return seq_output, seq2d_output

# Run the comparison and keep both results: the Sequential output tensor and
# the list produced by the last Sequential2D application.
seq_output, seq2d_output = test_equivalence()

Maximum difference between outputs: 0.00e+00
Outputs are identical


## Part 2: Training Comparison

In [None]:
# TODO: Fill in the equivalence test for training.

## Part 3: Unique Capabilities of Sequential2D

Now let's explore scenarios where Sequential2D offers capabilities that standard Sequential cannot provide.

In [4]:
def create_complex_sequential2d():
    """
    Build a Sequential2D network whose connectivity pattern cannot be
    represented by a standard Sequential container.

    The network is created from a configuration dictionary via
    Sequential2D.from_config, and a short description plus the total
    parameter count is printed.

    Returns:
        The constructed Sequential2D network.
    """
    # Upper-triangular grid of block types: 'Linear' marks a connection,
    # None marks the absence of a block.
    block_types = [
        ['Linear', 'Linear', None,     None],
        [None,     'Linear', 'Linear', 'Linear'],
        [None,     None,     'Linear', 'Linear'],
        [None,     None,     None,     'Linear'],
    ]

    # Create a complex connectivity pattern
    cfg = {
        'in_features_list': [50, 100, 200, 150],
        'out_features_list': [100, 200, 150, 10],
        'block_types': block_types,
    }

    complex_net = Sequential2D.from_config(cfg)

    # Fixed description lines first, then the parameter count of the built network.
    summary_lines = (
        "Complex Sequential2D Network Architecture:",
        "- Multiple parallel paths from input to output",
        "- Skip connections across multiple layers",
        "- Aggregation of features at different scales",
    )
    for line in summary_lines:
        print(line)
    print(f"- Total parameters: {sum(p.numel() for p in complex_net.parameters())}")

    return complex_net

# Build the complex network once; demonstrate_complex_forward() reads this module-level name.
complex_net = create_complex_sequential2d()

Complex Sequential2D Network Architecture:
- Multiple parallel paths from input to output
- Skip connections across multiple layers
- Aggregation of features at different scales
- Total parameters: 85330


In [5]:
def demonstrate_complex_forward():
    """
    Run the module-level complex_net in two ways and print the resulting shapes.

    First the network is called on a single tensor whose width is the sum of
    all input sizes; then forward_list is called with a list in which only
    the first entry holds data.

    Returns:
        A tuple (output, output_list): the result of the tensor call and the
        list returned by forward_list.
    """
    batch_size = 16

    # Single tensor spanning all four input slots: 50 + 100 + 200 + 150 features.
    total_in_features = sum((50, 100, 200, 150))
    test_input = torch.randn(batch_size, total_in_features)

    # Forward pass through complex network
    output = complex_net(test_input)

    print(f"Input shape: {test_input.shape}")
    print(f"Output shape: {output.shape}")

    # Let's also try the list-based forward pass.
    # Only the first path receives data; the remaining slots are left as None.
    input_list = [torch.randn(batch_size, 50)] + [None] * 3
    output_list = complex_net.forward_list(input_list)

    print("\nList-based forward pass:")
    for idx, block_out in enumerate(output_list):
        if block_out is None:
            print(f"Output {idx}: None")
            continue
        print(f"Output {idx} shape: {block_out.shape}")

    return output, output_list

# Run the demonstration and keep both results: the tensor-call output and the
# list returned by the list-based forward pass.
output, output_list = demonstrate_complex_forward()

Input shape: torch.Size([16, 500])
Output shape: torch.Size([16, 460])

List-based forward pass:
Output 0 shape: torch.Size([16, 100])
Output 1 shape: torch.Size([16, 200])
Output 2: None
Output 3: None
