# Computational Graphs: Dynamic vs Static Execution

**Learning Objectives:**
- Understand the fundamental difference between dynamic and static computational graphs
- Learn how PyTorch's eager execution compares to TensorFlow's graph modes
- Explore the trade-offs between flexibility and performance
- Master debugging techniques for each approach

**Prerequisites:** Tensor operations, basic neural network concepts

**Estimated Time:** 50 minutes

In [None]:
import os
import sys
import time

import numpy as np

# Add src to path for our utilities
sys.path.append(os.path.join('..', '..', 'src'))

from utils.comparison_tools import create_side_by_side_comparison

# Optional framework imports: each framework is imported inside its own
# try/except so the notebook still runs when only one (or neither) is installed.
# The *_AVAILABLE flags set here gate the framework-specific code further down.
try:
    import torch
    import torch.nn as nn
    PYTORCH_AVAILABLE = True
    print(f"✅ PyTorch {torch.__version__} available")
except ImportError:
    PYTORCH_AVAILABLE = False
    print("❌ PyTorch not available")

try:
    import tensorflow as tf
    TENSORFLOW_AVAILABLE = True
    print(f"✅ TensorFlow {tf.__version__} available")
    # Report whether TensorFlow is currently running ops eagerly
    print(f"   Eager execution: {tf.executing_eagerly()}")
except ImportError:
    TENSORFLOW_AVAILABLE = False
    print("❌ TensorFlow not available")

# Seed NumPy and every framework that imported successfully with the same value (42)
np.random.seed(42)
if PYTORCH_AVAILABLE:
    torch.manual_seed(42)
if TENSORFLOW_AVAILABLE:
    tf.random.set_seed(42)

## 1. Graph Fundamentals

Understanding the core concepts of computational graphs.

In [None]:
print("=" * 60)
print("COMPUTATIONAL GRAPH FUNDAMENTALS")
print("=" * 60)

# Overview text plus an ASCII sketch of z = relu((x + y) * w).
# Every backslash in the sketch is written as "\\": in a regular (non-raw)
# string literal a lone backslash at the end of a line is a line continuation,
# which would silently join that row of the diagram with the row below it.
print("""
A computational graph represents mathematical operations as a network:
• Nodes = Operations (add, multiply, relu, etc.)
• Edges = Data flow (tensors)
• Direction = Forward pass (input → output)
• Reverse = Backward pass (gradients)

Example: z = relu((x + y) * w)

    x   y
     \\ /
      +  ← addition node
      |
      *  ← multiplication node
     / \\
    w   |
        relu ← activation node
        |
        z

Two fundamental approaches:
1. DYNAMIC: Build graph during execution (PyTorch)
2. STATIC: Build graph first, then execute (TensorFlow @tf.function)
""")

# Simple demonstration of graph concepts: z = relu((x + y) * w) in PyTorch
if PYTORCH_AVAILABLE:
    print("\n🔥 PyTorch Dynamic Graph Example:")

    # Create leaf tensors with gradient tracking
    x = torch.tensor(2.0, requires_grad=True)
    y = torch.tensor(3.0, requires_grad=True)
    w = torch.tensor(4.0, requires_grad=True)

    print(f"Inputs: x={x.item()}, y={y.item()}, w={w.item()}")

    # Build graph step by step; each result carries a grad_fn describing
    # the operation that produced it
    temp = x + y  # Addition node created
    print(f"After x + y: {temp.item()}, grad_fn: {temp.grad_fn}")

    product = temp * w  # Multiplication node created
    print(f"After * w: {product.item()}, grad_fn: {product.grad_fn}")

    z = torch.relu(product)  # ReLU node created
    print(f"After ReLU: {z.item()}, grad_fn: {z.grad_fn}")

    # The graph exists and can be traversed
    print("\nGraph structure:")
    print(f"  z.grad_fn: {z.grad_fn}")
    print(f"  z.grad_fn.next_functions: {z.grad_fn.next_functions}")

    # Compute gradients of z with respect to each input.
    # backward() fills x.grad with dz/dx (and likewise for y and w), so the
    # labels name the output in the numerator.
    z.backward()
    print("\nGradients:")
    print(f"  dz/dx: {x.grad.item()}")
    print(f"  dz/dy: {y.grad.item()}")
    print(f"  dz/dw: {w.grad.item()}")

if TENSORFLOW_AVAILABLE:
    print("\n🟠 TensorFlow Eager Execution Example:")

    # Create variables (these rebind x, y, w used by the PyTorch example)
    x = tf.Variable(2.0)
    y = tf.Variable(3.0)
    w = tf.Variable(4.0)

    print(f"Inputs: x={x.numpy()}, y={y.numpy()}, w={w.numpy()}")
    print(f"Eager execution enabled: {tf.executing_eagerly()}")

    # Use GradientTape to record operations for later differentiation
    with tf.GradientTape() as tape:
        temp = x + y  # Executed immediately
        print(f"After x + y: {temp.numpy()}")

        product = temp * w  # Executed immediately
        print(f"After * w: {product.numpy()}")

        z = tf.nn.relu(product)  # Executed immediately
        print(f"After ReLU: {z.numpy()}")

    # Compute gradients of z with respect to each variable.
    # tape.gradient(z, [x, y, w]) returns [dz/dx, dz/dy, dz/dw], so the labels
    # name the output in the numerator.
    gradients = tape.gradient(z, [x, y, w])
    print("\nGradients:")
    print(f"  dz/dx: {gradients[0].numpy()}")
    print(f"  dz/dy: {gradients[1].numpy()}")
    print(f"  dz/dw: {gradients[2].numpy()}")

# Recap of how the two frameworks differ, one bullet per line
print("\n💡 Key Differences:")
for point in (
    "PyTorch: Graph built during forward pass, stored for backward pass",
    "TensorFlow Eager: Operations executed immediately, tape records for gradients",
    "Both support automatic differentiation",
    "Graph structure affects memory usage and performance",
):
    print(f"  • {point}")

## 2. PyTorch Dynamic Graphs

Exploring PyTorch's define-by-run approach.

In [None]:
# Section banner for the PyTorch dynamic-graph demos
print("\n" + "=" * 60)
print("PYTORCH DYNAMIC GRAPHS")
print("=" * 60)

# Everything in this branch uses torch, so it only runs when the import succeeded
if PYTORCH_AVAILABLE:
    # The literal below is indented along with the code, so each printed
    # line carries that leading whitespace.
    print("""
    PyTorch Dynamic Graph Features:
    • Graph built during forward pass (define-by-run)
    • Different graph structure each iteration
    • Easy debugging with standard Python tools
    • Flexible control flow (if/else, loops, recursion)
    • Slight performance overhead due to graph construction
    """)

    # Example 1: Conditional computation
    print("\n1. Conditional Computation:")

    def conditional_model(x, use_nonlinearity=True):
        """Run a tiny model whose activation is picked at call time.

        The input is first mapped through ``x * 2.0 + 1.0``. When
        ``use_nonlinearity`` is true, ReLU is applied if the mean of the
        mapped values is positive and tanh otherwise; when it is false the
        mapped values pass through unchanged. Intermediate statistics and the
        chosen operation are printed along the way.

        Args:
            x: Input tensor.
            use_nonlinearity: Whether to apply a data-dependent activation.

        Returns:
            The sum of the resulting tensor.
        """
        print(f"  Input shape: {x.shape}, mean: {x.mean().item():.4f}")

        # Affine step shared by every branch
        shifted = x * 2.0 + 1.0
        print(f"  After linear: mean = {shifted.mean().item():.4f}")

        # The branch taken depends on runtime values, so the recorded graph
        # differs from call to call
        if not use_nonlinearity:
            activated, operation = shifted, "No nonlinearity"
        elif shifted.mean() > 0:
            activated, operation = torch.relu(shifted), "ReLU applied"
        else:
            activated, operation = torch.tanh(shifted), "Tanh applied"

        total = activated.sum()
        print(f"  Operation: {operation}")
        print(f"  Final result: {total.item():.4f}")
        print(f"  Grad function: {total.grad_fn}")

        return total

    # Test with different conditions
    x1 = torch.randn(5, requires_grad=True)
    # x2 is the output of a subtraction applied to a fresh random tensor;
    # only the forward pass is run on it below.
    x2 = torch.randn(5, requires_grad=True) - 2.0  # Negative mean

    print("\n  Test 1 - Positive input with nonlinearity:")
    result1 = conditional_model(x1, use_nonlinearity=True)

    print("\n  Test 2 - Negative input with nonlinearity:")
    result2 = conditional_model(x2, use_nonlinearity=True)

    # Same input as Test 1, but with the activation branch skipped entirely
    print("\n  Test 3 - No nonlinearity:")
    result3 = conditional_model(x1, use_nonlinearity=False)

    # Example 2: Variable-length sequences
    print("\n\n2. Variable-Length Sequence Processing:")

    def process_variable_sequences(sequences):
        """Reduce one random sequence per requested length to a scalar.

        For every length in ``sequences`` a random ``(length, 3)`` tensor is
        drawn. Lengths up to 3 are reduced with a plain mean; longer ones use
        a sum weighted by a softmax over freshly drawn random scores. A
        one-line summary is printed per sequence.

        Args:
            sequences: Iterable of sequence lengths.

        Returns:
            List with one scalar tensor per entry of ``sequences``.
        """
        outputs = []

        for index, length in enumerate(sequences, start=1):
            # Fresh random data for this sequence
            batch = torch.randn(length, 3, requires_grad=True)

            if length > 3:
                # Long sequences: per-step weights that sum to one
                weights = torch.softmax(torch.randn(length), dim=0)
                value = (batch * weights.unsqueeze(1)).sum()
                method = "weighted sum"
            else:
                # Short sequences: simple mean
                value = batch.mean()
                method = "mean"

            print(f"  Sequence {index}: length={length}, method={method}, result={value.item():.4f}")
            outputs.append(value)

        return outputs

    # Mix of short (<= 3) and long (> 3) lengths so both branches are exercised
    sequence_lengths = [2, 5, 3, 8, 1, 6]
    results = process_variable_sequences(sequence_lengths)

    # Example 3: Recursive computation
    print("\n\n3. Recursive Computation:")

    def recursive_computation(x, depth=0, max_depth=3):
        """Recursively shrink ``x`` until a stopping condition is met.

        Each level prints the current mean. While ``depth`` is below
        ``max_depth`` and the mean is not under 0.1, the function recurses on
        ``relu(x - 0.5)``; otherwise it returns the sum of the current tensor.
        How deep the recursion goes depends on the data, so the recorded
        graph does too.

        Args:
            x: Input tensor.
            depth: Current recursion level (used for indentation and the
                depth limit).
            max_depth: Level at which recursion stops unconditionally.

        Returns:
            Sum of the tensor at the level where recursion stopped.
        """
        indent = "  " * depth
        print(f"  {indent}Depth {depth}: input mean = {x.mean().item():.4f}")

        if depth < max_depth and not (x.mean() < 0.1):
            # Recursive case: shift values down by 0.5 and clamp at zero
            return recursive_computation(torch.relu(x - 0.5), depth + 1, max_depth)

        # Base case: depth limit hit or values have become small
        total = x.sum()
        print(f"  {indent}Base case reached: {total.item():.4f}")
        return total

    # Build the shifted values first and only then turn on gradient tracking.
    # `torch.randn(4, requires_grad=True) + 1.0` would make x_recursive the
    # output of an addition (a non-leaf tensor), and backward() does not
    # populate .grad on non-leaf tensors, so the checks below would print
    # False / None.
    x_recursive = (torch.randn(4) + 1.0).requires_grad_()  # Values centred around 1.0
    recursive_result = recursive_computation(x_recursive)
    print(f"  Final recursive result: {recursive_result.item():.4f}")

    # Compute gradients for recursive computation
    recursive_result.backward()
    print(f"  Gradient computed: {x_recursive.grad is not None}")
    print(f"  Gradient values: {x_recursive.grad}")

else:
    print("PyTorch not available - skipping dynamic graph examples")

# Pros and cons of define-by-run graphs, printed as two bulleted lists
pytorch_notes = {
    "\n🔥 PyTorch Dynamic Graph Advantages:": (
        "Natural Python control flow",
        "Easy debugging and introspection",
        "Flexible model architectures",
        "Great for research and prototyping",
        "Handles variable-length inputs naturally",
    ),
    "\n⚠️ PyTorch Dynamic Graph Considerations:": (
        "Graph construction overhead each forward pass",
        "Memory usage for storing graph",
        "Harder to optimize for deployment",
        "Requires tracing for production optimization",
    ),
}

for heading, bullets in pytorch_notes.items():
    print(heading)
    for bullet in bullets:
        print(f"  • {bullet}")

## 3. TensorFlow Graph Modes

Understanding TensorFlow's eager execution and graph compilation.

In [None]:
# Section banner for the TensorFlow execution-mode demos
print("\n" + "=" * 60)
print("TENSORFLOW GRAPH MODES")
print("=" * 60)

# Everything in this branch uses tf, so it only runs when the import succeeded
if TENSORFLOW_AVAILABLE:
    # The literal below is indented along with the code, so each printed
    # line carries that leading whitespace.
    print("""
    TensorFlow Execution Modes:
    • EAGER (default): Operations execute immediately (like PyTorch)
    • GRAPH (@tf.function): Compile to static graph for performance
    • Can switch between modes as needed
    • Graph mode enables optimizations and deployment
    """)

    # Example 1: Eager vs Graph execution
    print("\n1. Eager vs Graph Execution:")

    def eager_computation(x, y):
        """Compute sum(relu(x + y)) eagerly, printing each intermediate step.

        Args:
            x: First input tensor.
            y: Second input tensor.

        Returns:
            Scalar tensor holding the sum of the rectified values.
        """
        print("  Eager mode - executing immediately")

        summed = x + y
        print(f"  After addition: {summed.shape}")

        rectified = tf.nn.relu(summed)
        # Values are available right away, so .numpy() works mid-function
        print(f"  After ReLU: {tf.reduce_mean(rectified).numpy():.4f}")

        total = tf.reduce_sum(rectified)
        print(f"  Final result: {total.numpy():.4f}")

        return total

    @tf.function
    def graph_computation(x, y):
        """Compute sum(relu(x + y)) as a traced TensorFlow function.

        Args:
            x: First input tensor.
            y: Second input tensor.

        Returns:
            Scalar tensor holding the sum of the rectified values.
        """
        # A plain Python print would run only while the function is being
        # traced; tf.print is used so the message is emitted on every call.
        tf.print("Graph mode - compiled execution")

        total = tf.reduce_sum(tf.nn.relu(x + y))

        tf.print("Final result:", total)
        return total

    # Test both modes on the same pair of random 3x3 inputs
    # (tf.constant is applied to the tensor that tf.random.normal returns)
    x = tf.constant(tf.random.normal((3, 3)))
    y = tf.constant(tf.random.normal((3, 3)))

    print("\n  Eager execution:")
    eager_result = eager_computation(x, y)

    print("\n  Graph execution:")
    graph_result = graph_computation(x, y)

    # Compare the two scalar results with an absolute tolerance of 1e-6
    print(f"\n  Results match: {tf.abs(eager_result - graph_result).numpy() < 1e-6}")

    # Example 2: Performance comparison
    print("\n\n2. Performance Comparison:")

    def complex_computation_eager(x):
        """Apply ten rounds of shift, ReLU and dropout eagerly, then sum.

        Args:
            x: Input tensor.

        Returns:
            Scalar tensor with the sum of the final activations.
        """
        activations = x
        for _ in range(10):
            activations = tf.nn.dropout(tf.nn.relu(activations + 0.1), rate=0.1)
        return tf.reduce_sum(activations)

    @tf.function
    def complex_computation_graph(x):
        """Same ten-round computation as the eager version, wrapped in tf.function.

        Args:
            x: Input tensor.

        Returns:
            Scalar tensor with the sum of the final activations.
        """
        activations = x
        for _ in range(10):
            activations = tf.nn.dropout(tf.nn.relu(activations + 0.1), rate=0.1)
        return tf.reduce_sum(activations)

    # Benchmark performance
    large_tensor = tf.random.normal((1000, 1000))

    # Warm up BOTH paths before timing: the graph version pays its tracing
    # cost on the first call, and warming the eager version too keeps any
    # first-call setup cost out of its measurement, so the comparison is fair.
    _ = complex_computation_eager(large_tensor)
    _ = complex_computation_graph(large_tensor)

    # time.perf_counter() is a monotonic, high-resolution clock intended for
    # measuring short durations; time.time() can jump if the system clock is
    # adjusted and may have coarse resolution.

    # Time eager execution
    start_time = time.perf_counter()
    for _ in range(5):
        _ = complex_computation_eager(large_tensor)
    eager_time = time.perf_counter() - start_time

    # Time graph execution
    start_time = time.perf_counter()
    for _ in range(5):
        _ = complex_computation_graph(large_tensor)
    graph_time = time.perf_counter() - start_time

    print(f"  Eager execution (5 runs): {eager_time:.4f}s")
    print(f"  Graph execution (5 runs): {graph_time:.4f}s")
    # Guard against a zero-length measurement before dividing
    if graph_time > 0:
        speedup = eager_time / graph_time
        print(f"  Graph speedup: {speedup:.2f}x")

    # Example 3: Graph tracing and retracing
    print("\n\n3. Graph Tracing Behavior:")

    @tf.function
    def traced_function(x):
        """Return the sum of ``x * 2``; announce each trace via a Python print."""
        # Python-level print: acts as a marker showing when a trace happens
        print(f"  Tracing with input shape: {x.shape}")  # Only prints during tracing
        return tf.reduce_sum(x * 2)

    print("\n  First call (triggers tracing):")
    result1 = traced_function(tf.constant([1.0, 2.0, 3.0]))
    print(f"  Result: {result1.numpy()}")

    # Same shape as the first call, different values
    print("\n  Second call (uses cached graph):")
    result2 = traced_function(tf.constant([4.0, 5.0, 6.0]))
    print(f"  Result: {result2.numpy()}")

    # Two elements instead of three
    print("\n  Different shape (triggers retracing):")
    result3 = traced_function(tf.constant([1.0, 2.0]))  # Different shape!
    print(f"  Result: {result3.numpy()}")

    # Example 4: Conditional computation in graph mode
    print("\n\n4. Conditional Computation in Graph Mode:")

    @tf.function
    def conditional_graph_computation(x, training=True):
        """Mean of ``x``, with dropout (rate 0.5) applied only when training.

        Args:
            x: Input tensor.
            training: Python bool or scalar boolean tensor selecting the
                dropout branch.

        Returns:
            Scalar tensor with the mean of the (possibly dropped-out) input.
        """
        # tf.cond inside a tf.function does not accept a plain Python bool as
        # its predicate (it raises TypeError), so normalise the flag to a
        # boolean tensor first. This keeps the Python-bool default usable.
        training = tf.convert_to_tensor(training, dtype=tf.bool)

        # Use tf.cond for conditional execution in graph mode
        def apply_dropout():
            return tf.nn.dropout(x, rate=0.5)

        def no_dropout():
            return x

        # tf.cond is the graph-mode equivalent of if/else
        x_processed = tf.cond(training, apply_dropout, no_dropout)

        return tf.reduce_mean(x_processed)

    test_tensor = tf.constant([1.0, 2.0, 3.0, 4.0, 5.0])

    # Pass the flag as a tensor: Python values given to a tf.function are
    # baked into the trace, whereas a tensor flag lets both calls reuse the
    # same traced graph and take the branch at run time.
    training_result = conditional_graph_computation(test_tensor, training=tf.constant(True))
    inference_result = conditional_graph_computation(test_tensor, training=tf.constant(False))

    print(f"  Training mode result: {training_result.numpy():.4f}")
    print(f"  Inference mode result: {inference_result.numpy():.4f}")

else:
    print("TensorFlow not available - skipping graph mode examples")

# Pros of graph mode, printed as a heading followed by bullets
print(
    "\n🟠 TensorFlow Graph Mode Advantages:",
    "  • Excellent performance through optimization",
    "  • Ready for production deployment",
    "  • Memory efficient execution",
    "  • Cross-platform compatibility",
    "  • Can export to various formats (SavedModel, TFLite, etc.)",
    sep="\n",
)

# Cons of graph mode, same layout
print(
    "\n⚠️ TensorFlow Graph Mode Considerations:",
    "  • Limited Python control flow (use tf.cond, tf.while_loop)",
    "  • Debugging requires tf.print instead of print",
    "  • Graph retracing overhead with dynamic shapes",
    "  • Less flexible than eager execution",
    "  • Learning curve for graph-specific patterns",
    sep="\n",
)

## 4. Debugging Strategies

Different approaches to debugging in dynamic vs static execution.

In [None]:
# Section banner for the debugging comparison
print("\n" + "=" * 60)
print("DEBUGGING STRATEGIES")
print("=" * 60)

# Debugging comparison
# The two strings below hold example source code that is only displayed
# (passed to create_side_by_side_comparison), never executed by this cell.
pytorch_debug_code = """
# PyTorch - Easy debugging with standard Python
import torch

def debug_pytorch_model(x):
    print(f"Input shape: {x.shape}")
    print(f"Input mean: {x.mean().item():.4f}")

    # Can use regular Python debugging
    y = torch.relu(x)
    print(f"After ReLU mean: {y.mean().item():.4f}")

    # Conditional debugging
    if y.mean() > 0.5:
        print("High activation detected!")
        # Can set breakpoints here
        import pdb; pdb.set_trace()

    z = y.sum()
    print(f"Final result: {z.item():.4f}")

    return z

# Easy to debug
x = torch.randn(10, requires_grad=True)
result = debug_pytorch_model(x)
result.backward()

# Can inspect gradients easily
print(f"Gradient: {x.grad}")
"""

# TensorFlow counterpart: one eager-mode function and one @tf.function version
tensorflow_debug_code = """
# TensorFlow - Different approaches for eager vs graph
import tensorflow as tf

# Eager mode debugging (similar to PyTorch)
def debug_tf_eager(x):
    print(f"Input shape: {x.shape}")
    print(f"Input mean: {tf.reduce_mean(x).numpy():.4f}")

    y = tf.nn.relu(x)
    print(f"After ReLU mean: {tf.reduce_mean(y).numpy():.4f}")

    if tf.reduce_mean(y) > 0.5:
        print("High activation detected!")

    return tf.reduce_sum(y)

# Graph mode debugging (requires tf.print)
@tf.function
def debug_tf_graph(x):
    tf.print("Input shape:", tf.shape(x))
    tf.print("Input mean:", tf.reduce_mean(x))

    y = tf.nn.relu(x)
    tf.print("After ReLU mean:", tf.reduce_mean(y))

    # Conditional debugging in graph mode
    tf.cond(tf.reduce_mean(y) > 0.5,
            lambda: tf.print("High activation detected!"),
            lambda: tf.no_op())

    return tf.reduce_sum(y)

# Usage
x = tf.Variable(tf.random.normal((10,)))
with tf.GradientTape() as tape:
    result = debug_tf_graph(x)
gradients = tape.gradient(result, x)
"""

# Print whatever the project helper returns for the two snippets and the title
print(create_side_by_side_comparison(
    pytorch_debug_code,
    tensorflow_debug_code,
    "Debugging Approaches"
))

# Practical debugging examples.
# Each framework's demo is gated on that framework alone, so having only one
# of them installed still runs the matching example (previously both demos
# were skipped unless BOTH frameworks were available).
if PYTORCH_AVAILABLE or TENSORFLOW_AVAILABLE:
    print("\n🔍 Practical Debugging Examples:")

if PYTORCH_AVAILABLE:
    # PyTorch debugging
    print("\n1. PyTorch Debugging in Action:")

    def pytorch_debug_example():
        """Inspect a random 5x3 tensor, process it, and return the total.

        Prints basic statistics, warns on NaN/Inf, applies ReLU, sums over
        the feature dimension and returns the sum of those row totals.
        """
        x = torch.randn(5, 3, requires_grad=True)

        # Easy inspection at any point
        print(f"  Input statistics: mean={x.mean().item():.4f}, std={x.std().item():.4f}")
        print(f"  Input range: [{x.min().item():.4f}, {x.max().item():.4f}]")

        # Can check for common issues
        if torch.isnan(x).any():
            print("  ⚠️ NaN detected in input!")

        if torch.isinf(x).any():
            print("  ⚠️ Inf detected in input!")

        # Process data
        y = torch.relu(x)
        z = y.sum(dim=1)  # Sum over features

        print(f"  After processing: {z}")
        print(f"  Gradient function: {z.grad_fn}")

        return z.sum()

    result = pytorch_debug_example()

if TENSORFLOW_AVAILABLE:
    # TensorFlow debugging
    print("\n2. TensorFlow Debugging in Action:")

    def tensorflow_debug_example():
        """Inspect a random 5x3 variable, process it, and return the total.

        Prints basic statistics, warns on NaN/Inf, applies ReLU and sums
        under a GradientTape, then reports the mean gradient with respect to
        the input variable.
        """
        x = tf.Variable(tf.random.normal((5, 3)))

        # Eager mode allows easy inspection
        print(f"  Input statistics: mean={tf.reduce_mean(x).numpy():.4f}, std={tf.math.reduce_std(x).numpy():.4f}")
        print(f"  Input range: [{tf.reduce_min(x).numpy():.4f}, {tf.reduce_max(x).numpy():.4f}]")

        # Check for issues
        if tf.reduce_any(tf.math.is_nan(x)):
            print("  ⚠️ NaN detected in input!")

        if tf.reduce_any(tf.math.is_inf(x)):
            print("  ⚠️ Inf detected in input!")

        # Process data while the tape records the operations
        with tf.GradientTape() as tape:
            y = tf.nn.relu(x)
            z = tf.reduce_sum(y, axis=1)  # Sum over features
            result = tf.reduce_sum(z)

        print(f"  After processing: {z.numpy()}")

        # Can compute gradients for debugging
        gradients = tape.gradient(result, x)
        print(f"  Gradient statistics: mean={tf.reduce_mean(gradients).numpy():.4f}")

        return result

    result = tensorflow_debug_example()

print("\n🛠️ Debugging Best Practices:")

# Five tips per execution style, keyed by the heading under which they print
debugging_tips = {
    "PyTorch": [
        "Use standard Python debugging tools (pdb, print, etc.)",
        "Check tensor shapes and values at each step",
        "Use .item() to extract scalar values for printing",
        "Inspect grad_fn to understand computation graph",
        "Use torch.autograd.detect_anomaly() for gradient issues",
    ],
    "TensorFlow Eager": [
        "Similar to PyTorch - use standard Python tools",
        "Use .numpy() to extract values for inspection",
        "Check tensor shapes and dtypes regularly",
        "Use tf.debugging.assert_* functions for validation",
        "Enable tf.debugging.enable_check_numerics() for NaN/Inf detection",
    ],
    "TensorFlow Graph": [
        "Use tf.print() instead of print() for output",
        "Use tf.debugging.assert_* for runtime checks",
        "Add tf.summary for TensorBoard visualization",
        "Use tf.py_function for complex debugging logic",
        "Consider switching to eager mode for debugging",
    ],
}

# One print per framework: heading first, then each tip as its own bullet line
for framework, tips in debugging_tips.items():
    print(f"\n{framework}:", *(f"  • {tip}" for tip in tips), sep="\n")

print("\n📊 Framework Comparison Summary:")

# Column-oriented table: the first column names the aspect, the other three
# hold the rating of each execution style for that aspect
comparison_table = {
    "Aspect": ["Flexibility", "Performance", "Debugging", "Deployment", "Learning Curve"],
    "PyTorch Dynamic": ["Very High", "Good", "Easy", "Requires Tracing", "Easy"],
    "TensorFlow Eager": ["Very High", "Good", "Easy", "Requires Conversion", "Easy"],
    "TensorFlow Graph": ["Limited", "Excellent", "Challenging", "Ready", "Moderate"],
}

# Header uses shortened column titles; every cell is left-aligned to width 15
header_cells = ("Aspect", "PyTorch", "TF Eager", "TF Graph")
print("\n" + " | ".join(f"{cell:<15}" for cell in header_cells))
print("-" * 70)

# Transpose the columns into rows and print them with the same layout
for row in zip(*comparison_table.values()):
    print(" | ".join(f"{cell:<15}" for cell in row))

# Closing summary: heading plus five bullets, emitted in a single print
print("\n".join([
    "\n✅ Key Takeaways:",
    "  • Dynamic graphs offer maximum flexibility and easy debugging",
    "  • Static graphs provide optimal performance and deployment readiness",
    "  • TensorFlow offers both approaches - choose based on your needs",
    "  • Consider using eager mode for development, graph mode for production",
    "  • Both approaches support automatic differentiation effectively",
]))