# Module 2 - Exercise 1: Autograd Exploration

## Learning Objectives
- Master PyTorch's automatic differentiation system
- Understand computational graphs and gradient flow
- Practice with multivariable gradients and chain rule
- Explore gradient context management
- Implement higher-order derivatives

## Prerequisites
- Completion of Module 1 exercises
- Understanding of calculus derivatives
- Familiarity with chain rule

## Setup and Test Repository

First, let's clone the test repository and set up our environment for step-by-step validation.

In [None]:
# Clone the test repository into /tmp/tests.
# stderr is discarded and `|| true` forces a successful exit status, so a failing
# clone does not stop this cell; a real failure will only surface at the imports below.
!git clone https://github.com/racousin/data_science_practice.git /tmp/tests 2>/dev/null || true

# Import required modules
# The appended directory is presumably where test_utils and module2 live in the clone.
import sys
sys.path.append('/tmp/tests/tests/python_deep_learning')

# Import the improved test utilities
from test_utils import NotebookTestRunner, create_inline_test
from module2.test_exercise1 import Exercise1Validator, EXERCISE1_SECTIONS

# Create test runner and validator
# test_runner and validator are reused by every "Test Section" cell in this notebook.
test_runner = NotebookTestRunner("module2", 1)
validator = Exercise1Validator()

print("Test framework setup complete!")

## Environment Setup

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Fix the RNG seeds first so every run of the notebook starts from the same state
torch.manual_seed(42)
np.random.seed(42)

# Report the installed PyTorch build and whether a CUDA device is visible
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

## Section 1: Basic Autograd Operations

Learn the fundamentals of automatic differentiation with simple scalar functions.

In [None]:
# TODO: Create a tensor that requires gradients and compute a simple function
# Create x = 2.0 with requires_grad=True
x = None  # torch.tensor(2.0, requires_grad=True)

# TODO: Compute y = x^2 + 3*x + 1
y = None  # x**2 + 3*x + 1

# Show the values and their requires_grad flags. The conditional expressions
# print 'None' instead of raising while x / y are still the None placeholders.
print(f"x = {x}")
print(f"y = {y}")
print(f"x.requires_grad: {x.requires_grad if x is not None else 'None'}")
print(f"y.requires_grad: {y.requires_grad if y is not None else 'None'}")

In [None]:
# TODO: Compute gradients using backward()
# Call y.backward() to compute gradients
# y.backward()

# x.grad is reported as dy/dx; the guard only protects against x still being
# the None placeholder from the previous cell.
print(f"dy/dx = {x.grad if x is not None else 'None'}")
print(f"Expected: dy/dx = 2x + 3 = 2*2 + 3 = 7")

In [None]:
# Test Section 1: Basic Autograd Operations
# Resolve each (method name, description) pair listed for this section in
# EXERCISE1_SECTIONS to the matching validator method, then pass the pairs and
# the current namespace (locals()) to the test runner.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE1_SECTIONS["Section 1: Basic Autograd Operations"]]
test_runner.test_section("Section 1: Basic Autograd Operations", validator, section_tests, locals())

## Section 2: Multivariable Gradients

Explore gradients with functions of multiple variables.

In [None]:
# TODO: Create two variables with gradients enabled
x1 = None  # torch.tensor(1.0, requires_grad=True)
x2 = None  # torch.tensor(2.0, requires_grad=True)

# TODO: Compute z = x1^2 + x2^3 + x1*x2
z = None  # x1**2 + x2**3 + x1*x2

# Echo the inputs and the result (prints None for placeholders that are still unfilled)
print(f"x1 = {x1}, x2 = {x2}")
print(f"z = {z}")

In [None]:
# TODO: Compute gradients for multivariable function
# Call z.backward() to compute partial derivatives
# z.backward()

# Each partial derivative is read from the .grad of its own input. The guards
# print 'None' when the input is still a placeholder or has no .grad attribute.
print(f"∂z/∂x1 = {x1.grad if x1 is not None and hasattr(x1, 'grad') else 'None'}")
print(f"∂z/∂x2 = {x2.grad if x2 is not None and hasattr(x2, 'grad') else 'None'}")
print(f"Expected: ∂z/∂x1 = 2*x1 + x2 = 2*1 + 2 = 4")
print(f"Expected: ∂z/∂x2 = 3*x2^2 + x1 = 3*4 + 1 = 13")

In [None]:
# Test Section 2: Multivariable Gradients
# Look up the validator method for every (name, description) entry of this
# section and run them against the variables currently defined in the notebook.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE1_SECTIONS["Section 2: Multivariable Gradients"]]
test_runner.test_section("Section 2: Multivariable Gradients", validator, section_tests, locals())

## Section 3: Vector and Matrix Gradients

Work with gradients of vector and matrix operations.

In [None]:
# TODO: Create a vector with gradients and compute a scalar loss
vec_x = None  # torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

# TODO: Compute vec_loss = sum of squares
vec_loss = None  # torch.sum(vec_x**2)

# Echo the vector and the loss (prints None while the placeholders are unfilled)
print(f"vec_x = {vec_x}")
print(f"vec_loss = {vec_loss}")

In [None]:
# TODO: Compute gradients for vector function
# Call vec_loss.backward()
# vec_loss.backward()

# The gradient of the loss with respect to vec_x is read from vec_x.grad;
# 'None' is printed when vec_x is unfilled or has no .grad attribute.
print(f"∇vec_loss = {vec_x.grad if vec_x is not None and hasattr(vec_x, 'grad') else 'None'}")
print(f"Expected: gradient should be 2*vec_x = [2, 4, 6]")

In [None]:
# Test vector gradients (first part of Section 3)
# The [:3] slice keeps only the first three entries registered for Section 3;
# the remaining entries are run by the matrix test cell further down.
vec_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE1_SECTIONS["Section 3: Vector and Matrix Gradients"][:3]]
test_runner.test_section("Section 3a: Vector Gradients", validator, vec_tests, locals())

In [None]:
# TODO: Create a matrix and compute gradients
mat_A = None  # torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)

# TODO: Compute mat_loss = sum of squares of all elements
mat_loss = None  # torch.sum(mat_A**2)

# Echo the matrix (on its own line) and the loss; prints None while unfilled
print(f"mat_A = \n{mat_A}")
print(f"mat_loss = {mat_loss}")

In [None]:
# TODO: Compute matrix gradients
# Call mat_loss.backward()
# mat_loss.backward()

# The gradient is read from mat_A.grad; 'None' is printed when mat_A is
# unfilled or has no .grad attribute.
print(f"∇mat_A = \n{mat_A.grad if mat_A is not None and hasattr(mat_A, 'grad') else 'None'}")
print(f"Expected: gradient should be 2*mat_A")

In [None]:
# Test matrix gradients (second part of Section 3)
# The [3:] slice skips the first three Section 3 entries (run by the vector
# test cell) and keeps everything after them.
mat_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE1_SECTIONS["Section 3: Vector and Matrix Gradients"][3:]]
test_runner.test_section("Section 3b: Matrix Gradients", validator, mat_tests, locals())

## Section 4: Computational Graph and Chain Rule

Understand how PyTorch builds and traverses computational graphs.

In [None]:
# TODO: Build a computational graph step by step
graph_x = None  # torch.tensor(2.0, requires_grad=True)

# TODO: Build computation step by step
# Each intermediate gets its own name so it can be printed on its own below.
graph_y = None  # graph_x**2
graph_z = None  # 3*graph_y + 1
graph_w = None  # graph_z**2

# Echo every stage of the chain x -> y -> z -> w (None while unfilled)
print(f"x = {graph_x}")
print(f"y = x^2 = {graph_y}")
print(f"z = 3y + 1 = {graph_z}")
print(f"w = z^2 = {graph_w}")

In [None]:
# TODO: Compute gradients through the computational graph
# Call graph_w.backward()
# graph_w.backward()

# dw/dx is read from graph_x.grad; 'None' is printed when graph_x is unfilled
# or has no .grad attribute.
print(f"dw/dx = {graph_x.grad if graph_x is not None and hasattr(graph_x, 'grad') else 'None'}")
# The remaining lines spell out the chain-rule factors by hand for x = 2
# (so y = 4 and z = 13), giving the value to compare against.
print(f"Chain rule: dw/dx = dw/dz * dz/dy * dy/dx")
print(f"dw/dz = 2*z = 2*13 = 26")
print(f"dz/dy = 3")
print(f"dy/dx = 2*x = 2*2 = 4")
print(f"Therefore: dw/dx = 26 * 3 * 4 = 312")

In [None]:
# Test Section 4: Computational Graph and Chain Rule
# Look up the validator method for every (name, description) entry of this
# section and run them against the variables currently defined in the notebook.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE1_SECTIONS["Section 4: Computational Graph and Chain Rule"]]
test_runner.test_section("Section 4: Computational Graph and Chain Rule", validator, section_tests, locals())

## Section 5: Gradient Context Management

Learn to control when gradients are computed and stored.

In [None]:
# TODO: Use torch.no_grad() context
# NOTE: this rebinds the notebook-level name x that Section 1 also uses.
x = torch.tensor(3.0, requires_grad=True)

# TODO: Compute operation within no_grad context
with torch.no_grad():
    no_grad_result = None  # x**2 + 2*x

# Show the result and its requires_grad flag ('None' while the placeholder is unfilled)
print(f"no_grad_result = {no_grad_result}")
print(f"requires_grad: {no_grad_result.requires_grad if no_grad_result is not None else 'None'}")

In [None]:
# TODO: Use detach() to remove tensor from computational graph
# y is built from the x defined in the previous cell; it rebinds the
# notebook-level name y that Section 1 also uses.
y = x**3 + x
detached_result = None  # y.detach()

# Compare the tracked tensor with its detached counterpart. The last two prints
# fall back to 'None' while detached_result is still unfilled.
print(f"Original y requires_grad: {y.requires_grad}")
print(f"Detached result requires_grad: {detached_result.requires_grad if detached_result is not None else 'None'}")
print(f"Values are equal: {torch.equal(y, detached_result) if detached_result is not None else 'None'}")

In [None]:
# Test Section 5: Gradient Context Management
# Look up the validator method for every (name, description) entry of this
# section and run them against the variables currently defined in the notebook.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE1_SECTIONS["Section 5: Gradient Context Management"]]
test_runner.test_section("Section 5: Gradient Context Management", validator, section_tests, locals())

## Section 6: Higher-Order Derivatives

Compute second derivatives and higher-order gradients.

In [None]:
# TODO: Compute second derivative
# NOTE: this cell rebinds the notebook-level names x and y used by earlier sections.
x = torch.tensor(2.0, requires_grad=True)

# TODO: Define function f(x) = x^4
y = None  # x**4

# TODO: Compute first derivative
# y.backward(create_graph=True)  # create_graph=True allows computing gradients of gradients
# first_derivative = x.grad.clone()

print(f"f(x) = x^4, x = {x}")
# Uncomment together with the first-derivative lines above; first_derivative
# does not exist until they run.
# print(f"f'(x) = 4x^3 = {first_derivative}")

# TODO: Compute second derivative
# x.grad.zero_()  # Clear first derivative
# first_derivative.backward()
second_derivative = None  # x.grad

# Prints None until second_derivative has been filled in
print(f"f''(x) = 12x^2 = {second_derivative}")
print(f"Expected: f''(2) = 12*4 = 48")

In [None]:
# Test Section 6: Higher-Order Derivatives
# Look up the validator method for every (name, description) entry of this
# section and run them against the variables currently defined in the notebook.
section_tests = [(getattr(validator, name), desc) for name, desc in EXERCISE1_SECTIONS["Section 6: Higher-Order Derivatives"]]
test_runner.test_section("Section 6: Higher-Order Derivatives", validator, section_tests, locals())

## Section 7: Gradient Flow Visualization

Visualize how gradients flow through computational graphs.

In [None]:
# Create a more complex computational graph for visualization
def create_complex_function(x):
    """Evaluate log(exp(sin(x**2)) + 1) for a tensor input.

    The value is a chain of four elementwise operations (square, sine,
    exponential, log of one plus), which gives autograd several nodes to
    differentiate through.
    """
    squared = x**2
    return torch.log(torch.exp(torch.sin(squared)) + 1)

# Sample 100 evenly spaced inputs on [-2, 2]
x_values = torch.linspace(-2, 2, 100)

# Differentiate at every sample independently: each point becomes its own
# fresh leaf tensor, so nothing accumulates in .grad between samples.
gradients = []
for x_val in x_values:
    x = x_val.clone().requires_grad_(True)
    y = create_complex_function(x)
    y.backward()
    gradients += [x.grad.item()]

# Plot function and its gradient
# One wide figure holding two side-by-side panels
plt.figure(figsize=(12, 5))

# Left panel: the function values
plt.subplot(1, 2, 1)
# Only the values are needed here, so they are evaluated under no_grad
with torch.no_grad():
    y_values = [create_complex_function(x).item() for x in x_values]
plt.plot(x_values.numpy(), y_values)
plt.title('Function: log(exp(sin(x²)) + 1)')
plt.xlabel('x')
plt.ylabel('f(x)')
plt.grid(True)

# Right panel: the per-point gradients collected in the `gradients` list
plt.subplot(1, 2, 2)
plt.plot(x_values.numpy(), gradients)
plt.title("Function's Gradient")
plt.xlabel('x')
plt.ylabel("f'(x)")
plt.grid(True)

plt.tight_layout()
plt.show()

print("The gradient plot shows how the derivative changes across the input domain.")
print("Notice how the gradient reflects the slope of the original function.")

## Section 8: Practical Applications

Apply autograd to a practical scenario: minimizing a function with gradient descent, the same update loop that underlies neural network training.

In [None]:
# Simple optimization using gradients
def quadratic_function(x):
    """Return (x - 3)^2 + 1, the quadratic objective to minimize (smallest at x = 3)."""
    offset = x - 3
    return offset**2 + 1

# Initialize parameter (starts away from the target x = 3) and the step settings
x = torch.tensor(0.0, requires_grad=True)
learning_rate = 0.1
num_steps = 50

# Track optimization progress; each entry is recorded BEFORE that step's update
x_history = []
loss_history = []

for step in range(num_steps):
    # Forward pass
    loss = quadratic_function(x)

    # Record history
    x_history.append(x.item())
    loss_history.append(loss.item())

    # Backward pass: clear the previous gradient first, because backward()
    # adds to an existing x.grad instead of replacing it
    if x.grad is not None:
        x.grad.zero_()
    loss.backward()

    # Update parameter under no_grad so the in-place step is not tracked by autograd
    with torch.no_grad():
        x -= learning_rate * x.grad

# The histories hold the state before each update, so their last entries lag
# one step behind. Evaluate the parameter once more to report the true final state.
with torch.no_grad():
    final_loss = quadratic_function(x).item()

print(f"Initial x: {x_history[0]:.4f}")
print(f"Final x: {x.item():.4f}")
print(f"Target x: 3.0")
print(f"Initial loss: {loss_history[0]:.4f}")
print(f"Final loss: {final_loss:.4f}")

# Plot optimization progress
# One wide figure holding two side-by-side panels
plt.figure(figsize=(12, 5))

# Left panel: the parameter value per step, with the target x = 3 as a dashed reference line
plt.subplot(1, 2, 1)
plt.plot(x_history)
plt.axhline(y=3, color='r', linestyle='--', label='Target (x=3)')
plt.title('Parameter Convergence')
plt.xlabel('Step')
plt.ylabel('x value')
plt.legend()
plt.grid(True)

# Right panel: the loss value per step
plt.subplot(1, 2, 2)
plt.plot(loss_history)
plt.title('Loss Decrease')
plt.xlabel('Step')
plt.ylabel('Loss')
plt.grid(True)

plt.tight_layout()
plt.show()

## Final Validation

Run the complete test suite to validate all your solutions.

In [None]:
# Display final summary of all tests
# Uses the test_runner created in the setup cell; presumably it reports the
# results gathered by the test_section calls above.
test_runner.final_summary()