# Tutorial 02: Multivariable Derivatives

Gradients, Jacobians, and Hessians visualized.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
np.random.seed(42)

## Part 1: Partial Derivatives

In [None]:
# f(x, y) = x² + y²
def f(x, y):
    """Evaluate the paraboloid x² + y²; arithmetic is elementwise for array inputs."""
    x_squared = x**2
    y_squared = y**2
    return x_squared + y_squared

def df_dx(x, y):
    """Partial derivative of x² + y² with respect to x.

    `y` is accepted for a uniform (x, y) signature but does not affect the result.
    """
    slope_x = 2*x
    return slope_x

def df_dy(x, y):
    """Partial derivative of x² + y² with respect to y.

    `x` is accepted for a uniform (x, y) signature but does not affect the result.
    """
    slope_y = 2*y
    return slope_y

# Sample f on a 50 x 50 grid covering [-3, 3] x [-3, 3]
x = np.linspace(-3, 3, 50)
y = np.linspace(-3, 3, 50)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)

# One row of three panels: the surface, then one 1-D slice per variable
fig = plt.figure(figsize=(15, 5))
ax1 = fig.add_subplot(1, 3, 1, projection='3d')
ax2 = fig.add_subplot(1, 3, 2)
ax3 = fig.add_subplot(1, 3, 3)

# Panel 1: the full surface
ax1.plot_surface(X, Y, Z, cmap='viridis', alpha=0.8)
ax1.set(xlabel='x', ylabel='y', zlabel='f(x,y)', title='$f(x,y) = x^2 + y^2$')

# Panels 2 and 3: hold one variable at a constant and plot f together with
# the partial derivative along the remaining variable.
y_fixed = 1
x_fixed = 1
slice_panels = [
    # (axes, free-variable samples, f on the slice, partial on the slice,
    #  legend label for f, legend label for the partial, x-axis label, title)
    (ax2, x, f(x, y_fixed), df_dx(x, y_fixed),
     f'$f(x, {y_fixed}) = x^2 + {y_fixed**2}$', f'$∂f/∂x = 2x$',
     'x', f'Slice at y = {y_fixed}'),
    (ax3, y, f(x_fixed, y), df_dy(x_fixed, y),
     f'$f({x_fixed}, y) = {x_fixed**2} + y^2$', f'$∂f/∂y = 2y$',
     'y', f'Slice at x = {x_fixed}'),
]
for panel, samples, curve, slope, curve_label, slope_label, var_name, heading in slice_panels:
    panel.plot(samples, curve, 'b-', linewidth=2, label=curve_label)
    panel.plot(samples, slope, 'r--', linewidth=2, label=slope_label)
    panel.axhline(0, color='black', linewidth=0.5)  # zero reference for the derivative
    panel.set_xlabel(var_name)
    panel.set_title(heading)
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("Partial derivatives: differentiate w.r.t. one variable, treat others as constants")

## Part 2: Gradient as Direction of Steepest Ascent

In [None]:
# The gradient of f(x,y) = x² + y² is the vector field [2x, 2y]

# 20 x 20 grid over [-3, 3] x [-3, 3]
x = np.linspace(-3, 3, 20)
y = np.linspace(-3, 3, 20)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)

# Both gradient components evaluated on the grid: U = ∂f/∂x, V = ∂f/∂y
U, V = df_dx(X, Y), df_dy(X, Y)

fig, ax = plt.subplots(figsize=(10, 8))

# Level curves of f, labelled with their values
contour = ax.contour(X, Y, Z, levels=15, cmap='viridis')
ax.clabel(contour, inline=True, fontsize=8)

# Draw only every `skip`-th arrow along each grid axis for clarity
skip = 2
thinned = (slice(None, None, skip), slice(None, None, skip))
ax.quiver(X[thinned], Y[thinned], U[thinned], V[thinned],
          color='red', alpha=0.7, scale=50)

ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Gradient Vectors (red) on Contour Plot\nGradient points toward steepest ascent')
ax.axis('equal')
ax.grid(True, alpha=0.3)
plt.show()

print("Notice: Gradient vectors are perpendicular to contour lines!")
print("They point toward increasing function values (away from minimum at origin).")

## Part 3: Gradient Descent Visualization

In [None]:
def gradient_descent_2d(start, lr, n_steps, f, grad_f):
    """Iterate plain gradient descent and record every visited point.

    Parameters
    ----------
    start : initial point; its first two entries are passed to `grad_f`.
        It is stored as-is as the first trajectory entry.
    lr : step size that multiplies the gradient in each update.
    n_steps : number of update steps to perform.
    f : objective function; accepted for a uniform call signature but not
        evaluated here.
    grad_f : callable `(x, y) -> gradient components` at that point.

    Returns
    -------
    np.ndarray stacking the start point followed by the point after each
    step (n_steps + 1 entries).
    """
    current = np.array(start, dtype=float)
    visited = [start]

    for _step in range(n_steps):
        gradient = np.asarray(grad_f(current[0], current[1]))
        # Step against the gradient, i.e. downhill
        current = current - lr * gradient
        visited.append(current.copy())

    return np.array(visited)

# Function: f(x,y) = x² + 4y² (elongated bowl)
def f_bowl(x, y):
    """Evaluate x² + 4y², a quadratic bowl that rises four times faster along y."""
    along_x = x**2
    along_y = 4*y**2
    return along_x + along_y

def grad_bowl(x, y):
    """Gradient of x² + 4y², returned as the list [∂f/∂x, ∂f/∂y] = [2x, 8y]."""
    d_dx = 2*x
    d_dy = 8*y
    return [d_dx, d_dy]

# Run gradient descent.
# For this bowl the gradient is [2x, 8y], so each step multiplies
# x by (1 - 2*lr) and y by (1 - 8*lr).
# With lr = 0.2 the y-factor is -0.6: y flips sign on every step (the
# oscillation pointed out below) while still shrinking toward 0, and the
# x-factor is 0.6. For lr <= 0.125 the y-factor is non-negative, so no
# oscillation would be visible; for lr >= 0.25 y no longer converges.
start = [3.0, 2.0]
trajectory = gradient_descent_2d(start, lr=0.2, n_steps=20, f=f_bowl, grad_f=grad_bowl)

# Plot the iterates on top of the level curves of f_bowl
x = np.linspace(-4, 4, 50)
y = np.linspace(-3, 3, 50)
X, Y = np.meshgrid(x, y)
Z = f_bowl(X, Y)

plt.figure(figsize=(10, 8))
plt.contour(X, Y, Z, levels=20, cmap='viridis')
# Column 0 of the trajectory holds the x-coordinates, column 1 the y-coordinates
plt.plot(trajectory[:, 0], trajectory[:, 1], 'ro-', markersize=6, linewidth=2, label='GD trajectory')
# zorder=5 keeps the minimum marker on top of the trajectory line
plt.scatter([0], [0], marker='*', s=200, c='gold', edgecolor='black', zorder=5, label='Minimum')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Gradient Descent on $f(x,y) = x^2 + 4y^2$')
plt.legend()
plt.axis('equal')
plt.grid(True, alpha=0.3)
plt.show()

print("Gradient descent moves opposite to gradient (toward minimum).")
print("Notice the oscillation in the steep y-direction!")

## Part 4: Hessian and Curvature

In [None]:
# For f(x,y) = x² + 4y²
# Hessian H = [[2, 0], [0, 8]]
# Eigenvalues: 2 and 8 (curvature in each direction)

H = np.array([[2, 0], [0, 8]])
# H is symmetric, so use the symmetric solver: eigh returns real
# eigenvalues sorted in ascending order (eig guarantees neither).
eigenvalues, eigenvectors = np.linalg.eigh(H)

print("Hessian matrix:")
print(H)
print(f"\nEigenvalues: {eigenvalues}")
print(f"Eigenvectors:\n{eigenvectors}")
# Ascending order means the last entry is λ_max and the first is λ_min
print(f"\nCondition number κ = λ_max/λ_min = {eigenvalues[-1]/eigenvalues[0]}")

# Visualize curvature: one 1-D slice of f through the origin per axis
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Slice along x (low curvature): f(x, 0) = x²
x = np.linspace(-3, 3, 100)
axes[0].plot(x, x**2, 'b-', linewidth=2)
axes[0].set_title(f'Slice along x: curvature = {H[0,0]}')
axes[0].set_xlabel('x')
axes[0].set_ylabel('f')
axes[0].grid(True, alpha=0.3)

# Slice along y (high curvature): f(0, y) = 4y²
y = np.linspace(-3, 3, 100)
axes[1].plot(y, 4*y**2, 'r-', linewidth=2)
axes[1].set_title(f'Slice along y: curvature = {H[1,1]}')
axes[1].set_xlabel('y')
axes[1].set_ylabel('f')
axes[1].grid(True, alpha=0.3)

plt.suptitle('Different curvatures along different directions', fontsize=12)
plt.tight_layout()
plt.show()

print("\nHigh condition number = elongated contours = harder optimization")

## Part 5: Jacobian for Vector Functions

In [None]:
# A linear map y = Ax: its Jacobian is the constant matrix A
A = np.array([[2, 1], [0, 3]])

print("Linear transformation y = Ax with:")
print(f"A = \n{A}")
print("\nThe Jacobian IS the matrix A!")
print("J = ∂y/∂x = A")

# Points on the unit circle (row 0 = x-coordinates, row 1 = y-coordinates)
# and their images under A
theta = np.linspace(0, 2*np.pi, 100)
circle = np.array([np.cos(theta), np.sin(theta)])
transformed = A @ circle

# Side-by-side panels drawn with identical settings so the shapes are comparable
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
panels = (
    (axes[0], circle, 'b-', 'Original: Unit Circle'),
    (axes[1], transformed, 'r-', 'Transformed: Ellipse'),
)
for ax, points, line_style, heading in panels:
    ax.plot(points[0], points[1], line_style, linewidth=2)
    ax.set_title(heading)
    ax.axis('equal')
    ax.grid(True, alpha=0.3)
    ax.set_xlim(-4, 4)
    ax.set_ylim(-4, 4)

fig.suptitle('Linear Transformation: Jacobian = A', fontsize=12)
plt.tight_layout()
plt.show()

# svd returns (U, singular values, Vh); only the middle element is reported
_, singular_values, _ = np.linalg.svd(A)
print(f"\nSingular values of A: {singular_values}")
print("These determine how much the transformation stretches in each direction.")

## Summary

**Key insights:**
1. **Partial derivative**: Rate of change along one axis, with the other variables held fixed
2. **Gradient**: Points toward steepest ascent, perpendicular to contours
3. **Jacobian**: Matrix of all partial derivatives for vector functions
4. **Hessian**: Captures curvature; eigenvalues = curvatures along principal directions