# Tutorial 03: Directional Derivatives

Understanding rates of change in any direction.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)

## Part 1: Directional Derivative = Gradient · Direction

In [None]:
# f(x, y) = x² + y²
def f(x, y):
    """Evaluate the paraboloid f(x, y) = x² + y².

    Works elementwise when ``x`` and ``y`` are NumPy arrays (e.g. a meshgrid).
    """
    x_squared = x**2
    y_squared = y**2
    return x_squared + y_squared

def gradient(x, y):
    """Return the gradient of f(x, y) = x² + y² as the array [∂f/∂x, ∂f/∂y]."""
    partial_x = 2 * x
    partial_y = 2 * y
    return np.array([partial_x, partial_y])

def directional_derivative(x, y, direction):
    """Compute the directional derivative at (x, y) along ``direction``.

    The direction is normalised to unit length first, so only its
    orientation matters; the result is the dot product of the gradient
    (from this file's ``gradient``) with that unit vector.

    Args:
        x, y: Coordinates of the evaluation point.
        direction: Array-like direction vector; it need not be unit length.

    Returns:
        The rate of change of f at (x, y) per unit distance along ``direction``.

    Raises:
        ValueError: If ``direction`` has zero or non-finite length, in which
            case no unit direction exists.
    """
    direction = np.asarray(direction, dtype=float)
    length = np.linalg.norm(direction)
    # Dividing by a zero/inf norm would silently yield NaN instead of an error.
    if length == 0 or not np.isfinite(length):
        raise ValueError("direction must be a non-zero, finite vector")
    unit = direction / length
    return np.dot(gradient(x, y), unit)

# Evaluate the gradient once at the sample point (1, 1)
point = (1, 1)
grad = gradient(*point)
grad_norm = np.linalg.norm(grad)

print(f"At point {point}:")
print(f"Gradient = {grad}")
print(f"Gradient magnitude = {grad_norm:.4f}")

# Candidate directions, each paired with a label for the printout.
# The last entry is the gradient rotated by 90°, i.e. orthogonal to it.
perpendicular = [-grad[1], grad[0]]
directions = [
    ([1, 0], 'Along x-axis'),
    ([0, 1], 'Along y-axis'),
    ([1, 1], 'Diagonal (45°)'),
    (grad, 'Along gradient (steepest)'),
    (perpendicular, 'Perpendicular to gradient'),
]

print("\nDirectional derivatives:")
for vec, label in directions:
    rate = directional_derivative(point[0], point[1], np.array(vec))
    print(f"  {label}: D_v f = {rate:.4f}")

## Part 2: Visualize Directional Derivatives

In [None]:
# Plot the directional derivative at a fixed point as a function of direction angle
point = (1, 1)
grad = gradient(*point)
grad_magnitude = np.linalg.norm(grad)

# Angle of the gradient, measured from the +x axis
grad_angle = np.arctan2(grad[1], grad[0])

# Sweep unit directions around the full circle
angles = np.linspace(0, 2*np.pi, 100)
dir_derivs = np.array([
    directional_derivative(*point, np.array([np.cos(angle), np.sin(angle)]))
    for angle in angles
])

# Create each subplot with its own projection. Adding a polar subplot on top
# of a Cartesian one made by plt.subplots() leaves both axes drawn in the same
# slot on recent Matplotlib, which no longer removes the overlapped axes.
fig = plt.figure(figsize=(14, 5))
ax_cart = fig.add_subplot(1, 2, 1)
ax_polar = fig.add_subplot(1, 2, 2, projection='polar')

# Cartesian plot: derivative value against angle in degrees
ax_cart.plot(np.degrees(angles), dir_derivs, 'b-', linewidth=2)
ax_cart.axhline(0, color='black', linewidth=0.5)
ax_cart.axhline(grad_magnitude, color='green', linestyle='--', label=f'Max = ||∇f|| = {grad_magnitude:.2f}')
ax_cart.axhline(-grad_magnitude, color='red', linestyle='--', label=f'Min = -||∇f|| = {-grad_magnitude:.2f}')
# Dotted vertical line marks the gradient's own angle, where the maximum occurs
ax_cart.axvline(np.degrees(grad_angle), color='green', linestyle=':', alpha=0.7)
ax_cart.set_xlabel('Angle (degrees)')
ax_cart.set_ylabel('Directional Derivative')
ax_cart.set_title('Directional Derivative vs Direction Angle')
ax_cart.legend()
ax_cart.grid(True, alpha=0.3)

# Polar plot: same curve, with the angle as the polar coordinate
ax_polar.plot(angles, dir_derivs, 'b-', linewidth=2)
ax_polar.plot([grad_angle], [grad_magnitude], 'go', markersize=10, label='Max (gradient dir)')
# The minimum value is -||∇f||, reached opposite the gradient; use that value
# as the radius so the marker sits on the plotted curve.
# NOTE(review): Matplotlib polar axes appear to draw negative radii inward from
# the lower radial limit rather than mirroring them — confirm the rendering.
ax_polar.plot([grad_angle + np.pi], [-grad_magnitude], 'ro', markersize=10, label='Min (opposite)')
ax_polar.set_title('Polar Plot of Directional Derivative')
ax_polar.legend(loc='upper right')

plt.tight_layout()
plt.show()

print("D_v f = ||∇f|| cos(θ) where θ is angle between gradient and direction v")

## Part 3: Gradient Perpendicular to Level Curves

In [None]:
# Level curves of f, with gradient arrows drawn at sample points on one curve
X, Y = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))
Z = f(X, Y)

plt.figure(figsize=(10, 8))

# Labelled contour lines at a few chosen levels
contour = plt.contour(X, Y, Z, levels=[1, 2, 4, 8, 12], colors='blue')
plt.clabel(contour, inline=True, fontsize=10)

# Sample 8 evenly spaced points on the z = 4 contour (a circle of radius 2)
level = 4
radius = np.sqrt(level)
arrow_length = 0.5  # Fixed arrow length, chosen for visibility

for theta in np.linspace(0, 2*np.pi, 8, endpoint=False):
    base = radius * np.array([np.cos(theta), np.sin(theta)])
    normal = gradient(base[0], base[1])
    # Unit gradient scaled to the display length
    offset = normal / np.linalg.norm(normal) * arrow_length

    plt.arrow(base[0], base[1], offset[0], offset[1],
              head_width=0.1, head_length=0.05, fc='red', ec='red')
    plt.plot(base[0], base[1], 'ko', markersize=5)

plt.xlabel('x')
plt.ylabel('y')
plt.title('Gradient (red) is Perpendicular to Level Curves (blue)\nDirectional derivative = 0 along contours!')
plt.axis('equal')
plt.grid(True, alpha=0.3)
plt.show()

## Part 4: Why Gradient Descent Works

In [None]:
# Gradient descent moves in direction of steepest descent (-gradient)

def gradient_descent_with_directions(start, lr, n_steps):
    """Run fixed-step gradient descent and record every visited point.

    Each step moves against the gradient (from this file's ``gradient``)
    scaled by the learning rate.

    Args:
        start: Initial (x, y) position.
        lr: Learning rate multiplying the gradient at each step.
        n_steps: Number of update steps to perform.

    Returns:
        Array with one row per visited point, beginning with ``start``.
    """
    current = np.array(start, dtype=float)
    visited = [np.array(start)]

    for _step in range(n_steps):
        # Move against the gradient, i.e. downhill
        current = current - lr * gradient(current[0], current[1])
        visited.append(current.copy())

    return np.array(visited)

# Trace gradient descent from a fixed starting point
start = [2.5, 2.0]
traj = gradient_descent_with_directions(start, lr=0.15, n_steps=15)

# Background: contours of f over the plotting window
x = np.linspace(-3, 3, 100)
y = np.linspace(-3, 3, 100)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)

plt.figure(figsize=(10, 8))
plt.contour(X, Y, Z, levels=15, cmap='viridis', alpha=0.7)

# Path taken by the optimiser
plt.plot(traj[:, 0], traj[:, 1], 'ro-', markersize=8, linewidth=2, label='GD trajectory')

# Draw the descent direction at the first few points (at most 5, and never
# at the final point of the trajectory)
n_arrows = min(5, len(traj) - 1)
arrow_length = 0.4
for px, py in traj[:n_arrows]:
    descent = -gradient(px, py)  # Negative gradient = descent direction
    offset = descent / np.linalg.norm(descent) * arrow_length
    plt.arrow(px, py, offset[0], offset[1],
              head_width=0.1, head_length=0.05, fc='blue', ec='blue', alpha=0.7)

plt.scatter([0], [0], marker='*', s=200, c='gold', edgecolor='black', zorder=5, label='Minimum')
plt.xlabel('x')
plt.ylabel('y')
plt.title('Gradient Descent: Blue arrows show steepest descent direction\nTrajectory follows these directions')
plt.legend()
plt.axis('equal')
plt.grid(True, alpha=0.3)
plt.show()

print("At each point, gradient descent moves in the direction")
print("that decreases the function value most rapidly.")

## Part 5: Comparing Different Directions

In [None]:
# Compare how much f decreases after one equal-length step in several
# directions from the same starting point

start = np.array([2.0, 1.5])
step_size = 0.5

# Unit vector along the gradient at the starting point
grad = gradient(*start)
grad_unit = grad / np.linalg.norm(grad)

# Candidate unit directions to step along
directions = {
    'Along x': np.array([-1, 0]),
    'Along y': np.array([0, -1]),
    'Diagonal': np.array([-1, -1]) / np.sqrt(2),
    'Steepest descent': -grad_unit,
}

# Function value before stepping; improvements are measured against it
f_start = f(*start)

plt.figure(figsize=(12, 5))

# Left: contour plot with one arrow per direction
plt.subplot(1, 2, 1)
x = np.linspace(-0.5, 3, 50)
y = np.linspace(-0.5, 2.5, 50)
X, Y = np.meshgrid(x, y)
Z = f(X, Y)
plt.contour(X, Y, Z, levels=15, cmap='viridis', alpha=0.7)

colors = ['red', 'green', 'blue', 'purple']
improvements = {}

for (name, direction), color in zip(directions.items(), colors):
    step = step_size * direction
    f_new = f(*(start + step))
    # Store a built-in float so the dict printed at the end shows plain
    # numbers; NumPy >= 2 reprs its scalars as np.float64(...).
    improvements[name] = float(f_start - f_new)

    plt.arrow(start[0], start[1], step[0], step[1],
              head_width=0.1, head_length=0.05, fc=color, ec=color, label=name)

plt.plot(*start, 'ko', markersize=10)
plt.xlabel('x')
plt.ylabel('y')
plt.title('Different descent directions from same point')
plt.legend(loc='upper right')
plt.axis('equal')

# Right: bar chart of improvements, coloured to match the arrows
plt.subplot(1, 2, 2)
names = list(improvements.keys())
values = list(improvements.values())
plt.bar(names, values, color=colors)
plt.ylabel('Improvement (f_old - f_new)')
plt.title('Function decrease for each direction\n(Higher = better)')
plt.xticks(rotation=45, ha='right')

plt.tight_layout()
plt.show()

print("Steepest descent direction gives maximum improvement!")
print(f"\nImprovements: {improvements}")

## Summary

**Key insights:**
1. Directional derivative $D_v f = \nabla f \cdot v$, where $v$ is a unit vector ($\|v\| = 1$)
2. Maximum when $v$ aligns with $\nabla f$ (steepest ascent)
3. Minimum when $v$ opposes $\nabla f$ (steepest descent)
4. Zero when $v \perp \nabla f$ (along level curves)
5. Gradient descent exploits this by always moving in steepest descent direction