# Tutorial 01: Single Variable Derivatives

This tutorial visualizes derivatives of single-variable functions to build an intuitive understanding of what they measure.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)

## Part 1: Derivative as Slope of Tangent Line

In [None]:
def f(x):
    """Evaluate the example parabola f(x) = x^2.

    `x` may be anything that supports the power operator (a plain number
    or a NumPy array, in which case the square is taken element-wise).
    """
    squared = pow(x, 2)
    return squared

def f_derivative(x):
    """Return the analytical derivative of x^2, i.e. 2x, evaluated at `x`."""
    doubled = x * 2
    return doubled

# Tangency point and the slope of f at that point
x0 = 1.5
y0 = f(x0)
slope = f_derivative(x0)

# Sample grid, and the tangent in point-slope form: y = y0 + slope * (x - x0)
x = np.linspace(-1, 3, 100)
tangent = y0 + slope * (x - x0)

# One figure holding a single Axes; everything is drawn through `ax`
fig, ax = plt.subplots(figsize=(10, 6))

# Curve first, then the tangent, so the legend lists them in that order
ax.plot(x, f(x), 'b-', linewidth=2, label='$f(x) = x^2$')
ax.plot(x, tangent, 'r--', linewidth=2, label=f'Tangent at x={x0}: slope={slope}')

# Marker at the point of tangency (raised zorder keeps it above both lines)
ax.scatter([x0], [y0], color='red', s=100, zorder=5)

ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Derivative = Slope of Tangent Line')
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_ylim(-1, 6)
plt.show()

print(f"At x = {x0}: f(x) = {y0}, f'(x) = {slope}")

## Part 2: Secant Lines Converging to Tangent

In [None]:
# Base point shared by every secant line
x0 = 1.0
y0 = f(x0)

# Step sizes to compare, one subplot per value
h_values = [1.0, 0.5, 0.1]
x = np.linspace(-0.5, 2.5, 100)

# The tangent at x0 does not depend on h, so it is built once up front
tangent = y0 + f_derivative(x0) * (x - x0)

fig, axes = plt.subplots(1, 3, figsize=(15, 5))

for ax, h in zip(axes, h_values):
    # Second secant point sits a distance h to the right of x0
    x1 = x0 + h
    y1 = f(x1)

    # Difference quotient = slope of the line through (x0, y0) and (x1, y1)
    secant_slope = (y1 - y0) / h
    secant = y0 + secant_slope * (x - x0)

    # Draw order (curve, secant, tangent) fixes the legend order
    ax.plot(x, f(x), 'b-', linewidth=2, label='$f(x) = x^2$')
    ax.plot(x, secant, 'g--', linewidth=2, alpha=0.7, label=f'Secant (h={h})')
    ax.plot(x, tangent, 'r-', linewidth=1.5, label='Tangent')

    # Mark the two points the secant passes through
    ax.scatter([x0, x1], [y0, y1], color='green', s=80, zorder=5)

    ax.set(
        xlabel='x',
        ylabel='y',
        title=f'h = {h}, secant slope = {secant_slope:.2f}',
        ylim=(-0.5, 4),
    )
    ax.legend(fontsize=8)
    ax.grid(True, alpha=0.3)

plt.suptitle('Secant Lines → Tangent as h → 0', fontsize=14)
plt.tight_layout()
plt.show()

print(f"True derivative at x=1: f'(1) = {f_derivative(1)}")

## Part 3: Numerical vs Analytical Derivatives

In [None]:
def numerical_derivative(f, x, h=1e-5):
    """Approximate f'(x) with a central (symmetric) difference.

    Args:
        f: Callable evaluated at ``x + h`` and ``x - h``.
        x: Point at which the derivative is estimated.
        h: Half-width of the symmetric step around ``x``.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    ahead = f(x + h)
    behind = f(x - h)
    step_width = 2 * h
    return (ahead - behind) / step_width

# Functions to check; each entry is (display name, function, analytical derivative)
functions = [
    ('x²', lambda x: x**2, lambda x: 2*x),
    ('x³', lambda x: x**3, lambda x: 3*x**2),
    ('exp(x)', np.exp, np.exp),
    ('ln(x)', np.log, lambda x: 1/x),
    ('sin(x)', np.sin, np.cos),
]

# Evaluation point (positive, so ln(x) and 1/x are defined)
x_test = 2.0

# Header is derived from x_test so it cannot drift from the value actually used
print(f"Comparing Numerical vs Analytical Derivatives at x = {x_test}")
print("=" * 60)
print(f"{'Function':<15} {'Numerical':<15} {'Analytical':<15} {'Error':<15}")
print("-" * 60)

# The loop variable is deliberately NOT called `f`: a loop at module level
# rebinds global names, and `f` is the tutorial's f(x) = x**2 used by the
# plotting cells. Reusing that name would leave `f` bound to the last entry
# (np.sin) and silently change those plots when the cells are re-run.
for name, func, dfunc in functions:
    num = numerical_derivative(func, x_test)
    ana = dfunc(x_test)
    error = abs(num - ana)  # absolute error of the central-difference estimate
    print(f"{name:<15} {num:<15.6f} {ana:<15.6f} {error:<15.2e}")

## Part 4: Chain Rule Visualization

In [None]:
# Composite f(g(x)) with outer f(u) = u^2 and inner g(x) = sin(x)
# Chain rule gives: d/dx[sin²(x)] = 2*sin(x)*cos(x)

x = np.linspace(0, 2*np.pi, 100)

# The composite function itself
y = np.sin(x)**2

# Its derivative, written two equivalent ways (double-angle identity)
dy = 2 * np.sin(x) * np.cos(x)
dy_simplified = np.sin(2*x)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
func_ax, deriv_ax = axes

# Left panel: the function
func_ax.plot(x, y, 'b-', linewidth=2, label='$f(x) = \\sin^2(x)$')
func_ax.set(xlabel='x', ylabel='y', title='Function: $\\sin^2(x)$')
func_ax.legend()
func_ax.grid(True, alpha=0.3)

# Right panel: both forms of the derivative, overlaid to show they coincide
deriv_ax.plot(x, dy, 'r-', linewidth=2, label="$f'(x) = 2\\sin(x)\\cos(x)$")
deriv_ax.plot(x, dy_simplified, 'g--', linewidth=2, alpha=0.7, label="$= \\sin(2x)$")
deriv_ax.axhline(0, color='black', linewidth=0.5)
deriv_ax.set(xlabel='x', ylabel="y'", title='Derivative via Chain Rule')
deriv_ax.legend()
deriv_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("Chain rule: d/dx[f(g(x))] = f'(g(x)) · g'(x)")
print("Here: d/dx[sin²(x)] = 2·sin(x) · cos(x)")

## Part 5: ML Activation Functions and Their Derivatives

In [None]:
x = np.linspace(-5, 5, 200)


def sigmoid(z):
    """Logistic sigmoid 1 / (1 + e^(-z)), applied element-wise."""
    return 1 / (1 + np.exp(-z))


def sigmoid_derivative(z):
    """Sigmoid derivative via the identity s'(z) = s(z) * (1 - s(z))."""
    s = sigmoid(z)  # evaluate the sigmoid once and reuse it
    return s * (1 - s)


# Activations and derivatives: name -> {'f': function, 'df': derivative}
activations = {
    'Sigmoid': {
        'f': sigmoid,
        'df': sigmoid_derivative,
    },
    'Tanh': {
        'f': np.tanh,
        'df': lambda z: 1 - np.tanh(z)**2,
    },
    'ReLU': {
        'f': lambda z: np.maximum(0, z),
        # np.asarray lets this accept plain scalars as well as arrays
        'df': lambda z: (np.asarray(z) > 0).astype(float),
    },
}

# One column per activation: function on the top row, derivative below.
# squeeze=False keeps `axes` two-dimensional even with a single column.
n_cols = len(activations)
fig, axes = plt.subplots(2, n_cols, figsize=(5 * n_cols, 10), squeeze=False)

for i, (name, funcs) in enumerate(activations.items()):
    y = funcs['f'](x)
    dy = funcs['df'](x)

    # Function
    func_ax = axes[0, i]
    func_ax.plot(x, y, 'b-', linewidth=2)
    func_ax.axhline(0, color='black', linewidth=0.5)
    func_ax.axvline(0, color='black', linewidth=0.5)
    func_ax.set_title(f'{name}: $f(x)$')
    func_ax.grid(True, alpha=0.3)
    # ReLU keeps growing for x > 0, so it needs a taller window than the
    # bounded sigmoid/tanh panels
    if name == 'ReLU':
        func_ax.set_ylim(-1, 5)
    else:
        func_ax.set_ylim(-1.5, 1.5)

    # Derivative
    deriv_ax = axes[1, i]
    deriv_ax.plot(x, dy, 'r-', linewidth=2)
    deriv_ax.axhline(0, color='black', linewidth=0.5)
    deriv_ax.axvline(0, color='black', linewidth=0.5)
    deriv_ax.set_title(f"{name}: $f'(x)$")
    deriv_ax.grid(True, alpha=0.3)
    deriv_ax.set_ylim(-0.5, 1.5)

plt.suptitle('Activation Functions and Their Derivatives', fontsize=14)
plt.tight_layout()
plt.show()

print("Notice:")
print("- Sigmoid/Tanh derivatives → 0 for large |x| (vanishing gradients!)")
print("- ReLU derivative = 1 for x > 0, 0 for x < 0 (constant gradient)")

## Summary

**Key insights:**
1. Derivative = slope of tangent line = instantaneous rate of change
2. Chain rule is the foundation of backpropagation
3. Numerical differentiation (e.g., central differences) is a reliable way to verify analytical derivatives
4. Activation function derivatives determine gradient flow