In [1]:
import numpy as np

def cost_fn(X, y, theta):
    """Mean squared error of the linear prediction ``X @ theta`` against ``y``.

    Args:
        X: Design matrix; its rows are indexed like the entries of ``y``.
        y: Targets. The sample count is taken from ``y.shape[0]``.
        theta: Parameter vector multiplied by ``X``.

    Returns:
        The sum of squared residuals divided by the number of samples. For a
        1-D ``y`` this is a scalar; for a column-shaped ``(m, 1)`` ``y`` the
        product ``residual.T @ residual`` yields a ``(1, 1)`` array.
    """
    n_samples = y.shape[0]
    residual = X @ theta - y
    squared_norm = residual.T @ residual
    return (1 / n_samples) * squared_norm

In [3]:
def gradient(X, y, theta):
    """Gradient of the mean-squared-error cost with respect to ``theta``.

    Computes ``(2 / m) * X.T @ (X @ theta - y)`` where ``m = y.shape[0]``,
    i.e. the derivative of ``(1 / m) * ||X @ theta - y||^2``.

    Args:
        X: Design matrix.
        y: Targets; the sample count is taken from ``y.shape[0]``.
        theta: Current parameter vector.

    Returns:
        Array with the same shape as ``theta`` holding the gradient.
    """
    n_samples = y.shape[0]
    residual = X @ theta - y
    # Scale X.T first, then multiply: same evaluation order as the one-liner
    # "(2 / m) * X.T @ residual" (``*`` and ``@`` bind left to right).
    scaled_Xt = (2 / n_samples) * X.T
    return scaled_Xt @ residual

In [7]:
def gradient_descent(X, y, theta, lr=1e-2):
    """Take one gradient-descent step and return the updated parameters.

    The update is ``theta - lr * gradient(X, y, theta)``. The result is a new
    array: the ``theta`` passed in is left untouched, so callers can keep a
    reference to the previous iterate, and an integer-dtype ``theta`` no
    longer fails the in-place cast from float to int.

    Args:
        X: Design matrix (or a batch of its rows).
        y: Targets matching the rows of ``X``.
        theta: Current parameter vector.
        lr: Learning rate (step size).

    Returns:
        The parameter vector after one step.
    """
    # Out-of-place on purpose: ``theta -= ...`` would mutate the caller's array.
    return theta - lr * gradient(X, y, theta)

In [13]:
def batch_GD(X, y, lr=1e-2, n_steps=1000):
    """Fit ``theta`` by full-batch gradient descent starting from zeros.

    Every iteration calls ``gradient_descent`` on the complete ``X`` and ``y``.

    Args:
        X: Design matrix; ``X.shape[1]`` sets the length of ``theta``.
        y: Targets.
        lr: Learning rate forwarded to ``gradient_descent``.
        n_steps: Number of update steps to perform.

    Returns:
        The parameter vector after ``n_steps`` updates.
    """
    n_features = X.shape[1]
    weights = np.zeros(n_features)
    for _step in range(n_steps):
        weights = gradient_descent(X, y, weights, lr)
    return weights

In [15]:
def mini_batch_GD(X, y, lr=1e-2, n_steps=1000, batch_size=32, rng=None):
    """Fit ``theta`` by mini-batch gradient descent starting from zeros.

    Each of the ``n_steps`` outer iterations is a full pass over the data: the
    rows are shuffled once, then ``gradient_descent`` is applied to consecutive
    slices of ``batch_size`` rows (the last slice may be shorter).

    Args:
        X: Design matrix; ``X.shape[1]`` sets the length of ``theta``.
        y: Targets; the sample count is taken from ``y.shape[0]``.
        lr: Learning rate forwarded to ``gradient_descent``.
        n_steps: Number of passes over the shuffled data (not the number of
            individual parameter updates).
        batch_size: Rows per update; must be at least 1.
        rng: Optional object exposing ``permutation(n)`` (for example a
            ``numpy.random.Generator``) used for shuffling. When ``None`` the
            global ``np.random.permutation`` is used, as before.

    Returns:
        The parameter vector after all passes.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative value
            would otherwise make the batch loop empty and silently return
            the all-zero initial ``theta``.
    """
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )

    m = y.shape[0]
    # Default to the global RNG so code relying on np.random.seed(...) keeps
    # producing the same shuffles.
    permute = np.random.permutation if rng is None else rng.permutation

    theta = np.zeros(X.shape[1])
    for _ in range(n_steps):
        order = permute(m)
        X_shuffled = X[order]
        y_shuffled = y[order]
        for start in range(0, m, batch_size):
            stop = start + batch_size
            theta = gradient_descent(
                X_shuffled[start:stop], y_shuffled[start:stop], theta, lr
            )
    return theta

In [17]:
def stochastic_GD(X, y, lr=1e-2, n_steps=1000, rng=None):
    """Fit ``theta`` by stochastic gradient descent starting from zeros.

    Each of the ``n_steps`` outer iterations is a full pass over the data in a
    freshly shuffled order, calling ``gradient_descent`` once per sample.

    Args:
        X: Design matrix; ``X.shape[1]`` sets the length of ``theta``.
        y: Targets; the sample count is taken from ``y.shape[0]``.
        lr: Learning rate forwarded to ``gradient_descent``.
        n_steps: Number of passes over the data (each pass performs one
            update per sample).
        rng: Optional object exposing ``permutation(n)`` (for example a
            ``numpy.random.Generator``) used for shuffling. When ``None`` the
            global ``np.random.permutation`` is used, as before.

    Returns:
        The parameter vector after all passes.
    """
    m = y.shape[0]
    # Default to the global RNG so code relying on np.random.seed(...) keeps
    # producing the same visiting order.
    permute = np.random.permutation if rng is None else rng.permutation

    theta = np.zeros(X.shape[1])
    for _ in range(n_steps):
        for i in permute(m):
            # Length-1 slices (not X[i]) keep the leading sample axis, so the
            # step sees a one-row batch just like the batched variants.
            theta = gradient_descent(X[i:i+1], y[i:i+1], theta, lr)
    return theta

In [19]:
# Simple dataset: y = 1 + 1 * x, so the exact least-squares solution is [1, 1].
X = np.array([[1, 1], [1, 2], [1, 3], [1, 4]])  # bias included
y = np.array([2, 3, 4, 5])

# The mini-batch and stochastic variants shuffle via the global NumPy RNG;
# seed it so a fresh "Restart & Run All" prints the same numbers every time.
np.random.seed(0)

theta = batch_GD(X, y)
print("Batch GD:", theta)

theta = mini_batch_GD(X, y, batch_size=2)
print("Mini-batch GD:", theta)

theta = stochastic_GD(X, y)
print("Stochastic GD:", theta)

Batch GD: [0.97050576 1.01003164]
Mini-batch GD: [0.99866454 1.00044486]
Stochastic GD: [0.99999795 1.00000066]
