In [3]:
import numpy as np

def gradient_descent(X, y, weights, learning_rate, n_iterations, batch_size=1, method='batch'):
    """Fit linear-regression weights by gradient descent on mean squared error.

    Every pass over the data is split into consecutive chunks of rows, and one
    update is applied per chunk ``(xb, yb)`` holding ``k`` rows::

        weights -= learning_rate * (2 / k) * xb.T @ (xb @ weights - yb)

    The three methods differ only in the chunk size:

    * ``'batch'``      -- a single chunk holding the whole dataset,
    * ``'stochastic'`` -- one chunk per sample, visited in row order
      (samples are not shuffled),
    * ``'mini_batch'`` -- chunks of ``batch_size`` rows; the last chunk is
      shorter when ``batch_size`` does not divide the number of samples.

    Parameters
    ----------
    X : array-like, shape (m, n)
        Feature matrix, one sample per row.
    y : array-like, shape (m,)
        Target value for each sample.
    weights : array-like, shape (n,)
        Starting weights. Never modified: a copy is updated and returned.
    learning_rate : float
        Step size applied to every gradient.
    n_iterations : int
        Number of full passes over the dataset.
    batch_size : int, default 1
        Rows per update. Only used when ``method='mini_batch'``.
    method : {'batch', 'stochastic', 'mini_batch'}, default 'batch'
        Which chunking scheme to use (see above).

    Returns
    -------
    numpy.ndarray
        The updated weights. Integer or boolean starting weights are
        promoted to float; floating and complex dtypes are kept as given.

    Raises
    ------
    ValueError
        If ``method`` is not one of the three supported names (checked even
        when ``n_iterations`` is 0), if the dataset is empty, if ``X`` and
        ``y`` have different numbers of samples, or if ``batch_size`` is
        smaller than 1 while ``method='mini_batch'``.
    """
    # Validate before the loop so a bad method is reported even when
    # n_iterations == 0 and the loop body would never run.
    if method not in ('batch', 'stochastic', 'mini_batch'):
        raise ValueError("method must be 'batch', 'stochastic', or 'mini_batch'")

    X = np.asarray(X)
    y = np.asarray(y)
    m = len(y)
    if m == 0:
        raise ValueError("X and y must contain at least one sample")
    if len(X) != m:
        raise ValueError(f"X has {len(X)} rows but y has {m} targets")

    if method == 'batch':
        chunk = m
    elif method == 'stochastic':
        chunk = 1
    else:
        # A non-positive step would make range() fail or, worse, produce no
        # chunks at all and silently skip training.
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        chunk = batch_size

    # np.array always copies, so the caller's weights are left untouched.
    weights = np.array(weights)
    if not np.issubdtype(weights.dtype, np.inexact):
        # The in-place update below cannot store float steps in an int array.
        weights = weights.astype(float)

    for _ in range(n_iterations):
        for start in range(0, m, chunk):
            xb = X[start:start + chunk]
            yb = y[start:start + chunk]
            residual = xb @ weights - yb
            # Divide by the actual chunk length so a short final chunk is
            # still averaged correctly.
            gradient = (2 / len(yb)) * (xb.T @ residual)
            weights -= learning_rate * gradient

    return weights
    
# Usage example: recover y = 1*x + 1 from four points. The second column of X
# is all ones, so the second weight plays the role of the intercept.
X = np.array([
    [1, 1],
    [2, 1],
    [3, 1],
    [4, 1],
])
y = np.array([2, 3, 4, 5])
weights = np.zeros(X.shape[1])  # one starting weight per column of X

# gradient_descent works on a copy of `weights`, so all three runs start from
# the same zero vector and use the same learning rate and iteration count.
print(
    "Batch:      ",
    gradient_descent(X, y, weights, learning_rate=0.01, n_iterations=1000, method="batch"),
)
print(
    "Stochastic: ",
    gradient_descent(X, y, weights, learning_rate=0.01, n_iterations=1000, method="stochastic"),
)
print(
    "Mini-batch: ",
    gradient_descent(
        X, y, weights, learning_rate=0.01, n_iterations=1000, batch_size=2, method="mini_batch"
    ),
)

Batch:       [1.01003164 0.97050576]
Stochastic:  [1.00000058 0.99999813]
Mini-batch:  [1.0003804  0.99883421]
