In [1]:
import numpy as np
import matplotlib.pyplot as plt

Multiple variable linear regression equation:
$$ f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b  \tag{2} $$ 
Cost function with multiple variables:
$$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 \tag{3}$$ 
Gradient descent with multiple variables:
$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\;
& w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{5}  \; & \text{for j = 0..n-1}\newline
&b\ \ = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$

where $n$ is the number of features, the parameters $w_j$ and $b$ are updated simultaneously, and the partial derivatives are given by

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{6}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{7}
\end{align}
$$
* $m$ is the number of training examples in the data set
* $\frac{\partial J(\mathbf{w},b)}{\partial w_j}$ and $\frac{\partial J(\mathbf{w},b)}{\partial b}$ are the gradients of the cost $J$ with respect to the weight $w_j$ and the bias $b$


In [2]:
def calculate_gradient(X, y, w, b):
    """
    Calculates the gradient of the squared-error cost for linear regression.

    Implements, in vectorized form,
        dJ/dw_j = (1/m) * sum_i (w . x_i + b - y_i) * x_ij
        dJ/db   = (1/m) * sum_i (w . x_i + b - y_i)

    Args:
        X (array-like, (m,n)): data, m examples with n features
        y (array-like, (m,)) : target values; an (m,1) column is flattened
        w (array-like, (n,)) : model parameters (weights)
        b (scalar)           : model parameter (bias)

    Returns:
        dj_dw (ndarray, (n,)): gradient of the cost with respect to the weights w.
        dj_db (scalar)       : gradient of the cost with respect to the bias b.

    Raises:
        ValueError: if X has no examples or y does not hold exactly m targets.
    """
    X = np.asarray(X)
    w = np.asarray(w)
    # Flatten y so an (m,1) column cannot broadcast against the (m,)
    # prediction vector and silently produce an (m,m) error matrix.
    y = np.ravel(y)

    m, n = X.shape
    if m == 0:
        raise ValueError("X must contain at least one training example")
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} target values but X has {m} examples"
        )

    # Prediction error for every example at once, shape (m,).
    err = X @ w + b - y

    # X.T @ err sums err_i * x_ij over the examples for each feature j.
    dj_dw = X.T @ err / m
    dj_db = err.sum() / m

    return dj_dw, dj_db

In [3]:
def calculate_cost(X, y, w, b):
    """
    Calculates the squared-error cost for multiple variable linear regression.

    Implements, in vectorized form,
        J(w,b) = (1/(2m)) * sum_i (w . x_i + b - y_i)^2

    Args:
        X (array-like, (m,n)): data, m examples with n features
        y (array-like, (m,)) : target values; an (m,1) column is flattened
        w (array-like, (n,)) : model parameters (weights)
        b (scalar)           : model parameter (bias)

    Returns:
        cost (scalar)        : cost (error between predictions and target values)

    Raises:
        ValueError: if X has no examples or y does not hold exactly m targets.
    """
    X = np.asarray(X)
    w = np.asarray(w)
    # Flatten y so an (m,1) column cannot broadcast against the (m,)
    # prediction vector and silently produce an (m,m) error matrix.
    y = np.ravel(y)

    m = X.shape[0]
    if m == 0:
        raise ValueError("X must contain at least one training example")
    if y.shape[0] != m:
        raise ValueError(
            f"y has {y.shape[0]} target values but X has {m} examples"
        )

    # Prediction error for every example at once, shape (m,).
    err = X @ w + b - y
    cost = np.sum(err ** 2) / (2 * m)
    return cost

In [None]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs gradient descent by updating w, b using alpha.
    Args:
        X    (ndarray, (m,n)): data, m examples with n features
        y    (ndarray, (m,)) : target values
        w_in (ndarray (n,))  : initial values for model parameters (weights)
        b_in (scalar)        : initial value for model parameter (bias)
        cost_fuction         : function that calculates cost
        gradient_function    : function that calculates gradients
        alpha (float)        : learning rate
        num_iters (int)      : number of iterations to run gradient descent
        
    Returns:
        w (ndarray, (m,n))   : updated values for model parameters (weights)
        b (scalar)           : updates value for model parameter (bias)
    """
    
    