In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

# Linear Regression

## Compute model output
<!-- Linear regression prediction in latex -->
$$
f_{w,b}(x) = w x + b
$$


In [7]:
def compute_model_output(x, w, b):
    """
    Evaluate the single-feature linear model f(x) = w * x + b, one example
    at a time.

    Args:
        x: input data; must expose ``shape`` and support integer indexing
           (``x[i]``) along its first axis
        w: weight
        b: bias
    Returns:
        NumPy float array of length ``x.shape[0]`` whose ``i``-th entry is
        ``w * x[i] + b``
    """
    predictions = np.zeros(x.shape[0])

    # Explicit per-example loop: each prediction is written into the slot
    # that was preallocated for it.
    for idx in range(predictions.shape[0]):
        predictions[idx] = w * x[idx] + b

    return predictions

## Cost function
<!-- Linear regression cost function in latex -->
$$
J(w,b) = \frac{1}{2n} \sum_{i=1}^n (f_{w,b}(x_i) - y_i)^2
$$

where 

* $n$ is the number of training examples
* $x_i$ is the $i$-th input
* $y_i$ is the $i$-th output
* $f_{w,b}(x)$ is the model output


In [4]:
def compute_cost(x, y, w, b):
    """
    Squared-error cost of the single-feature linear model:

        J(w, b) = 1 / (2 * m) * sum_i (f_wb(x[i]) - y[i]) ** 2

    where ``m = x.shape[0]`` and ``f_wb`` comes from ``compute_model_output``.

    Args:
        x: input data
        y: output data, indexed with the same integer positions as ``x``
        w: weight
        b: bias
    Returns:
        cost: accumulated squared error divided by ``2 * m``
    """
    n_examples = x.shape[0]
    predictions = compute_model_output(x, w, b)

    # Built-in sum starts from 0 and adds the terms left to right, exactly
    # like an explicit accumulator loop.
    squared_error_total = sum(
        (predictions[idx] - y[idx]) ** 2 for idx in range(n_examples)
    )

    return squared_error_total / (2 * n_examples)

## Gradient descent
<!-- Gradient descent in latex -->
$$
\begin{aligned}
w &\leftarrow w - \alpha \frac{\partial J}{\partial w} \\
b &\leftarrow b - \alpha \frac{\partial J}{\partial b}
\end{aligned}
$$

where

* $\alpha$ is the learning rate
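* $\frac{\partial J}{\partial w} = \frac{1}{n} \sum_{i=1}^n (f_{w,b}(x_i) - y_i)\, x_i$ is the partial derivative of the cost function with respect to $w$
* $\frac{\partial J}{\partial b} = \frac{1}{n} \sum_{i=1}^n (f_{w,b}(x_i) - y_i)$ is the partial derivative of the cost function with respect to $b$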


In [9]:
def compute_gradient(x, y, w, b):
    """
    Gradient of the squared-error cost for the single-feature linear model.

    For every example the residual ``f_wb(x[i]) - y[i]`` is accumulated
    (weighted by ``x[i]`` for the weight gradient, unweighted for the bias
    gradient) and the totals are divided by ``m = x.shape[0]``.

    Args:
        x: input data
        y: output data, indexed with the same integer positions as ``x``
        w: weight
        b: bias
    Returns:
        dw: gradient of the cost with respect to the weight
        db: gradient of the cost with respect to the bias
    """
    n_examples = x.shape[0]
    predictions = compute_model_output(x, w, b)

    grad_w_total = 0
    grad_b_total = 0
    for idx in range(n_examples):
        # The same residual feeds both partial derivatives.
        residual = predictions[idx] - y[idx]
        grad_w_total += residual * x[idx]
        grad_b_total += residual

    return grad_w_total / n_examples, grad_b_total / n_examples

In [12]:
def gradient_descent(x, y, w, b, learning_rate, num_iterations):
    """
    Run a fixed number of gradient-descent updates on ``w`` and ``b``.

    Each iteration asks ``compute_gradient`` for the current gradient and
    moves both parameters against it, scaled by ``learning_rate``.

    Note: the updates use augmented assignment, so a mutable array passed
    as ``w`` or ``b`` is modified in place.

    Args:
        x: input data
        y: output data
        w: initial weight
        b: initial bias
        learning_rate: step size applied to each gradient
        num_iterations: number of update steps to perform
    Returns:
        w: weight after the last update
        b: bias after the last update
    """
    for _ in range(num_iterations):
        grad_w, grad_b = compute_gradient(x, y, w, b)
        step_w = learning_rate * grad_w
        step_b = learning_rate * grad_b
        w -= step_w
        b -= step_b

    return w, b

## Prediction using vectorization with multiple inputs
<!-- Linear regression prediction in latex -->
$$
f_{w,b}(x) = w^T x + b
$$

where

* $w$ is a vector of weights
* $x$ is a vector of inputs
* $b$ is a scalar bias


In [15]:
def predict(x, w, b):
    """
    Linear-model prediction f(x) = x . w + b, evaluated with ``np.dot``.

    Args:
        x: input data
        w: weight (combined with ``x`` through ``np.dot``)
        b: bias added to the dot product
    Returns:
        ``np.dot(x, w) + b``
    """
    weighted_sum = np.dot(x, w)
    return weighted_sum + b

## Cost function using vectorization with multiple inputs
<!-- Linear regression cost function in latex -->
$$
J(w,b) = \frac{1}{2n} \sum_{i=1}^n (f_{w,b}(x_i) - y_i)^2
$$

where

* $n$ is the number of training examples
* $x_i$ is the $i$-th input vector
* $y_i$ is the $i$-th output
* $f_{w,b}(x_i) = w^T x_i + b$ is the model output for the $i$-th input vector
* $w^T$ is the transpose of the weight vector $w$


In [16]:
def compute_cost(X, y, w, b):
    """
    Computes the squared-error cost of a linear model, vectorized over all
    training examples:

        J(w, b) = 1 / (2 * m) * sum_i (f_wb(x_i) - y_i) ** 2

    Args:
        X: input data with one training example per entry along the first
           axis, i.e. shape (m,) for a single feature or (m, n) for n features
        y: output data, one target per training example; flattened to 1-D
           before use
        w: weight (scalar for 1-D X, vector of length n for 2-D X)
        b: bias
    Returns:
        cost: cost of the linear model
    Raises:
        ZeroDivisionError: if X contains no training examples
    """
    m = X.shape[0]
    if m == 0:
        # Raise explicitly: NumPy scalar division would otherwise return NaN
        # with only a warning.
        raise ZeroDivisionError("cannot compute the cost of an empty training set")

    # Flatten y so a column-shaped target cannot broadcast against the
    # predictions into an (m, m) matrix.
    errors = predict(X, w, b) - np.ravel(y)

    # One reduction over all residuals replaces the per-example Python loop.
    return np.sum(errors ** 2) / (2 * m)

## Gradient descent using vectorization with multiple inputs
<!-- Gradient descent in latex -->
$$
\begin{aligned}
w &\leftarrow w - \alpha \frac{\partial J}{\partial w} \\
b &\leftarrow b - \alpha \frac{\partial J}{\partial b}
\end{aligned}
$$

where

* $\alpha$ is the learning rate
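* $\frac{\partial J}{\partial w} = \frac{1}{n} \sum_{i=1}^n (f_{w,b}(x_i) - y_i)\, x_i$ is the partial derivative of the cost function with respect to the weight vector $w$ (one component per feature)
* $\frac{\partial J}{\partial b} = \frac{1}{n} \sum_{i=1}^n (f_{w,b}(x_i) - y_i)$ is the partial derivative of the cost function with respect to $b$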

In [17]:
def compute_gradient(X, y, w, b):
    """
    Computes the gradient of the squared-error cost of a linear model,
    vectorized over all training examples:

        dJ/dw = 1 / m * sum_i (f_wb(x_i) - y_i) * x_i
        dJ/db = 1 / m * sum_i (f_wb(x_i) - y_i)

    Args:
        X: input data with one training example per entry along the first
           axis, i.e. shape (m,) for a single feature or (m, n) for n features
        y: output data, one target per training example; flattened to 1-D
           before use
        w: weight (scalar for 1-D X, vector of length n for 2-D X)
        b: bias
    Returns:
        dw: gradient of the weight (scalar for 1-D X, length-n vector for
            2-D X)
        db: gradient of the bias
    Raises:
        ZeroDivisionError: if X contains no training examples
    """
    m = X.shape[0]
    if m == 0:
        # Raise explicitly: NumPy division would otherwise return NaN with
        # only a warning.
        raise ZeroDivisionError("cannot compute the gradient of an empty training set")

    # Flatten y so a column-shaped target cannot broadcast against the
    # predictions into an (m, m) matrix.
    errors = predict(X, w, b) - np.ravel(y)

    # np.dot(errors, X) sums errors[i] * X[i] over all examples in one call;
    # it yields a scalar for 1-D X and a length-n vector for 2-D X.
    dw = np.dot(errors, X) / m
    db = np.sum(errors) / m

    return dw, db

In [21]:
def gradient_descent(X, y, w, b, learning_rate, num_iterations):
    """
    Performs gradient descent without modifying the caller's initial
    parameters.

    Each iteration obtains the gradient from ``compute_gradient`` and moves
    ``w`` and ``b`` against it, scaled by ``learning_rate``.

    Args:
        X: input data
        y: output data
        w: initial weight (scalar or NumPy array); left untouched
        b: initial bias; left untouched
        learning_rate: learning rate
        num_iterations: number of iterations
    Returns:
        w: weight after the last update
        b: bias after the last update
    """
    for _ in range(num_iterations):
        dw, db = compute_gradient(X, y, w, b)
        # Rebind instead of using `-=`: on a NumPy array `-=` would overwrite
        # the caller's initial weights in place, and it raises a casting
        # error when `w` has an integer dtype.
        w = w - learning_rate * dw
        b = b - learning_rate * db

    return w, b