In [45]:
#### Notebook Imports
import numpy as np

In [74]:
from random import randint as rand

### CS229 Week 1 Algorithms
---
1. Linear Model (for regression)
2. Least Mean Squares cost function
3. Batch Gradient Descent
4. Stochastic Gradient Descent
5. Normal Equations

### Linear Model (Hypothesis Function)
---
\begin{equation}
h_\theta(x) = \sum_{i=0}^{n} \theta_ix_i
\end{equation}

Here $\theta_0$ is the bias/intercept of the linear model, and $x_0 = 1$ for every example (so the first column of the design matrix is all ones).

In [46]:
def h(theta, x):
    """Linear hypothesis: the sum of the elementwise products of theta and x.

    The sum runs over every element of ``theta * x``, so the result is a
    single scalar whatever the shape of the inputs. Pass one example
    (a 1-D feature vector) at a time to get that example's prediction.
    """
    weighted = theta * x
    return np.sum(weighted)

### Least Mean Squares
---
\begin{equation}
J(\theta) = \frac{1}{2} \sum_{i=1}^{m} (y^{(i)} - h_\theta(x^{(i)}))^2 \\
\frac{\partial J}{\partial \theta_j} = -\sum_{i=1}^{m} (y^{(i)} - h_\theta(x^{(i)}))x^{(i)}_j
\end{equation}

In [47]:
# Example design matrix: written one feature per row, then transposed so that
# each row is one training example and the first column is the all-ones x_0.
np.array([
    [1, 1, 1, 1],
    [23, 12, 34, 55],
    [34, 56, 63, 22],
]).T

array([[ 1, 23, 34],
       [ 1, 12, 56],
       [ 1, 34, 63],
       [ 1, 55, 22]])

In [52]:
def J(theta, x, y):
    """Least-squares cost: half the sum of squared residuals.

    Parameters
    ----------
    theta : array of n parameters.
    x : one example of n features, or a design matrix with one example
        per row (m rows, n columns).
    y : the target for that example, or the m targets.

    Returns
    -------
    A scalar, ``0.5 * sum((x . theta - y) ** 2)``.
    """
    # np.dot gives one prediction per row of x (the linear hypothesis
    # theta . x for each example), so the residuals stay per-example.
    residuals = np.dot(x, theta) - y
    # Square each residual *before* summing; squaring the sum would let
    # positive and negative errors cancel out.
    return 0.5 * np.sum(residuals ** 2)

def dJ_dtheta(theta, x, y):
    """Batch update direction: sum over examples of (y - prediction) * x.

    This is the quantity that gets *added* to theta (scaled by the learning
    rate) in the update rule, i.e. the negative of the gradient of the
    least-squares cost.

    Parameters
    ----------
    theta : array of n parameters.
    x : one example of n features, or a design matrix with one example
        per row (m rows, n columns).
    y : the target for that example, or the m targets.

    Returns
    -------
    Array of n values, one per parameter theta_j.
    """
    # Per-example residuals y - theta . x; the parentheses matter, since
    # `y - h * x` would multiply before subtracting.
    residuals = y - np.dot(x, theta)
    # Weight every example's features by its residual and add up over the
    # examples (equivalent to X^T r), leaving one component per parameter.
    return np.dot(residuals, x)

### Batch Gradient Descent
---

\begin{equation}
\theta_j := \theta_j - \alpha \frac{\partial J}{\partial \theta_j} \\
\theta_j := \theta_j + \alpha \sum_{i=1}^{m} (y^{(i)} - h_\theta(x^{(i)}))x^{(i)}_j
\end{equation}
repeat until convergence {
\begin{equation}
\theta_j := \theta_j + \alpha \sum_{i=1}^{m} (y^{(i)} - h_\theta(x^{(i)}))x^{(i)}_j \quad \text{(simultaneously for every } j\text{)}
\end{equation}
}

In [54]:
def nextTheta(theta, X, Y, alpha):
    """Return the parameters after one batch update step.

    The step is ``alpha`` times whatever ``dJ_dtheta(theta, X, Y)`` returns,
    and it is added to ``theta``.
    """
    step = alpha * dJ_dtheta(theta, X, Y)
    return theta + step

### Stochastic Gradient Descent
---

\begin{equation}
\theta_j := \theta_j - \alpha \frac{\partial}{\partial \theta_j} \frac{1}{2} (y^{(i)} - h_\theta(x^{(i)}))^2 \\
\theta_j := \theta_j + \alpha (y^{(i)} - h_\theta(x^{(i)}))x^{(i)}_j
\end{equation}
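repeat until convergence, using a single training example $i$ per step {
\begin{equation}
\theta_j := \theta_j + \alpha (y^{(i)} - h_\theta(x^{(i)}))x^{(i)}_j \quad \text{(simultaneously for every } j\text{)}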
\end{equation}
}

In [76]:
def stochastic_dJ_dtheta(theta, x, y):
    """Single-example update direction for stochastic gradient descent.

    Picks one training example i uniformly at random and returns
    ``(y[i] - theta . x[i]) * x[i]``, where ``theta . x[i]`` is the linear
    hypothesis evaluated on that example.

    Parameters
    ----------
    theta : array of n parameters.
    x : design matrix, one example per row (m rows, n columns).
    y : the m targets, aligned with the rows of x.

    Returns
    -------
    Array of n values, one per parameter theta_j.

    Raises
    ------
    ValueError : if ``y`` is empty (there is no example to sample).
    """
    # randint includes BOTH endpoints, so the upper bound must be len - 1.
    # One index is drawn and reused so the target, the prediction and the
    # feature vector all come from the same example.
    i = rand(0, len(y) - 1)
    x_i = x[i]
    residual = y[i] - np.dot(x_i, theta)
    return np.dot(residual, x_i)

In [77]:
def stochastic_nextTheta(theta, X, Y, alpha):
    """Return the parameters after one stochastic update step.

    The step is ``alpha`` times whatever ``stochastic_dJ_dtheta(theta, X, Y)``
    returns, and it is added to ``theta``.
    """
    step = alpha * stochastic_dJ_dtheta(theta, X, Y)
    return theta + step

### Normal Equations
---

\begin{equation}
\theta = (X^TX)^{-1}X^Ty
\end{equation}

In [56]:
def ftheta(X, y):
    """Solve the normal equations: theta = pinv(X^T X) X^T y.

    Parameters
    ----------
    X : design matrix, one example per row (m rows, n columns).
    y : the m targets.

    Returns
    -------
    The n least-squares parameters.
    """
    X_t = np.transpose(X)
    # np.dot performs true matrix products; `*` between ndarrays multiplies
    # elementwise and so does not compute X^T X.
    gram_inv = np.linalg.pinv(np.dot(X_t, X))
    return np.dot(np.dot(gram_inv, X_t), y)