In [17]:
#### Notebook Imports
import numpy as np

In [18]:
from random import randint as rand

### CS229 Week 1 Algorithms
---
1. Linear Model (for regression)
2. Least Mean Squares cost function
3. Batch Gradient Descent
4. Stochastic Gradient Descent
5. Normal Equations

### Linear Model (Hypothesis Function)
---
\begin{equation}
h_\theta(x) = \sum_{i=0}^{n} \theta_ix_i
\end{equation}

Here: $\theta_0$ will be bias/intercept of the linear equation and $x_0$ will be a 1 vector

In [38]:
h = lambda theta, x: 1 + np.sum(theta*x)

### Least Mean Squares
---
\begin{equation}
J(\theta) = \frac{1}{2} \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})^2 \\
\frac{\partial J}{\partial \theta_j} = \sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})x^{(i)}_j
\end{equation}

In [36]:
J = lambda theta, x, y: (1/2)*np.sum(h(theta, x) - y)**2

# dJ_dtheta = lambda theta, x, y, xj: np.sum(h(theta, x) - y)*xj
def dJ_dtheta(theta, x, y, xj):
    s = h(theta, np.ones(x.shape)) - y[0]
    for i in range(len(x)):
        s += (h(theta[i], x[i]) - y[i])
    return s

### Batch Gradient Descent
---

\begin{equation}
\theta_{j+1} := \theta_j - \alpha \frac{\partial J}{\partial \theta_j} \\
\theta_{j+1} := \theta_j - \alpha (\sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})x^{(i)}_j)
\end{equation}
repeat until convergence {
\begin{equation}
\theta_{j+1} := \theta_j - \alpha (\sum_{i=1}^{m} (h_\theta(x^{(i)}) - y^{(i)})x^{(i)}_j)
\end{equation}
}

In [40]:
nextTheta = lambda theta, X, Y, alpha: theta - alpha*dJ_dtheta(theta, X, Y, 1)

### Stochastic Gradient Descent
---

\begin{equation}
\theta_{j+1} := \theta_j - \alpha \frac{\partial J}{\partial \theta_j} \\
\theta_{j+1} := \theta_j - \alpha (h_\theta(x^{(i)}) - y^{(i)})x^{(i)}_j
\end{equation}
repeat until converge {
\begin{equation}
\theta_{j+1} := \theta_j - \alpha (h_\theta(x^{(i)}) - y^{(i)})x^{(i)}_j
\end{equation}
}

In [8]:
stochastic_dJ_dtheta = lambda theta, x, y: y[rand(0, len(y))] - h(theta, x[rand(0, len(x))]) * x[rand(0, len(x))]

In [64]:
stochastic_nextTheta = lambda theta, X, Y, alpha: theta + alpha*stochastic_dJ_dtheta(theta, X, Y)

### Normal Equations
---

\begin{equation}
\theta = (X^TX)^{-1}X^Ty
\end{equation}

In [9]:
ftheta = lambda X, Y: np.linalg.pinv(np.transpose(X)*X)*np.transpose(X)*Y

In [5]:
from sklearn import datasets
import pandas as pd

In [6]:
df = pd.read_csv("ex1data1.txt")

In [7]:
df.columns = ['X', 'Y']

In [8]:
X = df['X']
Y = df['Y']

In [11]:
Y

0      9.13020
1     13.66200
2     11.85400
3      6.82330
4     11.88600
        ...   
91     7.20290
92     1.98690
93     0.14454
94     9.05510
95     0.61705
Name: Y, Length: 96, dtype: float64

In [12]:
del df

In [15]:
X, Y = X.to_numpy(), Y.to_numpy()

In [16]:
X

array([ 5.5277,  8.5186,  7.0032,  5.8598,  8.3829,  7.4764,  8.5781,
        6.4862,  5.0546,  5.7107, 14.164 ,  5.734 ,  8.4084,  5.6407,
        5.3794,  6.3654,  5.1301,  6.4296,  7.0708,  6.1891, 20.27  ,
        5.4901,  6.3261,  5.5649, 18.945 , 12.828 , 10.957 , 13.176 ,
       22.203 ,  5.2524,  6.5894,  9.2482,  5.8918,  8.2111,  7.9334,
        8.0959,  5.6063, 12.836 ,  6.3534,  5.4069,  6.8825, 11.708 ,
        5.7737,  7.8247,  7.0931,  5.0702,  5.8014, 11.7   ,  5.5416,
        7.5402,  5.3077,  7.4239,  7.6031,  6.3328,  6.3589,  6.2742,
        5.6397,  9.3102,  9.4536,  8.8254,  5.1793, 21.279 , 14.908 ,
       18.959 ,  7.2182,  8.2951, 10.236 ,  5.4994, 20.341 , 10.136 ,
        7.3345,  6.0062,  7.2259,  5.0269,  6.5479,  7.5386,  5.0365,
       10.274 ,  5.1077,  5.7292,  5.1884,  6.3557,  9.7687,  6.5159,
        8.5172,  9.1802,  6.002 ,  5.5204,  5.0594,  5.7077,  7.6366,
        5.8707,  5.3054,  8.2934, 13.394 ,  5.4369])

In [43]:
def LinearRegression(x, y, learningRate = 0.01, epochs = 1000):
    theta = np.zeros(x.shape)
    T = None
    for i in range(epochs):
        theta = nextTheta(theta, x, y, learningRate)
    return theta