In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [11]:
# Data: load only the five columns this notebook works with.
# Note: `usecols` only filters columns; the resulting column order follows the
# CSV file, not the order of this list (see the displayed frame).
df = pd.read_csv(
    "california_housing.csv",
    usecols=[
        'median_income',
        'households',
        'total_rooms',
        'housing_median_age',
        'median_house_value',
    ],
)
df

Unnamed: 0,housing_median_age,total_rooms,households,median_income,median_house_value
0,27.0,3885.0,606.0,6.6085,344700.0
1,43.0,1510.0,277.0,3.5990,176500.0
2,27.0,3589.0,495.0,5.7934,270500.0
3,28.0,67.0,11.0,6.1359,330000.0
4,19.0,1241.0,237.0,2.9375,81700.0
...,...,...,...,...,...
2995,23.0,1450.0,607.0,1.1790,225000.0
2996,27.0,5257.0,1036.0,3.3906,237200.0
2997,10.0,956.0,220.0,2.2895,62000.0
2998,40.0,96.0,14.0,3.2708,162500.0


In [14]:
# Train / test split

In [13]:
# Separate the feature matrix from the regression target, then hold out 20% of
# the rows for evaluation. A fixed random_state keeps the split reproducible.
# (train_test_split is imported in the notebook's import cell.)
x = df.drop(columns='median_house_value')
y = df['median_house_value']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [15]:
# Sanity check: features and targets should have the same number of training rows.
(x_train.shape, y_train.shape)

((2400, 4), (2400,))

<a name="toc_15456_3"></a>
# Model Prediction With Multiple Variables
The model's prediction with multiple variables is given by the linear model:

$$ f_{\mathbf{w},b}(\mathbf{x}) =  w_0x_0 + w_1x_1 +... + w_{n-1}x_{n-1} + b \tag{1}$$
or in vector notation:
$$ f_{\mathbf{w},b}(\mathbf{x}) = \mathbf{w} \cdot \mathbf{x} + b  \tag{2} $$ 
where $\cdot$ is a vector `dot product`

The code below implements prediction using the vector form (2), computing the dot product with `np.dot`.

<a name="toc_15456_4"></a>
# Compute Cost With Multiple Variables
The equation for the cost function with multiple variables $J(\mathbf{w},b)$ is:
$$J(\mathbf{w},b) = \frac{1}{2m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})^2 \tag{3}$$ 
where:
$$ f_{\mathbf{w},b}(\mathbf{x}^{(i)}) = \mathbf{w} \cdot \mathbf{x}^{(i)} + b  \tag{4} $$ 


In contrast to the single-variable case, $\mathbf{w}$ and $\mathbf{x}^{(i)}$ are vectors rather than scalars, which allows the model to support multiple features.

<a name="toc_15456_5"></a>
# Gradient Descent With Multiple Variables
Gradient descent for multiple variables:

$$\begin{align*} \text{repeat}&\text{ until convergence:} \; \lbrace \newline\;
& w_j = w_j -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial w_j} \tag{5}  \; & \text{for j = 0..n-1}\newline
&b\ \ = b -  \alpha \frac{\partial J(\mathbf{w},b)}{\partial b}  \newline \rbrace
\end{align*}$$

where $n$ is the number of features, the parameters $w_j$ and $b$ are updated simultaneously, and where  

$$
\begin{align}
\frac{\partial J(\mathbf{w},b)}{\partial w_j}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)})x_{j}^{(i)} \tag{6}  \\
\frac{\partial J(\mathbf{w},b)}{\partial b}  &= \frac{1}{m} \sum\limits_{i = 0}^{m-1} (f_{\mathbf{w},b}(\mathbf{x}^{(i)}) - y^{(i)}) \tag{7}
\end{align}
$$
* $m$ is the number of training examples in the data set

    
*  $f_{\mathbf{w},b}(\mathbf{x}^{(i)})$ is the model's prediction, while $y^{(i)}$ is the target value


In [17]:
import copy, math

def compute_cost(x,y,w,b):
    """Squared-error cost J(w, b) of the linear model f(x) = w . x + b.

    Implements equation (3): the sum of squared residuals divided by 2*m.

    Args:
        x: Feature matrix with one row per example, shape (m, n). Anything
           np.asarray accepts (ndarray, DataFrame, nested list).
        y: Target values, shape (m,). Anything np.asarray accepts.
        w: Weight vector, shape (n,).
        b: Scalar bias.

    Returns:
        The scalar cost.

    Raises:
        ValueError: if x contains no examples (the cost is undefined).
    """
    # Coerce to arrays so rows are paired by position. Indexing a pandas
    # Series with y[i] looks up by label, which breaks on a shuffled index.
    x = np.asarray(x)
    y = np.asarray(y)
    m = x.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")

    # Residuals for all examples at once instead of a Python-level loop.
    residuals = np.dot(x, w) + b - y
    return np.dot(residuals, residuals) / (2 * m)

def compute_gradient(x,y,w,b):
    """Gradient of the squared-error cost with respect to w and b.

    Implements equations (6) and (7) for the linear model f(x) = w . x + b.

    Args:
        x: Feature matrix with one row per example, shape (m, n). Anything
           np.asarray accepts (ndarray, DataFrame, nested list).
        y: Target values, shape (m,). Anything np.asarray accepts.
        w: Weight vector, shape (n,).
        b: Scalar bias.

    Returns:
        (dj_w, dj_b): dj_w is an array of shape (n,) holding dJ/dw_j, and
        dj_b is the scalar dJ/db.

    Raises:
        ValueError: if x is not 2-D or contains no examples.
    """
    # Coerce to arrays so rows are paired by position rather than by pandas
    # index label.
    x = np.asarray(x)
    y = np.asarray(y)
    m, _ = x.shape  # unpacking enforces a 2-D (m, n) feature matrix
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")

    # Prediction error for every example, computed in one shot.
    err = np.dot(x, w) + b - y
    # x.T @ err sums err_i * x[i, j] over the examples for each feature j.
    dj_w = np.dot(x.T, err) / m
    dj_b = np.sum(err) / m

    return dj_w, dj_b

def gradient_descent(x,y,w_in,b_in,alpha,epochs,log_every=1000):
    """Fit w and b with batch gradient descent (update rule (5)).

    Args:
        x: Feature matrix, passed through to compute_gradient / compute_cost.
        y: Target values, passed through to compute_gradient / compute_cost.
        w_in: Initial weight vector; it is copied and never modified.
        b_in: Initial scalar bias.
        alpha: Learning rate.
        epochs: Number of gradient steps to take.
        log_every: Print the cost every `log_every` iterations, starting at
            iteration 0. Defaults to 1000, the interval that used to be
            hard-coded. Pass 0 or None to disable logging.

    Returns:
        (w, b): the parameters after `epochs` updates.
    """
    w = copy.deepcopy(w_in) # avoid modifying the caller's w within function
    b = b_in

    for i in range(epochs):
        dj_w, dj_b = compute_gradient(x,y,w,b)

        # Both gradients were computed from the old (w, b) before either is
        # overwritten, so the update is simultaneous.
        w = w - alpha * dj_w
        b = b - alpha * dj_b

        # The cost is only needed for the progress log, so it is skipped on
        # all other iterations.
        if log_every and i % log_every == 0:
            cost = compute_cost(x, y, w, b)
            print("Iteration:", i, " Cost:", cost)
    return w,b

In [28]:
# Convert the pandas training objects to plain NumPy arrays for the
# hand-written gradient-descent functions.
x_grad = x_train.to_numpy()
y_grad = y_train.to_numpy()
(x_grad.shape, y_grad.shape)

((2400, 4), (2400,))

In [52]:
# Start from all-zero parameters and run batch gradient descent.
# NOTE(review): the logged cost is still decreasing at iteration 6000, so this
# run has not converged. The features are unscaled, which is presumably what
# forces such a small learning rate; consider standardizing them first.
w_init = np.zeros(x_grad.shape[1])
b_init = 0
w, b = gradient_descent(x_grad, y_grad, w_init, b_init, alpha=1e-8, epochs=7000)
(w, b)

Iteration: 0  Cost: 24656913440.144417
Iteration: 1000  Cost: 12918389935.921309
Iteration: 2000  Cost: 12745632238.986126
Iteration: 3000  Cost: 12616827886.183647
Iteration: 4000  Cost: 12514878793.919071
Iteration: 5000  Cost: 12429436836.080738
Iteration: 6000  Cost: 12354238022.591055


(array([180.68471674,  22.79321503, 154.84448855,  26.13036657]),
 4.857945713526324)

In [53]:
def predict(x,w,b):
    """Linear-model predictions f(x) = w . x + b for every row of x.

    Args:
        x: Feature matrix with one row per example, shape (m, n). Anything
           np.asarray accepts (ndarray, DataFrame, nested list).
        w: Weight vector, shape (n,).
        b: Scalar bias.

    Returns:
        Array of shape (m,) with one prediction per row.
    """
    # One matrix-vector product replaces the per-row loop. np.asarray makes
    # rows positional, so a DataFrame can be passed directly.
    return np.dot(np.asarray(x), w) + b
# Score the held-out rows with the parameters learned by gradient descent.
y_pred = predict(x_test.to_numpy(), w, b)
y_pred.shape

(600,)

In [54]:
def mae(y_true, y_pred):
    """Mean absolute error between targets and predictions.

    Args:
        y_true: Ground-truth values. Anything np.asarray accepts.
        y_pred: Predicted values, paired with y_true by position.

    Returns:
        The mean of |y_true - y_pred| as a scalar.
    """
    # Coerce to arrays first. Subtracting two pandas Series aligns them by
    # index label, which yields NaN when the indexes differ.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(np.abs(y_true - y_pred))
# Mean absolute error of the gradient-descent model on the held-out set.
mae(y_test.to_numpy(), y_pred)

109845.91513696435

In [44]:
# Do it with Sklearn
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(x_train, y_train)

In [45]:
# Fitted coefficients and intercept of the scikit-learn model.
(model.coef_, model.intercept_)

(array([ 1.97216342e+03, -1.39601307e+01,  1.14512381e+02,  4.55458692e+04]),
 -43705.15682122932)

In [47]:
# Predict on the held-out features with the scikit-learn model.
# NOTE(review): this rebinds `y_pred`, replacing the gradient-descent
# predictions assigned to the same name elsewhere in the notebook.
y_pred = model.predict(x_test)
# y_test is passed as-is here (the pandas object from train_test_split), not
# converted with .values as in the gradient-descent evaluation.
mae(y_test, y_pred)

55738.63035987873