# This is a model prediction with multiple variables

In [1]:
import pandas as pd
import numpy as np
from matplotlib.ticker import FuncFormatter
import matplotlib.pyplot as plt
import math, copy

In [2]:
data = pd.read_csv('houses.csv',header=None)
print(data)

         0    1    2     3      4
0    952.0  2.0  1.0  65.0  271.5
1   1244.0  3.0  1.0  64.0  300.0
2   1947.0  3.0  2.0  17.0  509.8
3   1725.0  3.0  2.0  42.0  394.0
4   1959.0  3.0  2.0  15.0  540.0
..     ...  ...  ...   ...    ...
95  1224.0  2.0  2.0  12.0  329.0
96  1432.0  2.0  1.0  43.0  388.0
97  1660.0  3.0  2.0  19.0  390.0
98  1212.0  3.0  1.0  20.0  356.0
99  1050.0  2.0  1.0  65.0  257.8

[100 rows x 5 columns]


In [6]:
X_train = data.iloc[:, :4].values
y_train = data.iloc[:,4].values

In [7]:
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [8]:
def compute_cost(X, y, w, b): 
    """
    compute cost
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns:
      cost (scalar): cost
    """
    m = X.shape[0]
    cost = 0.0
    for i in range(m):                                
        f_wb_i = np.dot(X[i], w) + b           #(n,)(n,) = scalar (see np.dot)
        cost = cost + (f_wb_i - y[i])**2       #scalar
    cost = cost / (2 * m)                      #scalar    
    return cost

In [26]:
def compute_gradient(X, y, w, b): 
    """
    Computes the gradient for linear regression 
    Args:
      X (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters  
      b (scalar)       : model parameter
      
    Returns:
      dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w. 
      dj_db (scalar):       The gradient of the cost w.r.t. the parameter b. 
    """
    m,n = X.shape    
    print(f'Shape {m} rows {n} columns')      
    dj_dw = np.zeros((n,))
    dj_db = 0.

    for i in range(m):                             
        err = (np.dot(X[i], w) + b) - y[i]   
        for j in range(n):                       
            dj_dw[j] = dj_dw[j] + err * X[i, j]    
        dj_db = dj_db + err                        
    dj_dw = dj_dw / m                                
    dj_db = dj_db / m                                
        
    print(dj_dw)
    return dj_db, dj_dw

In [22]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters): 
    """
    Performs batch gradient descent to learn theta. Updates theta by taking 
    num_iters gradient steps with learning rate alpha
    
    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters  
      b_in (scalar)       : initial model parameter
      cost_function       : function to compute cost
      gradient_function   : function to compute the gradient
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent
      
    Returns:
      w (ndarray (n,)) : Updated values of parameters 
      b (scalar)       : Updated value of parameter 
      """
    
    J_history = []
    w = copy.deepcopy(w_in)  #avoid modifying global w within function
    b = b_in
    
    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db,dj_dw = gradient_function(X, y, w, b)   

        w = w - alpha * dj_dw               
        b = b - alpha * dj_db              
      
        if i<100000:      # prevent resource exhaustion 
            J_history.append(cost_function(X, y, w, b))

        
    return w, b, J_history #return final w,b and J history for graphing

In [27]:
# initialize parameters
initial_w = np.zeros(4)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent 
w_final, b_final, J_hist = gradient_descent(
    X_train, 
    y_train, 
    initial_w, 
    initial_b,
    compute_cost,
    compute_gradient, 
    alpha, 
    iterations)

print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Shape 100 rows 4 columns
[-5.48850468e+05 -1.00137228e+03 -5.16041620e+02 -1.24551926e+04]
Shape 100 rows 4 columns
[46555.50684916    91.95286226    52.68922416  1764.60654519]
Shape 100 rows 4 columns
[-3.96735698e+03 -8.23762528e-01  4.42937099e+00  5.57666308e+02]
Shape 100 rows 4 columns
[319.74277633   7.04567724   8.52395036 659.75435641]
Shape 100 rows 4 columns
[-44.02910078   6.37484773   8.17602117 650.7656248 ]
Shape 100 rows 4 columns
[-13.15363092   6.42869985   8.20505901 651.20240735]
Shape 100 rows 4 columns
[-15.76576489   6.42106089   8.20210972 650.83954302]
Shape 100 rows 4 columns
[-15.53632883   6.41864115   8.20187491 650.54468259]
Shape 100 rows 4 columns
[-15.54801634   6.41577994   8.20140999 650.24420191]
Shape 100 rows 4 columns
[-15.53924703   6.41295761   8.20096482 649.94434827]
Shape 100 rows 4 columns
[-15.53221715   6.4101334    8.2005182  649.64459148]
Shape 100 rows 4 columns
[-15.52504326   6.40731075   8.20007192 649.34497646]
Shape 100 rows 4 col