In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Training set: one row per training example, one column per input feature.

# Input features, 3 examples x 4 features.
# NOTE(review): the first column looks like a size in square feet (not in
# 1000 square feet); confirm the units and meaning of every column against
# the data source.
X_train = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)

# Output targets, one value per example (price in $1000):
y_train = np.array([460, 232, 178])

# Echo both arrays, one per line.
print(f"x_train = {X_train}", f"y_train = {y_train}", sep="\n")

x_train = [[2104    5    1   45]
 [1416    3    2   40]
 [ 852    2    1   35]]
y_train = [460 232 178]


In [3]:
# Shape of the training matrix as a (rows, columns) tuple.
# Note: `m` holds the whole shape tuple here, not only the number of examples.
m = np.shape(X_train)
print(f"x_train shape is {m}")

x_train shape is (3, 4)


In [4]:
# Pull out each training example as a (feature row, target) pair.
x_row1, y_row1 = X_train[0], y_train[0]
x_row2, y_row2 = X_train[1], y_train[1]
x_row3, y_row3 = X_train[2], y_train[2]

# Show every example: features first, then its target.
print(f"x_row1 = {x_row1}")
print(f"y_row1 = {y_row1}")
print(f"x_row2 = {x_row2}")
print(f"y_row2 = {y_row2}")
print(f"x_row3 = {x_row3}")
print(f"y_row3 = {y_row3}")

x_row1 = [2104    5    1   45]
y_row1 = 460
x_row2 = [1416    3    2   40]
y_row2 = 232
x_row3 = [852   2   1  35]
y_row3 = 178


In [5]:
def compute_model_prediction_single(x_vector, w_vector, b_scalar):
    """Predict one target value element by element (f_wb).

    Adds up x_vector[j] * w_vector[j] for every index j of x_vector, in
    index order, and then adds the bias.

    Args:
        x_vector: indexable sequence of feature values; its length decides
            how many terms are summed.
        w_vector: indexable sequence of weights, at least as long as
            x_vector (extra entries are ignored).
        b_scalar: bias term added after the weighted sum.

    Returns:
        The weighted sum of the features plus b_scalar.
    """
    # Generator keeps the same left-to-right accumulation as an explicit loop.
    weighted_terms = (
        x_vector[idx] * w_vector[idx] for idx in range(len(x_vector))
    )
    return sum(weighted_terms) + b_scalar

In [6]:
# Two-decimal parameter values used to try out the element-by-element predictor.
w_ini = np.array([0.39, 18.75, -53.36, -26.42])
b_ini = 785.18

# Predict all three training examples with the loop-based model.
y_hat_1, y_hat_2, y_hat_3 = (
    compute_model_prediction_single(row, w_ini, b_ini)
    for row in (x_row1, x_row2, x_row3)
)

print(y_hat_1,"\n")

print(y_hat_2,"\n")

print(y_hat_3)

457.2299999999999 

230.14999999999975 

176.89999999999998


In [7]:
def predict_single(x_vector, w_vector, b_scalar):
    """Predict one target value with NumPy's dot product (f_wb).

    Args:
        x_vector: feature values of one example.
        w_vector: weights, same length as x_vector.
        b_scalar: bias term.

    Returns:
        np.dot(x_vector, w_vector) + b_scalar.
    """
    return np.dot(x_vector, w_vector) + b_scalar


In [8]:
# Higher-precision parameter values; these replace the earlier w_ini / b_ini.
w_ini = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_ini = 785.1811367994083

# Predict all three training examples with the dot-product model.
y_hat_f1, y_hat_f2, y_hat_f3 = (
    predict_single(row, w_ini, b_ini)
    for row in (x_row1, x_row2, x_row3)
)

print(y_hat_f1,"\n")

print(y_hat_f2,"\n")

print(y_hat_f3)

459.9999976194083 

231.9999983694081 

177.99999898940814


In [9]:
def compute_cost(X_matrix, y, w, b):
    """Squared-error cost J of the linear model over a data set.

    Computes J = (1 / (2 * m)) * sum_i (np.dot(X_matrix[i], w) + b - y[i]) ** 2
    where m = len(X_matrix).

    Args:
        X_matrix: sequence of m feature rows (e.g. a 2-D array).
        y: sequence of targets; y[i] is read for every i in range(m).
        w: weight vector, same length as each feature row.
        b: bias term.

    Returns:
        The cost J as a single number.

    Raises:
        ZeroDivisionError: if X_matrix is empty (m == 0).
    """
    m = len(X_matrix)
    # Named `squared_error_total` so the builtin `sum` is not shadowed.
    squared_error_total = 0
    for i in range(m):
        # Same linear model as predict_single: dot(x, w) + b. The residual is
        # used right away, so no per-example prediction array is needed.
        residual = np.dot(X_matrix[i], w) + b - y[i]
        squared_error_total = squared_error_total + residual ** 2

    return (1 / (2 * m)) * squared_error_total

In [10]:
# Cost of the model with the current w_ini / b_ini over the whole training set.
cost = compute_cost(X_matrix=X_train, y=y_train, w=w_ini, b=b_ini)
print(cost)

1.5578904428966628e-12


In [11]:
def compute_derivatives(X, y, w, b):
    """Partial derivatives dJ_dw and dJ_db of the squared-error cost.

    For each example i the residual is err_i = np.dot(w, X[i]) + b - y[i].
    The gradients are the averages over the m examples:
        dJ_dw[j] = (1 / m) * sum_i err_i * X[i, j]
        dJ_db    = (1 / m) * sum_i err_i

    Args:
        X: 2-D array of shape (m, n): m examples, n features.
        y: targets; y[i] is read for every i in range(m).
        w: weight vector of length n.
        b: bias term.

    Returns:
        Tuple (dJ_dw, dJ_db): an array of length n and a scalar.
    """
    m, n = X.shape
    dJ_dw_sum = np.zeros(n)
    dJ_db_sum = 0
    for i in range(m):
        # The residual does not depend on the feature index, so compute it
        # once per example instead of once per feature plus once for the bias.
        err_i = np.dot(w, X[i]) + b - y[i]
        # Row-wise update: adds err_i * X[i, j] to every component j.
        dJ_dw_sum = dJ_dw_sum + err_i * X[i]
        dJ_db_sum = dJ_db_sum + err_i
    dJ_dw = dJ_dw_sum / m
    dJ_db = dJ_db_sum / m

    return dJ_dw, dJ_db

In [12]:
def gradient_descent(x, y, w_in, b_in, alpha, iters, cost_function, deriv_function):
    """Run gradient descent to find w and b that minimize the error cost.

    Each iteration calls deriv_function(x, y, w, b) and moves both parameters
    one step against the returned gradients:
        w = w - alpha * dJ_dw
        b = b - alpha * dJ_db

    Args:
        x: training features, passed through to deriv_function.
        y: training targets, passed through to deriv_function.
        w_in: initial weights (array-like); never modified.
        b_in: initial bias.
        alpha: learning rate (step size).
        iters: number of update steps to perform.
        cost_function: accepted to keep the call signature stable; it is not
            used by this function.
        deriv_function: callable returning (dJ_dw, dJ_db) for (x, y, w, b).

    Returns:
        Tuple (w, b) holding the parameters after `iters` updates.
    """
    # Work on a copy so the returned weights never alias the caller's w_in,
    # even when iters == 0 and no update step runs.
    w = np.array(w_in)
    b = b_in
    for _ in range(iters):
        dJ_dw, dJ_db = deriv_function(x, y, w, b)
        w = w - alpha * dJ_dw
        b = b - alpha * dJ_db

    return w, b

In [13]:
# Executing gradient descent:
# Start from all-zero weights, one per feature column of X_train, so the
# initial vector always matches the data instead of relying on a hard-coded 4.
w_init = np.zeros(X_train.shape[1])
b_init = 0
iterations = 1000  # number of gradient-descent update steps
tmp_alpha = 5.0e-7  # learning rate (step size)
w_final, b_final = gradient_descent(X_train, y_train, w_init, b_init, tmp_alpha, iterations, compute_cost, compute_derivatives)
print(f"(w,b) found by gradient descent is ({w_final},{b_final})")

(w,b) found by gradient descent is ([ 0.20396569  0.00374919 -0.0112487  -0.0658614 ],-0.002235407530932535)
