In [1]:
import numpy as np

In [2]:
# Training set: three examples (rows) with four features each, and one target per row.
X_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
y_train = np.array([470, 212, 200])

# Fixed reference parameters; other cells pass these to the helper functions
# to spot-check their output.
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083

# Gradient-descent settings: number of update steps and step size.
iterations = 1_000
alpha = 5e-7

# Starting point for the optimisation: zero weights (one per entry of w_init) and zero bias.
initial_w = np.zeros(w_init.shape)
initial_b = 0.0

In [3]:
def compute_model_output(X, w, b):
    """Return the linear-model prediction ``dot(X[i], w) + b`` for every row of X.

    Args:
        X: 2-D ndarray of shape (m, n), one example per row.
        w: 1-D ndarray of n weights.
        b: scalar bias added to every prediction.

    Returns:
        float64 ndarray of shape (m,) with one prediction per row
        (an empty array when X has no rows).
    """
    # A single matrix-vector product replaces the per-row Python loop.
    # The cast keeps the result float64 even when X, w and b are all integers.
    return np.asarray(np.matmul(X, w) + b, dtype=float)

In [4]:
# Spot-check compute_model_output at the fixed reference parameters w_init / b_init
# (not the zero-valued initial_w / initial_b): prints one prediction per training row.
print(compute_model_output(X_train, w_init, b_init))

[459.99999762 231.99999837 177.99999899]


In [5]:
def compute_gradient(X, y, w, b):
    """Gradient of the cost ``sum((dot(X[i], w) + b - y[i])**2) / (2*m)`` w.r.t. w and b.

    With ``err[i] = dot(X[i], w) + b - y[i]``:
        dj_dw[j] = sum_i(err[i] * X[i, j]) / m
        dj_db    = sum_i(err[i]) / m

    Args:
        X: 2-D ndarray of shape (m, n), one example per row.
        y: 1-D array of m target values.
        w: 1-D ndarray of n weights.
        b: scalar bias.

    Returns:
        (dj_dw, dj_db): float64 ndarray of shape (n,) and a scalar.

    Raises:
        ZeroDivisionError: if X has no rows (the mean over zero examples is undefined).
    """
    m = X.shape[0]
    if m == 0:
        raise ZeroDivisionError("cannot average the gradient over zero training examples")

    # Residual for every example at once, shape (m,).
    err = np.matmul(X, w) + b - y

    # err @ X sums err[i] * X[i, j] over the examples for each feature j,
    # replacing the nested example/feature loops.
    dj_dw = np.asarray(np.matmul(err, X) / m, dtype=float)
    dj_db = np.sum(err) / m
    return dj_dw, dj_db


In [6]:
def compute_cost(X, y, w, b):
    """Return the halved mean squared error of the linear model on (X, y).

    Computes ``sum((dot(X[i], w) + b - y[i])**2) / (2 * m)`` over the m rows of X.

    Args:
        X: 2-D ndarray of shape (m, n), one example per row.
        y: 1-D array of m target values.
        w: 1-D ndarray of n weights.
        b: scalar bias.

    Returns:
        The scalar cost.

    Raises:
        ZeroDivisionError: if X has no rows (the mean over zero examples is undefined).
    """
    m = X.shape[0]
    if m == 0:
        raise ZeroDivisionError("cannot average the cost over zero training examples")

    # Residual for every example at once; dot(err, err) is the sum of squared residuals.
    err = np.matmul(X, w) + b - y
    return np.dot(err, err) / (2 * m)

In [7]:
def gradient_descent(X, y, w_init, b_init, cost_function, gradient_function, alpha, iterations):
    """Run a fixed number of gradient-descent steps and return the final (w, b).

    Each step calls ``gradient_function(X, y, w, b)`` to obtain ``(dj_dw, dj_db)``
    and moves both parameters against those gradients, scaled by ``alpha``.

    Args:
        X, y: passed through unchanged to ``gradient_function``.
        w_init: starting weights.
        b_init: starting bias.
        cost_function: accepted as part of the signature but never called here.
        gradient_function: callable returning ``(dj_dw, dj_db)`` for the current parameters.
        alpha: step size multiplying each gradient.
        iterations: number of update steps (passed to ``range``).

    Returns:
        Tuple ``(w, b)`` after the last step; with zero iterations this is
        ``(w_init, b_init)`` themselves.
    """
    weights, bias = w_init, b_init
    for _ in range(iterations):
        grad_w, grad_b = gradient_function(X, y, weights, bias)
        # Rebind instead of using in-place operators so the update yields new values.
        weights, bias = weights - alpha * grad_w, bias - alpha * grad_b
    return weights, bias

In [8]:
# Evaluate the gradient at the fixed reference parameters w_init / b_init.
# Note: "initial" in the printed labels refers to w_init / b_init, not to the
# zero-valued initial_w / initial_b that the gradient-descent run starts from.
tmp_dj_dw, tmp_dj_db = compute_gradient(X_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: -4.000001673925112
dj_dw at initial w,b: 
 [-3.82133606e+03 -1.13333396e+01  2.66666445e+00 -1.40000069e+02]


In [9]:
# Cost on the training set at the fixed reference parameters w_init / b_init
# (not the zero-valued initial_w / initial_b).
print(compute_cost(X_train, y_train, w_init, b_init))

164.0000044757007


In [10]:
# Train from the zero-valued starting parameters, then compare the fitted
# model's prediction with the target for every training row.
w_final, b_final = gradient_descent(
    X_train, y_train,
    initial_w, initial_b,
    compute_cost, compute_gradient,
    alpha, iterations,
)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

m = X_train.shape[0]
for i in range(m):
    prediction = np.dot(X_train[i], w_final) + b_final
    print(f"prediction: {prediction:0.2f}, target value: {y_train[i]}")

b,w found by gradient descent: -0.00,[ 0.20503007  0.00502456 -0.01414997 -0.04465816] 
prediction: 429.38, target value: 470
prediction: 288.52, target value: 212
prediction: 173.12, target value: 200
