In [2]:
import copy, math
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(precision=2)

In [3]:
# Training inputs: 3 examples (rows) with 4 features each (columns).
X_train = np.array([[2104,5,1,45], [1416,3,2,40], [852,2,1,35]])
# Training targets: one value per row of X_train.
y_train = np.array([460, 232, 178])

In [4]:
X_train.shape

(3, 4)

In [5]:
type(X_train)

numpy.ndarray

In [6]:
X_train

array([[2104,    5,    1,   45],
       [1416,    3,    2,   40],
       [ 852,    2,    1,   35]])

In [7]:
y_train.shape

(3,)

In [8]:
type(y_train)

numpy.ndarray

In [9]:
y_train

array([460, 232, 178])

In [10]:
# Reference parameters used to sanity-check the functions below: with these
# values the cost on the training set evaluates to ~1.6e-12 and the gradient
# is close to zero (see the corresponding check cells further down).
b_init = 785.1811367994083
w_init = np.array([ 0.39133535, 18.75376741, -53.36032453, -26.42131618])

In [11]:
w_init.shape

(4,)

In [12]:
type(b_init)

float

In [16]:
def predict(x, w, b):
    """
    Linear-model prediction for a single example.

    Args:
      x: feature vector of one example
      w: weight vector, same length as x
      b: scalar bias term

    Returns:
      The value np.dot(w, x) + b.
    """
    return np.dot(w, x) + b

In [17]:
# Take the first training example (row 0) as a 1-D feature vector.
x_vec = X_train[0,:]
x_vec

array([2104,    5,    1,   45])

In [20]:
# Predict the first example with the reference parameters; the result should
# be very close to its target y_train[0] (460).
f_wb = predict(x_vec, w_init, b_init)
f_wb

459.9999976194083

In [26]:
def compute_cost(X, y, w, b):
    """
    Squared-error cost of the linear model over a data set.

    Args:
      X: 2-D array, one example per row
      y: target values, indexed like the rows of X
      w: weight vector, one entry per column of X
      b: scalar bias term

    Returns:
      Sum over all rows of (np.dot(X[i], w) + b - y[i])**2, divided by
      twice the number of rows.
    """
    num_examples = X.shape[0]
    squared_error_sum = 0.0
    # Accumulate one example at a time, in row order.
    for row, features in enumerate(X):
        residual = np.dot(features, w) + b - y[row]
        squared_error_sum = squared_error_sum + residual ** 2
    return squared_error_sum / (2 * num_examples)

In [27]:
# Cost at the reference parameters; expected to be essentially zero.
cost = compute_cost(X_train, y_train, w_init, b_init)
cost

1.5578904428966628e-12

In [34]:
def compute_gradient(X, y, w, b):
    """
    Gradient of the squared-error cost with respect to the bias and weights.

    Args:
      X: 2-D array of shape (m, n), one example per row
      y: target values, indexed like the rows of X
      w: weight vector with n entries
      b: scalar bias term

    Returns:
      (dj_db, dj_dw): the prediction error averaged over the m rows, and a
      length-n array holding the average of error * feature for each column.
    """
    num_examples, num_features = X.shape
    grad_w = np.zeros((num_features,))
    grad_b = 0.

    for row, features in enumerate(X):
        residual = np.dot(features, w) + b - y[row]
        grad_b = grad_b + residual
        # Scalar-by-scalar products keep the arithmetic identical to an
        # explicit per-feature loop; the sum into grad_w is element-wise.
        grad_w += [residual * value for value in features]

    grad_w /= num_examples
    grad_b /= num_examples
    return grad_b, grad_w

In [35]:
# Gradient at the reference parameters; every component should be close to zero.
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)
print(tmp_dj_db, tmp_dj_dw)

-1.6739251501955248e-06 [-2.73e-03 -6.27e-06 -2.22e-06 -6.92e-05]


In [37]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Run batch gradient descent for a fixed number of steps.

    Args:
      X, y: data, passed through unchanged to cost_function and gradient_function
      w_in: initial weights; copied, never modified
      b_in: initial bias
      cost_function: callable (X, y, w, b) -> cost
      gradient_function: callable (X, y, w, b) -> (dj_db, dj_dw)
      alpha: learning rate (step size)
      num_iters: number of update steps to perform

    Returns:
      (w, b, J_history): final weights, final bias, and the list of costs
      recorded after each of the first 100000 updates.
    """
    # Cap on recorded costs so very long runs do not grow the list unboundedly.
    max_history = 100000

    J_history = []
    w = copy.deepcopy(w_in)  # keep the caller's array untouched
    # An integer/bool array cannot absorb a float step through the in-place
    # `w -= ...` below (NumPy refuses the cast), so promote it to float first.
    if isinstance(w, np.ndarray) and w.dtype.kind in "iub":
        w = w.astype(float)
    b = b_in

    # Report progress roughly ten times over the whole run.
    print_every = math.ceil(num_iters / 10)

    for i in range(num_iters):
        dj_db, dj_dw = gradient_function(X, y, w, b)

        w -= alpha * dj_dw
        b -= alpha * dj_db

        record = i < max_history
        report = i % print_every == 0
        if record or report:
            # Evaluate the cost at most once per step, and always with the
            # current parameters, so the printed value is never a stale entry
            # from J_history once recording has stopped.
            cost = cost_function(X, y, w, b)
            if record:
                J_history.append(cost)
            if report:
                print(f"Iteration {i:4d}: Cost {cost:8.2f} ")

    return w, b, J_history

In [39]:
# Start from all-zero parameters with the same shape/dtype as w_init.
initial_w = np.zeros_like(w_init)
initial_b = 0.
# Run settings. With these values the printed cost is still slowly decreasing
# at iteration 900 (see output below), i.e. the run has not converged yet.
# NOTE(review): the very small alpha is presumably required because the
# features are unscaled — confirm.
iterations = 1000
alpha = 5.0e-7
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient, 
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# Compare the fitted model's prediction with the target for every training row.
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

Iteration    0: Cost  2529.46 
Iteration  100: Cost   695.99 
Iteration  200: Cost   694.92 
Iteration  300: Cost   693.86 
Iteration  400: Cost   692.81 
Iteration  500: Cost   691.77 
Iteration  600: Cost   690.73 
Iteration  700: Cost   689.71 
Iteration  800: Cost   688.70 
Iteration  900: Cost   687.69 
b,w found by gradient descent: -0.00,[ 0.2   0.   -0.01 -0.07] 
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
