In [171]:
import numpy as np
import matplotlib as plt
import math 
import copy


# Computing Cost

In [172]:
def compute_cost(x,y,w,b):
    """
    Compute the squared-error cost of a linear model over a training set.

    The value returned is
        J(w, b) = 1 / (2m) * sum_i (dot(w, x[i]) + b - y[i]) ** 2
    where m is the number of rows in x.

    Args:
        x: array-like of shape (m, n); one training example per row.
        y: array-like of shape (m,); the target for each example.
        w: array-like of shape (n,); the model weights.
        b: scalar bias term.

    Returns:
        The scalar cost J(w, b).

    Raises:
        ValueError: if x contains no training examples (the cost is an
            average over examples, so it is undefined for m == 0).
    """
    x = np.asarray(x)
    y = np.asarray(y)
    m = x.shape[0]
    if m == 0:
        raise ValueError("compute_cost requires at least one training example")
    # A single matrix-vector product yields the residual of every example at
    # once, replacing the per-row Python loop.
    residuals = np.dot(x, w) + b - y
    return np.sum(residuals**2) / (2*m)



In [173]:
# Training set: three examples (rows) with four features each, and one target per example.
x_train = np.array(
    [
        [2104, 5, 1, 45],
        [1416, 3, 2, 40],
        [852, 2, 1, 35],
    ]
)
y_train = np.array([460, 232, 178])

# Reference parameters used to sanity-check the cost and gradient functions;
# the cost evaluated at this point is expected to be very close to zero.
b_init = 785.1811367994083
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])

# Last expression of the cell, so the notebook displays the resulting cost.
compute_cost(x_train, y_train, w_init, b_init)

1.5578904428966628e-12

# Now Computing Gradient

In [174]:
def compute_gradient(x,y,w,b):
    """
    Compute the gradient of the squared-error cost of a linear model.

    With residual r[i] = dot(x[i], w) + b - y[i] and m examples:
        dj_db    = 1/m * sum_i r[i]
        dj_dw[j] = 1/m * sum_i r[i] * x[i][j]

    Args:
        x: array-like of shape (m, n); one training example per row.
        y: array-like of shape (m,); the target for each example.
        w: array-like of shape (n,); the model weights.
        b: scalar bias term.

    Returns:
        A tuple (dj_db, dj_dw): the scalar derivative with respect to b,
        followed by the array of shape (n,) of derivatives with respect to w.
        Note the order: bias first, weights second.

    Raises:
        ValueError: if x contains no training examples.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    m = x.shape[0]
    if m == 0:
        raise ValueError("compute_gradient requires at least one training example")
    # Residual of every example in one shot instead of a row-by-row loop.
    residuals = np.dot(x, w) + b - y
    # x.T @ residuals sums residual * feature over the examples for each
    # feature, which is exactly what the nested i/j loops accumulated.
    dj_dw = np.dot(x.T, residuals) / m
    dj_db = np.sum(residuals) / m
    return dj_db,dj_dw
        


In [175]:
# Evaluate the gradient at the reference parameters and show both parts:
# the bias derivative first, then the per-weight derivatives on their own line.
tmp_dj_db, tmp_dj_dw = compute_gradient(x=x_train, y=y_train, w=w_init, b=b_init)
print(
    f'dj_db at initial w,b: {tmp_dj_db}',
    f'dj_dw at initial w,b: \n {tmp_dj_dw}',
    sep='\n',
)

dj_db at initial w,b: -1.6739251501955246e-06
dj_dw at initial w,b: 
 [-2.72623577e-03 -6.27197263e-06 -2.21745578e-06 -6.92403391e-05]


# Using Gradient Descent Algorithm

In [176]:
def gradient_descent(x_train,y_train,w,b,compute_cost,compute_gradient,alpha,num_iter,max_history=100000):
    """
    Run batch gradient descent for a fixed number of iterations.

    Each iteration evaluates the gradient at the current parameters and moves
    both parameters one step of size alpha against it.

    Args:
        x_train: training inputs, passed through unchanged to the callbacks.
        y_train: training targets, passed through unchanged to the callbacks.
        w: initial weights; copied, so the caller's object is not modified.
        b: initial bias.
        compute_cost: callable (x_train, y_train, w, b) -> scalar cost.
        compute_gradient: callable (x_train, y_train, w, b) -> (dj_db, dj_dw).
        alpha: learning rate (step size).
        num_iter: number of iterations to run.
        max_history: maximum number of cost values kept in the returned
            history; caps memory use on very long runs. Defaults to 100000.

    Returns:
        A tuple (w, b, J): the final weights, the final bias, and the list of
        costs recorded after each of the first max_history iterations.
    """
    J = []
    w_new = copy.deepcopy(w)  # never touch the caller's initial weights
    b_new = b
    # Report about ten times over the whole run. The interval does not change
    # between iterations, so compute it once up front.
    report_every = max(1, math.ceil(num_iter/10))
    for i in range(num_iter):
        dj_db, dj_dw = compute_gradient(x_train,y_train,w_new,b_new)
        w_new = w_new - alpha * dj_dw
        b_new = b_new - alpha * dj_db

        # Recording and reporting are decided independently: the history cap
        # must not silence progress output on runs longer than max_history.
        record = i < max_history
        report = i % report_every == 0
        if record or report:
            cost = compute_cost(x_train,y_train,w_new,b_new)
            if record:
                J.append(cost)
            if report:
                print(f"Iteration: {i}: Cost: {cost:.4f}")
    return w_new,b_new,J


In [177]:
# initialize parameters: start from all-zero weights (same shape as w_init) and zero bias
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 5.0e-7
# run gradient descent; J_hist receives the cost history returned by the run
w_final, b_final, J_hist = gradient_descent(x_train, y_train, initial_w, initial_b,
                                            compute_cost, compute_gradient,
                                            alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
# compare the fitted model's prediction with the target for every training example
for features, target in zip(x_train, y_train):
    print(f"prediction: {np.dot(features, w_final) + b_final:0.2f}, target value: {target}")

Iteration: 0: Cost: 2529.4630
Iteration: 100: Cost: 695.9903
Iteration: 200: Cost: 694.9207
Iteration: 300: Cost: 693.8604
Iteration: 400: Cost: 692.8094
Iteration: 500: Cost: 691.7676
Iteration: 600: Cost: 690.7349
Iteration: 700: Cost: 689.7112
Iteration: 800: Cost: 688.6965
Iteration: 900: Cost: 687.6906
b,w found by gradient descent: -0.00,[ 0.20396569  0.00374919 -0.0112487  -0.0658614 ] 
prediction: 426.19, target value: 460
prediction: 286.17, target value: 232
prediction: 171.47, target value: 178
