In [1]:
import numpy as np
import math

In [2]:
x_train = np.array([1.0, 2.0])
y_train = np.array([300.0, 500.0])

In [3]:
def compute_cost(x, y, w, b):
    m = x.shape[0]
    cost_sum = 0
    for i in range(m):
        f_wb = w * x[i] + b
        cost = (f_wb - y[i]) ** 2
        cost_sum += cost
    total_cost = (1/ (2 * m)) * cost_sum
    return total_cost

In [4]:
def compute_gradient(x, y, w, b):
    m = x.shape[0]
    dj_dw = 0
    dj_db = 0
    for i in range(m):
        f_wb = w * x[i] + b
        dj_dw_i = (f_wb - y[i]) * x[i]
        dj_db_i = f_wb - y[i]
        dj_dw += dj_dw_i
        dj_db += dj_db_i
    dj_dw = dj_dw / m
    dj_db = dj_db / m
    
    return dj_dw, dj_db

In [6]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, compute_cost, compute_gradient):
    b = b_in
    w = w_in
        
    for i in range(num_iters):
        dj_dw, dj_db = compute_gradient(x, y, w, b)
        b = b - alpha * dj_dw
        w = w - alpha * dj_dw
        
            
        if i% math.ceil(num_iters/10) == 0:
            print(f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")
    return w, b, 

In [7]:
# initialize parameters
w_init = 0
b_init = 0
# some gradient descent settings
iterations = 10000
tmp_alpha = 1.0e-2
# run gradient descent
w_final, b_final = gradient_descent(x_train ,y_train, w_init, b_init, tmp_alpha, 
                                                    iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

dj_dw: -6.500e+02, dj_db: -4.000e+02   w:  6.500e+00, b: 6.50000e+00
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
dj_dw: -1.364e-12, dj_db:  6.250e+00   w:  1.625e+02, b: 1.62500e+02
(w,b) found by gradient descent: (162.5000,162.5000)
