#### Cost function

In [33]:
import math, copy
import numpy as np
import matplotlib.pyplot as plt


In [34]:
x_train = np.array([1.0, 2.0])           #(size in 1000 square feet)
y_train = np.array([300.0, 500.0])           #(price in 1000s of dollars)

In [35]:
def compute_cost(x, y, w, b):
    m = x.shape[0]
    total_sum = 0
    for i in range(m):
        f_wb = w * x[i] + b
        cost = (f_wb - y[i])**2
        total_sum += cost
    total_cost = (1/(2*m)) * total_sum
    return total_cost

In [36]:
w, b = 200, 100
cost = compute_cost(x_train, y_train, w, b)
print(f'The cost of selection parameters w = {w} and b = {b} is {cost}')

The cost of selection parameters w = 200 and b = 100 is 0.0


In [37]:
def compute_gradient(x, y, w, b):
    m = len(x)
    dj_dw = 0
    dj_db = 0
    for i in range(m):
        f_wb_i = w*x[i] + b
        # calculate the partial derivatives of cost for this example
        dj_dw_i = f_wb_i - y[i]
        dj_db_i = (f_wb_i -y[i]) * x[i]
        # add total
        dj_db += dj_db_i
        dj_dw += dj_dw_i
    dj_dw = (1/m) * dj_dw
    dj_db = (1/m) * dj_db
    return dj_dw, dj_db
compute_gradient(x_train, y_train, w, b)


(0.0, 0.0)

In [44]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """
    Performs gradient descent to fit w,b. Updates w,b by taking
    num_iters gradient steps with learning rate alpha

    Args:
      x (ndarray (m,))  : Data, m examples
      y (ndarray (m,))  : target values
      w_in,b_in (scalar): initial values of model parameters
      alpha (float):     Learning rate
      num_iters (int):   number of iterations to run gradient descent
      cost_function:     function to call to produce cost
      gradient_function: function to call to produce gradient

    Returns:
      w (scalar): Updated value of parameter after running gradient descent
      b (scalar): Updated value of parameter after running gradient descent
      J_history (List): History of cost values
      p_history (list): History of parameters [w,b]
      """
    J_history = []
    P_history = []
    w = copy.deepcopy(w_in)
    w = w_in
    b = b_in
    for i in range(num_iters):
        dj_dw, dj_db = gradient_function(x, y, w, b)
        w = w - (alpha * dj_dw)
        b = b - (alpha * dj_db)
        if i < 1000:
          J_history.append(cost_function(x, y, w, b))
          P_history.append([w, w])
        if i % math.ceil(num_iters / 10) == 0:
          print(f"Iteration {i:4}: Cost {J_history[-1]:0.2e} ",
                  f"dj_dw: {dj_dw: 0.3e}, dj_db: {dj_db: 0.3e}  ",
                  f"w: {w: 0.3e}, b:{b: 0.5e}")
    return w, b, J_history, P_history


In [45]:
# initialize parameters
w_init = 0
b_init = 0
# some gradient descent settings
iterations = 10000
tmp_alpha = 1.0e-2
# run gradient descent
w_final, b_final, J_hist, p_hist = gradient_descent(x_train ,y_train, w_init, b_init, tmp_alpha, iterations, compute_cost, compute_gradient)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

Iteration    0: Cost 7.99e+04  dj_dw: -4.000e+02, dj_db: -6.500e+02   w:  4.000e+00, b: 6.50000e+00
Iteration 1000: Cost 3.04e+03  dj_dw:  1.248e+01, dj_db: -1.974e+01   w:  4.601e+01, b: 3.43485e+02
Iteration 2000: Cost 3.04e+03  dj_dw:  2.809e+01, dj_db: -4.442e+01   w: -1.465e+02, b: 6.47912e+02
Iteration 3000: Cost 3.04e+03  dj_dw:  6.322e+01, dj_db: -9.996e+01   w: -5.798e+02, b: 1.33296e+03
Iteration 4000: Cost 3.04e+03  dj_dw:  1.423e+02, dj_db: -2.249e+02   w: -1.555e+03, b: 2.87452e+03
Iteration 5000: Cost 3.04e+03  dj_dw:  3.201e+02, dj_db: -5.062e+02   w: -3.749e+03, b: 6.34348e+03
Iteration 6000: Cost 3.04e+03  dj_dw:  7.204e+02, dj_db: -1.139e+03   w: -8.686e+03, b: 1.41496e+04
Iteration 7000: Cost 3.04e+03  dj_dw:  1.621e+03, dj_db: -2.563e+03   w: -1.980e+04, b: 3.17158e+04
Iteration 8000: Cost 3.04e+03  dj_dw:  3.648e+03, dj_db: -5.768e+03   w: -4.480e+04, b: 7.12447e+04
Iteration 9000: Cost 3.04e+03  dj_dw:  8.209e+03, dj_db: -1.298e+04   w: -1.011e+05, b: 1.60196e+05
