## Multiple Features - Linear Regression

In [3]:
import numpy as np

In [4]:
# Training set: 3 examples (rows) with 4 feature values each, plus one target per example.
X_train = np.array([
    [2104, 5, 1, 45],
    [1416, 3, 2, 40],
    [852, 2, 1, 35],
])
y_train = np.array([460, 232, 178])

In [5]:
# Reference parameters used by the checks in the following cells:
# one weight per feature column of X_train, and a scalar bias.
w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])
b_init = 785.1811367994083
print(f"w_init shape: {w_init.shape}, b_init type: {type(b_init)}")

w_init shape: (4,), b_init type: <class 'float'>


In [6]:
# single prediction, vector method
def predict(x, w, b):
    """
    Linear-model prediction for a single example.

    Args:
    x : feature vector of shape (n,)
    w : weight vector of shape (n,)
    b : scalar bias

    Returns:
    The dot product of x and w, plus b.
    """
    return np.dot(x, w) + b

In [7]:
# Display the training feature matrix (one row per example).
X_train

array([[2104,    5,    1,   45],
       [1416,    3,    2,   40],
       [ 852,    2,    1,   35]])

In [10]:
# Take the first training example (row 0 of X_train) as a single feature vector.
x_vec = X_train[0]
x_vec


array([2104,    5,    1,   45])

In [11]:
# Prediction for x_vec using the reference parameters w_init and b_init.
f_wb = predict(x_vec, w_init, b_init)
f_wb

np.float64(459.99999761940825)

In [12]:
# compute cost
# J(w, b) = sum((y_pred - y_actual)**2) / (2*m), where m is the number of examples

def compute_cost(X, y, w, b):
    """
    Squared-error cost of the linear model over a data set.

    Args:
    X : feature matrix, one row per example
    y : array of target values, indexed like the rows of X
    w : weight vector
    b : scalar bias

    Returns:
    Sum of squared prediction errors divided by twice the number of rows of X.
    """
    num_examples = X.shape[0]

    squared_error_total = 0.0
    # Accumulate example by example, in row order.
    for row_idx in range(num_examples):
        residual = np.dot(X[row_idx], w) + b - y[row_idx]
        squared_error_total += residual ** 2

    return squared_error_total / (2 * num_examples)
    
    

In [13]:
# Cost over the whole training set at the reference parameters w_init, b_init.
cost = compute_cost(X_train, y_train, w_init, b_init)
cost

np.float64(1.5578904880036537e-12)

In [17]:
def compute_gradient(X, y, w, b):
    """
    Gradient of the squared-error cost with respect to w and b.

    Args:
    X : feature matrix of shape (m, n), one row per example
    y : array of m target values
    w : weight vector of shape (n,)
    b : scalar bias

    Returns:
    dj_db : scalar, the prediction error averaged over the m examples
    dj_dw : array of shape (n,), error * feature value averaged over the m examples
    """
    m, n = X.shape  # num of instances, num of features
    dj_dw = np.zeros((n,))
    dj_db = 0.0

    for i in range(m):
        err = (np.dot(X[i], w) + b) - y[i]

        # Update all n partial derivatives with one vectorized row operation
        # instead of a Python loop over features. Examples are still
        # accumulated one at a time, in row order.
        dj_dw += err * X[i]
        dj_db += err

    return dj_db / m, dj_dw / m

In [18]:
# Gradient of the cost at the reference parameters w_init, b_init.
tmp_dj_db, tmp_dj_dw = compute_gradient(X_train, y_train, w_init, b_init)

print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: -1.673925169143331e-06
dj_dw at initial w,b: 
 [-2.72623581e-03 -6.27197272e-06 -2.21745580e-06 -6.92403399e-05]


In [20]:
# gradient descent

def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Batch gradient descent on the parameters w and b.

    Args:
    X, y              : data, passed through unchanged to the two callbacks
    w_in, b_in        : starting values for the weights and the bias
    cost_function     : called as cost_function(X, y, w, b)
    gradient_function : called as gradient_function(X, y, w, b); returns (dj_db, dj_dw)
    alpha             : learning rate
    num_iters         : number of update steps to run

    Returns:
    w, b      : parameter values after the last step
    J_history : list with the cost after each of the first 100000 steps
    """
    max_recorded_costs = 100000  # cap on history length, to prevent resource exhaustion

    weights, bias = w_in, b_in
    J_history = []

    for step in range(num_iters):
        grad_b, grad_w = gradient_function(X, y, weights, bias)

        # Both updates use gradients evaluated at the same (pre-update) point.
        weights = weights - alpha * grad_w
        bias = bias - alpha * grad_b

        # Save cost J after this step
        if step < max_recorded_costs:
            J_history.append(cost_function(X, y, weights, bias))

    return weights, bias, J_history

In [22]:
# Start the search from all-zero weights (same shape and dtype as w_init) and a zero bias.
initial_w = np.zeros_like(w_init)
initial_b = 0.

# Settings for this run: number of update steps and learning rate.
iterations = 1000
alpha = 5.0e-7  # NOTE(review): presumably this small because the features are unscaled — confirm
# run gradient descent
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                            compute_cost, compute_gradient,
                                            alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

b,w found by gradient descent: -0.00,[ 0.20396569  0.00374919 -0.0112487  -0.0658614 ] 
