In [14]:
import copy, math

In [12]:
X_train=[[2104, 5, 1, 45], [1416, 3, 2, 40], [852, 2, 1, 35]]
y_train=[460, 232, 178]

In [5]:
#function to predict y when x,w and b are given
def predict_single_value(x,w,b):
  #x is an array that consists of m values and each of these parameters are multiplied with its corresponding coefficient w
  # the size of w and x is the same
  m=len(x)
  y_hat=b
  for i in range(m):          #this can be done more efficiently using np.dot(x,w)
    y_hat+=w[i]*x[i]
  return y_hat

In [2]:
#function to compute cost with multiple variables
def compute_cost(x,y,w,b):
  m=len(x)
  cost=0
  for i in range(m):
    f_wb_i = predict_single_value(x[i],w,b)
    cost = cost + (f_wb_i - y[i])**2
  cost = cost / (2 * m)
  return cost


In [9]:
#function to compute gradient
def compute_gradient(x,y,w,b):
  m=len(x)
  n=len(x[0])
  dj_dw = [0]*n
  dj_db = 0
  for i in range(m):
    err = predict_single_value(x[i],w,b) - y[i]
    for j in range(n):
      dj_dw[j] = dj_dw[j] + err * x[i][j]
      dj_db = dj_db + err
  # dj_dw = dj_dw / m
  dj_dw = list(map(lambda x: x/m, dj_dw))
  dj_db = dj_db / m

  return dj_db, dj_dw

In [10]:
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    J_history = []
    w = copy.deepcopy(w_in)  #avoid modifying global w within function
    b = b_in

    for i in range(num_iters):
        dj_db,dj_dw = gradient_function(X, y, w, b)
        for j in range(len(dj_dw)):
          w[j] = w[j] - alpha * dj_dw[j]
        b = b - alpha * dj_db

        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion
            J_history.append( cost_function(X, y, w, b))

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]:8.2f}   ")

    return w, b, J_history #return final w,b and J history for graphing

In [15]:
# initialize parameters
initial_w = [0]*len(X_train[0])
initial_b = 0.
# some gradient descent settings
iterations = 1000
alpha = 0.00000005
# run gradient descent
w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient,
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m = len(X_train)
for i in range(m):
    prediction=predict_single_value(X_train[i], w_final, b_final)
    print(f"prediction: {prediction}, target value: {y_train[i]}")

Iteration    0: Cost 38557.85   
Iteration  100: Cost   696.97   
Iteration  200: Cost   696.86   
Iteration  300: Cost   696.76   
Iteration  400: Cost   696.65   
Iteration  500: Cost   696.54   
Iteration  600: Cost   696.43   
Iteration  700: Cost   696.32   
Iteration  800: Cost   696.21   
Iteration  900: Cost   696.11   
b,w found by gradient descent: -0.00,[0.20235008252913314, 0.0007946647357052938, -0.0009850928835913079, -0.0021249573889986907] 
prediction: 425.6514689868309, target value: 460
prediction: 286.44266257137724, target value: 232
prediction: 172.32803124003902, target value: 178
