In [None]:
import copy, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
np.set_printoptions(precision=2)
from sklearn.metrics import mean_squared_error

: 

In [None]:
# Load the advertising dataset into a DataFrame and preview the first rows.
# NOTE(review): the absolute "/content/..." path looks like a Colab working
# directory -- confirm the file location when running elsewhere.
data = pd.read_csv("/content/advertising.csv")
data.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,12.0
3,151.5,41.3,58.5,16.5
4,180.8,10.8,58.4,17.9


In [None]:
# Select features and target by column name rather than by position, so a
# leading index column in the CSV (e.g. "Unnamed: 0") cannot silently shift
# which columns end up as inputs and which as the target.
x_train = data[["TV", "Radio", "Newspaper"]].to_numpy()  # shape (m, 3)
y_train = data["Sales"].to_numpy()                       # shape (m,)

In [None]:
# Seeded generator so the random starting weights (and every cost/gradient
# value printed from them) are reproducible under Restart & Run All.
rng = np.random.default_rng(42)
b_init = 0
w_init = rng.random(3)  # one weight per feature column (TV, Radio, Newspaper)

In [None]:
# def predict_single_loop(x, w, b):
#       """
#       single predict using linear regression

#       Args:
#         x (ndarray): Shape (n,) example with multiple features
#         w (ndarray): Shape (n,) model parameters
#         b (scalar):  model parameter

#       Returns:
#         p (scalar):  prediction
#       """
#       n = x.shape[0]
#       p = 0
#       for i in range(n):
#         p_i = x[i] * w[i]
#         p += p_i
#       p += b
#       return p


In [None]:
# x_vec = x_train[0,:]
# print(f"x_vec shape {x_vec.shape}, x_vec value: {x_vec}")

# f_wb = predict_single_loop(x_vec, w_init, b_init)
# print(f"f_wb shape {f_wb.shape}, prediction: {f_wb}")

In [None]:
def compute_cost(x, y, w, b):
    """
    Halved mean squared error of the linear model ``x @ w + b`` against ``y``.

    Args:
      x (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter

    Returns:
      cost (scalar): sum of squared residuals divided by 2*m
    """
    m = x.shape[0]
    squared_error_sum = 0.0
    # Accumulate one example at a time, in row order.
    for i, row in enumerate(x):
        prediction = np.dot(row, w) + b
        residual = prediction - y[i]
        squared_error_sum += residual**2

    return squared_error_sum / (2 * m)

In [None]:
# w_init is a random starting point, not a fitted solution, so the value shown
# here is the cost at the initial parameters.
cost = compute_cost(x_train, y_train, w_init, b_init)
print(f"Cost at initial w: {cost}")

Cost at optimal w: 2644.7669058608653


In [None]:
def compute_gradient(x, y, w, b):
    """
    Gradient of the halved-MSE cost for the linear model ``x @ w + b``.

    Args:
      x (ndarray (m,n)): Data, m examples with n features
      y (ndarray (m,)) : target values
      w (ndarray (n,)) : model parameters
      b (scalar)       : model parameter

    Returns:
      dj_db (scalar):       The gradient of the cost w.r.t. the parameter b.
      dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w.
      (returned in that order: bias gradient first, then weight gradient)
    """
    m, n = x.shape
    grad_w = np.zeros(n)
    grad_b = 0.0

    for i in range(m):
        row = x[i]
        residual = (np.dot(row, w) + b) - y[i]
        # Element-wise update: adds residual * row[j] to each weight slot j.
        grad_w += residual * row
        grad_b += residual

    # Average the accumulated sums over the m examples.
    grad_w /= m
    grad_b /= m

    return grad_b, grad_w


In [None]:
# Compute and display the gradient at the initial parameters (w_init, b_init).
# compute_gradient returns the bias gradient first, then the weight gradient.
tmp_dj_db, tmp_dj_dw = compute_gradient(x_train, y_train, w_init, b_init)
print(f'dj_db at initial w,b: {tmp_dj_db}')
print(f'dj_dw at initial w,b: \n {tmp_dj_dw}')

dj_db at initial w,b: 64.73744096186928
dj_dw at initial w,b: 
 [12252.64  1656.45  2132.21]


In [None]:
def gradient_descent(x, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters):
    """
    Performs batch gradient descent to learn w and b. Updates w and b by taking
    num_iters gradient steps with learning rate alpha.

    Args:
      x (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters
      b_in (scalar)       : initial model parameter
      cost_function       : callable (x, y, w, b) -> scalar cost
      gradient_function   : callable (x, y, w, b) -> (dj_db, dj_dw)
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent

    Returns:
      w (ndarray (n,)) : Updated values of parameters
      b (scalar)       : Updated value of parameter
      J_history (list) : cost after each update step (only the first 100000
                         steps are recorded); empty when num_iters is 0
    """
    J_history = []
    w = copy.deepcopy(w_in)  # work on a copy so w_in is left untouched
    b = b_in

    # Report progress roughly ten times over the run; max() keeps the modulus
    # non-zero even for a degenerate num_iters.
    print_every = max(1, math.ceil(num_iters / 10))

    for i in range(num_iters):
        dj_db, dj_dw = gradient_function(x, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

        # Record the cost at every step (capped to bound memory on long runs).
        # This must live inside the loop, otherwise only the final cost is kept.
        if i < 100000:
            J_history.append(cost_function(x, y, w, b))

        if i % print_every == 0:
            print(f"Iteration {i:4d}: cost {J_history[-1]: 8.2f}")

    return w, b, J_history


In [None]:
# initialize parameters
initial_w = np.zeros_like(w_init)
initial_b = 0.
# some gradient descent settings (the features are used unscaled, so the
# learning rate is kept very small)
iterations = 1000
alpha = 5.0e-7
# run gradient descent
w_final, b_final, J_hist = gradient_descent(x_train, y_train, initial_w, initial_b,
                                                    compute_cost, compute_gradient,
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

# Predict every training example in one vectorized step instead of row by row.
m,_ = x_train.shape
y_pred = x_train @ w_final + b_final

# Show only a short preview; printing all m rows floods the cell output.
n_preview = min(m, 10)
for i in range(n_preview):
    estimate = y_pred[i]
    print(f"prediction: {estimate:0.2f}, target value: {y_train[i]}")

# scikit-learn's signature is mean_squared_error(y_true, y_pred).
rmse = math.sqrt(mean_squared_error(y_train, y_pred))
print(rmse)

b,w found by gradient descent: 0.00,[0.08 0.03 0.03] 
prediction: 22.42, target value: 22.1
prediction: 6.48, target value: 10.4
prediction: 5.27, target value: 12.0
prediction: 15.75, target value: 16.5
prediction: 17.11, target value: 17.9
prediction: 4.87, target value: 7.2
prediction: 6.60, target value: 11.8
prediction: 10.89, target value: 13.2
prediction: 0.81, target value: 4.8
prediction: 17.15, target value: 15.6
prediction: 6.41, target value: 12.6
prediction: 18.52, target value: 17.4
prediction: 5.33, target value: 9.2
prediction: 8.48, target value: 13.7
prediction: 19.35, target value: 19.0
prediction: 19.37, target value: 22.4
prediction: 10.58, target value: 12.5
prediction: 26.23, target value: 24.4
prediction: 6.97, target value: 11.3
prediction: 13.50, target value: 14.6
prediction: 20.59, target value: 18.0
prediction: 20.39, target value: 17.5
prediction: 3.27, target value: 5.6
prediction: 20.13, target value: 20.5
prediction: 6.14, target value: 9.7
prediction: 