In [4]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [5]:
# Load data
data = pd.read_csv("https://raw.githubusercontent.com/olivermueller/ds4b-2024/refs/heads/main/Session_03/advertising.csv")

# Extract y and x
y = data['Sales'].to_numpy()
x = data['TV'].to_numpy()

In [6]:
# Random start values for beta0 (intercept) and beta1 (slope)
beta0 = np.random.uniform(0, 1)
beta1 = np.random.uniform(0, 1)

# Define learning rate and maximum number of iterations of GD
learning_rate = 0.0000001
max_iter = 1000000

In [7]:
print("Initial values of beta0 and beta1: ", beta0, beta1)

Initial values of beta0 and beta1:  0.5563679798015517 0.258395034124269


In [8]:
# Perform gradient descent
for i in range(1, max_iter+1):

    # Calculate partial derivatives of loss function with respect to beta0 and beta1
    d_beta0 = np.sum(2 * (beta0 + beta1 * x - y))
    d_beta1 = np.sum(2 * x * (beta0 + beta1 * x - y))

    # Update values of beta0 and beta1 (i.e., take a step into opposite direction of the gradient)
    beta0_new = beta0 - learning_rate * d_beta0
    beta1_new = beta1 - learning_rate * d_beta1
    beta0 = beta0_new
    beta1 = beta1_new

    # Print estimates every 10000 steps
    if i % 10000 == 0:
        print("Iteration:", i, "|", "Estimated intercept:", round(beta0, 6), "|", "Estimated slope:", round(beta1, 6))

Iteration: 10000 | Estimated intercept: 1.179535 | Estimated slope: 0.07726
Iteration: 20000 | Estimated intercept: 1.743476 | Estimated slope: 0.074396
Iteration: 30000 | Estimated intercept: 2.253081 | Estimated slope: 0.071808
Iteration: 40000 | Estimated intercept: 2.713587 | Estimated slope: 0.06947
Iteration: 50000 | Estimated intercept: 3.129722 | Estimated slope: 0.067356
Iteration: 60000 | Estimated intercept: 3.505763 | Estimated slope: 0.065447
Iteration: 70000 | Estimated intercept: 3.845573 | Estimated slope: 0.063721
Iteration: 80000 | Estimated intercept: 4.152642 | Estimated slope: 0.062162
Iteration: 90000 | Estimated intercept: 4.430125 | Estimated slope: 0.060753
Iteration: 100000 | Estimated intercept: 4.680872 | Estimated slope: 0.059479
Iteration: 110000 | Estimated intercept: 4.90746 | Estimated slope: 0.058329
Iteration: 120000 | Estimated intercept: 5.112216 | Estimated slope: 0.057289
Iteration: 130000 | Estimated intercept: 5.297244 | Estimated slope: 0.05634

In [9]:
# Print final estimated parameter values
print("Final estimated intercept:", round(beta0, 6), "|", "Final estimated slope:", round(beta1, 6))

Final estimated intercept: 7.032336 | Final estimated slope: 0.047538


In [10]:
# For comparison, let's look at the analytical solution
x_with_intercept = sm.add_constant(x)
ols = sm.OLS(y, x_with_intercept).fit()
print(ols.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.612
Model:                            OLS   Adj. R-squared:                  0.610
Method:                 Least Squares   F-statistic:                     312.1
Date:                Wed, 30 Oct 2024   Prob (F-statistic):           1.47e-42
Time:                        16:59:32   Log-Likelihood:                -519.05
No. Observations:                 200   AIC:                             1042.
Df Residuals:                     198   BIC:                             1049.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.0326      0.458     15.360      0.0