In [8]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [None]:
# Load data
data = pd.read_csv("https://raw.githubusercontent.com/olivermueller/ds4b-2024/refs/heads/main/Session_03/advertising.csv")

# Extract y and x
y = data['Sales'].to_numpy()
x = data['TV'].to_numpy()

In [10]:
# Random start values for beta0 (intercept) and beta1 (slope)
beta0 = np.random.uniform(0, 1)
beta1 = np.random.uniform(0, 1)

# Define learning rate and maximum number of iterations of GD
learning_rate = 0.0000001
max_iter = 1000000

In [11]:
print("Initial values of beta0 and beta1: ", beta0, beta1)

Initial values of beta0 and beta1:  0.4524415536538662 0.5757655270999253


In [12]:
# Perform gradient descent
for i in range(1, max_iter+1):

    # Calculate partial derivatives of loss function with respect to beta0 and beta1
    d_beta0 = np.sum(2 * (beta0 + beta1 * x - y))
    d_beta1 = np.sum(2 * x * (beta0 + beta1 * x - y))

    # Update values of beta0 and beta1 (i.e., take a step into opposite direction of the gradient)
    beta0_new = beta0 - learning_rate * d_beta0
    beta1_new = beta1 - learning_rate * d_beta1
    beta0 = beta0_new
    beta1 = beta1_new

    # Print estimates every 10000 steps
    if i % 10000 == 0:
        print("Iteration:", i, "|", "Estimated intercept:", round(beta0, 6), "|", "Estimated slope:", round(beta1, 6))

Iteration: 10000 | Estimated intercept: 1.084167 | Estimated slope: 0.077744
Iteration: 20000 | Estimated intercept: 1.657297 | Estimated slope: 0.074834
Iteration: 30000 | Estimated intercept: 2.175206 | Estimated slope: 0.072204
Iteration: 40000 | Estimated intercept: 2.643215 | Estimated slope: 0.069827
Iteration: 50000 | Estimated intercept: 3.066131 | Estimated slope: 0.067679
Iteration: 60000 | Estimated intercept: 3.448299 | Estimated slope: 0.065739
Iteration: 70000 | Estimated intercept: 3.793645 | Estimated slope: 0.063985
Iteration: 80000 | Estimated intercept: 4.105717 | Estimated slope: 0.0624
Iteration: 90000 | Estimated intercept: 4.387721 | Estimated slope: 0.060968
Iteration: 100000 | Estimated intercept: 4.642554 | Estimated slope: 0.059674
Iteration: 110000 | Estimated intercept: 4.872834 | Estimated slope: 0.058504
Iteration: 120000 | Estimated intercept: 5.080927 | Estimated slope: 0.057448
Iteration: 130000 | Estimated intercept: 5.268969 | Estimated slope: 0.0564

In [6]:
# Print final estimated parameter values
print("Final estimated intercept:", round(beta0, 6), "|", "Final estimated slope:", round(beta1, 6))

Final estimated intercept: 6.708992 | Final estimated slope: 0.04918


In [7]:
# For comparison, let's look at the analytical solution
x_with_intercept = sm.add_constant(x)
ols = sm.OLS(y, x_with_intercept).fit()
print(ols.summary())

                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.612
Model:                            OLS   Adj. R-squared:                  0.610
Method:                 Least Squares   F-statistic:                     312.1
Date:                Wed, 30 Oct 2024   Prob (F-statistic):           1.47e-42
Time:                        16:11:17   Log-Likelihood:                -519.05
No. Observations:                 200   AIC:                             1042.
Df Residuals:                     198   BIC:                             1049.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          7.0326      0.458     15.360      0.0