In [1]:
import pandas as pd

In [2]:
# Load the advertising data set from its location relative to this notebook.
advertising_csv = '../Data Sets/Advertising.csv'
advertising_data = pd.read_csv(advertising_csv)

In [3]:
# Preview the first five rows to see which columns were loaded.
advertising_data.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [9]:
# Take an independent copy of the two predictors so columns can be added to X
# later without touching advertising_data.
X = advertising_data.loc[:, ['TV', 'radio']].copy()

In [10]:
# Preview the first five rows of the predictor frame.
X.head()

Unnamed: 0,TV,radio
0,230.1,37.8
1,44.5,39.3
2,17.2,45.9
3,151.5,41.3
4,180.8,10.8


In [11]:
# Response variable: the 'sales' column.
y = advertising_data.loc[:, 'sales']

In [12]:
# Preview the first five values of the response.
y.head()

0    22.1
1    10.4
2     9.3
3    18.5
4    12.9
Name: sales, dtype: float64

In [13]:
# Append the TV x radio interaction term as a third predictor column.
X = X.assign(TV_Radio=lambda frame: frame['TV'] * frame['radio'])

$$
\text{sales} = \beta_0 + \beta_1 \times \text{TV} + \beta_2 \times \text{radio} + \beta_3 \times (\text{radio} \times \text{TV}) + \epsilon
$$

$$
\text{sales} = \beta_0 + (\beta_1 + \beta_3 \times \text{radio}) \times \text{TV} + \beta_2 \times \text{radio} + \epsilon
$$


In [14]:
# Check that the TV_Radio interaction column is now present.
X.head()

Unnamed: 0,TV,radio,TV_Radio
0,230.1,37.8,8697.78
1,44.5,39.3,1748.85
2,17.2,45.9,789.48
3,151.5,41.3,6256.95
4,180.8,10.8,1952.64


In [15]:
import statsmodels.api as sm

In [16]:
# Add a leading column of ones ('const') so the OLS fit includes an intercept.
# The arguments below are the library defaults, spelled out: 'skip' leaves X
# untouched if a constant column is already present, so re-running is safe.
X = sm.add_constant(X, prepend=True, has_constant='skip')

In [17]:
# Confirm the design matrix now starts with the 'const' column.
X.head()

Unnamed: 0,const,TV,radio,TV_Radio
0,1.0,230.1,37.8,8697.78
1,1.0,44.5,39.3,1748.85
2,1.0,17.2,45.9,789.48
3,1.0,151.5,41.3,6256.95
4,1.0,180.8,10.8,1952.64


In [18]:
# Ordinary least squares of sales (endog) on const, TV, radio and TV_Radio (exog).
ols_spec = sm.OLS(endog=y, exog=X)
model = ols_spec.fit()

In [19]:
# Print the plain-text regression report (fit statistics and coefficient table).
ols_report = model.summary()
print(ols_report)

                            OLS Regression Results                            
Dep. Variable:                  sales   R-squared:                       0.968
Model:                            OLS   Adj. R-squared:                  0.967
Method:                 Least Squares   F-statistic:                     1963.
Date:                Thu, 03 Apr 2025   Prob (F-statistic):          6.68e-146
Time:                        15:48:34   Log-Likelihood:                -270.14
No. Observations:                 200   AIC:                             548.3
Df Residuals:                     196   BIC:                             561.5
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.7502      0.248     27.233      0.0

Model fitting using scikit-learn

In [20]:
# At this point X is still the statsmodels design matrix (const, TV, radio,
# TV_Radio); it is rebuilt from the raw data below for scikit-learn.
X.head()

Unnamed: 0,const,TV,radio,TV_Radio
0,1.0,230.1,37.8,8697.78
1,1.0,44.5,39.3,1748.85
2,1.0,17.2,45.9,789.48
3,1.0,151.5,41.3,6256.95
4,1.0,180.8,10.8,1952.64


In [23]:
# Start again from the two raw predictors; the interaction term is generated
# by PolynomialFeatures further down.
X = advertising_data.loc[:, ['TV', 'radio']]

In [24]:
# Preview the raw predictors (TV and radio only).
X.head()

Unnamed: 0,TV,radio
0,230.1,37.8
1,44.5,39.3
2,17.2,45.9
3,151.5,41.3
4,180.8,10.8


In [25]:
# The response is the same 'sales' series defined earlier.
y.head()

0    22.1
1    10.4
2     9.3
3    18.5
4    12.9
Name: sales, dtype: float64

In [27]:
from sklearn.preprocessing import PolynomialFeatures

In [29]:
# Degree-2, interaction-only expansion: produces TV, radio and TV*radio, with
# no squared terms and no bias column.
polynomial = PolynomialFeatures(
    degree=2,
    interaction_only=True,
    include_bias=False,
)

In [30]:
# Expand the raw predictors into [TV, radio, TV*radio].
# The input is taken from advertising_data rather than from X itself so this
# cell is idempotent: re-running `X = polynomial.fit_transform(X)` would feed
# the already-expanded array back in and add further interaction columns.
X = polynomial.fit_transform(advertising_data[['TV', 'radio']])

In [31]:
# fit_transform returned a NumPy array, which has no .head() method;
# slice the first five rows instead.
X[:5]

In [32]:
# (rows, columns) of the expanded feature array.
X.shape

(200, 3)

In [33]:
# First expanded row: TV, radio, then their product.
X[0]

array([ 230.1 ,   37.8 , 8697.78])

In [34]:
from sklearn.linear_model import LinearRegression

In [35]:
# NOTE: this rebinds `model`, which previously held the statsmodels result.
# fit_intercept=True is the default, written out because the feature array
# carries no bias column.
model = LinearRegression(fit_intercept=True)

In [36]:
# Fit sales on the expanded features [TV, radio, TV*radio].
model.fit(X=X, y=y)

In [37]:
# Fitted intercept of the scikit-learn model.
model.intercept_

6.7502202030751155

In [38]:
# Fitted coefficients, in feature-column order: TV, radio, TV*radio.
model.coef_

array([0.01910107, 0.02886034, 0.00108649])

In [39]:
# Three hand-made (TV, radio) spend combinations to score with the fitted model.
test_data = pd.DataFrame(
    [
        [110, 50],
        [120, 60],
        [130, 80],
    ],
    columns=['TV', 'radio'],
)

In [40]:
# Show the unseen inputs.
test_data

Unnamed: 0,TV,radio
0,110,50
1,120,60
2,130,80


In [41]:
# Apply the transformer that was already fitted on the training features.
# Unseen data should only be transformed, never used to refit a preprocessing
# step, so this calls transform() rather than fit_transform().
unseen_X = polynomial.transform(test_data)

In [42]:
# First expanded unseen row: TV, radio, then their product.
unseen_X[0]

array([ 110.,   50., 5500.])

In [43]:
# Predicted sales for each unseen (TV, radio, TV*radio) row.
predictions = model.predict(X=unseen_X)

In [44]:
# One predicted sales value per unseen row.
predictions

array([16.27007616, 18.59673128, 22.84173185])

In [46]:
# Show the expanded unseen features side by side with their predicted sales.
unseen_features = pd.DataFrame(unseen_X, columns=['TV', 'Radio', 'TV_Radio'])
unseen_features.assign(Predictions=predictions)

Unnamed: 0,TV,Radio,TV_Radio,Predictions
0,110.0,50.0,5500.0,16.270076
1,120.0,60.0,7200.0,18.596731
2,130.0,80.0,10400.0,22.841732
