In [15]:
from matplotlib.pyplot import subplots
import statsmodels.api as sm
import pandas as pd
import numpy as np
from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF
from statsmodels.stats.anova import anova_lm
from ISLP import load_data
from ISLP.models import (ModelSpec as MS, summarize, poly)

In [16]:
#Gives advertising budget in thousands of dollars for TV, radio, and newspaper. Total sales in thousands of units.
advertising = pd.read_csv('Advertising.csv', index_col=[0])
advertising

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9
...,...,...,...,...
196,38.2,3.7,13.8,7.6
197,94.2,4.9,8.1,9.7
198,177.0,9.3,6.4,12.8
199,283.6,42.0,66.2,25.5


In [17]:
#Ordinary least squares method
tv = pd.DataFrame({'intercept': np.ones(advertising.shape[0]), 'sales': advertising['TV']})
tv[:4]

Unnamed: 0,intercept,sales
1,1.0,230.1
2,1.0,44.5
3,1.0,17.2
4,1.0,151.5


In [18]:
y = advertising['sales']
model = sm.OLS(y, tv)
results = model.fit()
#Shows that for every $1000 spent on TV advertising, there will be an expected increase of 4.75 units in sales
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,7.0326,0.458,15.36,0.0
sales,0.0475,0.003,17.668,0.0


In [19]:
radio = pd.DataFrame({'intercept': np.ones(advertising.shape[0]), 'sales': advertising['radio']})
model = sm.OLS(y, radio)
results = model.fit()
#Shows that for every $1000 spent on radio advertising, there will be an expected increase of 202.5 units in sales
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,9.3116,0.563,16.542,0.0
sales,0.2025,0.02,9.921,0.0


In [20]:
newspaper = pd.DataFrame({'intercept': np.ones(advertising.shape[0]), 'sales': advertising['newspaper']})
model = sm.OLS(y, newspaper)
results = model.fit()
#Shows that for every $1000 spent on radio advertising, there will be an expected increase of 54.7 units in sales
summarize(results)

Unnamed: 0,coef,std err,t,P>|t|
intercept,12.3514,0.621,19.876,0.0
sales,0.0547,0.017,3.3,0.001


Though when analyzed individually in a Linear Regression newspaper had a positive effect on sales, when taken together as a multiple regression it appears that is has a negative effect on sales.

In [21]:
#Multiple Linear Regression
X = MS(['TV', 'radio', 'newspaper']).fit_transform(advertising)
model1 = sm.OLS(y,X)
results1 = model1.fit()
summarize(results1)

Unnamed: 0,coef,std err,t,P>|t|
intercept,2.9389,0.312,9.422,0.0
TV,0.0458,0.001,32.809,0.0
radio,0.1885,0.009,21.893,0.0
newspaper,-0.001,0.006,-0.177,0.86


Given the lack of statistical significance of the newspaper variable, we will remove it from the analysis and reassess.

In [22]:
#Multiple Linear Regression w/out newspaper
X = MS(['TV', 'radio']).fit_transform(advertising)
model1 = sm.OLS(y,X)
results1 = model1.fit()
summarize(results1)

Unnamed: 0,coef,std err,t,P>|t|
intercept,2.9211,0.294,9.919,0.0
TV,0.0458,0.001,32.909,0.0
radio,0.188,0.008,23.382,0.0


Radio advertising would yield an estimated 188.0 additional units in sales for every $1000 spent in radio ads.

Should there be additional investment in Advertising, the recommendation would be to direct these funds towards radio as radio advertising yields a greater increase in sales than the other advertising mediums.