In [40]:
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import numpy as np
from sklearn.model_selection import train_test_split
%matplotlib inline 

In [2]:
# Render floats in pandas output with two decimal places.
pd.set_option("display.float_format", "{:.2f}".format)

In [3]:
# Load the advertising data; the file's first column is used as the row index.
df = pd.read_csv(
    "Advertising.csv",
    index_col=0,
)
# Preview the first five rows.
df.head(5)

Unnamed: 0,TV,radio,newspaper,sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [6]:
# statsmodels: ordinary least squares of sales on all three advertising channels.
model = smf.ols(
    data=df,
    formula="sales ~ TV + radio + newspaper",
).fit()

In [7]:
# Display the fitted OLS summary (coefficients, standard errors, p-values, R-squared, diagnostics).
model.summary()

0,1,2,3
Dep. Variable:,sales,R-squared:,0.897
Model:,OLS,Adj. R-squared:,0.896
Method:,Least Squares,F-statistic:,570.3
Date:,"Sat, 04 Jul 2020",Prob (F-statistic):,1.58e-96
Time:,22:14:05,Log-Likelihood:,-386.18
No. Observations:,200,AIC:,780.4
Df Residuals:,196,BIC:,793.6
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2.9389,0.312,9.422,0.000,2.324,3.554
TV,0.0458,0.001,32.809,0.000,0.043,0.049
radio,0.1885,0.009,21.893,0.000,0.172,0.206
newspaper,-0.0010,0.006,-0.177,0.860,-0.013,0.011

0,1,2,3
Omnibus:,60.414,Durbin-Watson:,2.084
Prob(Omnibus):,0.0,Jarque-Bera (JB):,151.241
Skew:,-1.327,Prob(JB):,1.44e-33
Kurtosis:,6.332,Cond. No.,454.0


In [8]:
# scikit-learn: select the predictor columns and the response column.
feature_cols = ["TV", "radio", "newspaper"]
X = df.loc[:, feature_cols]
Y = df.loc[:, "sales"]

In [9]:
# Create an unfitted scikit-learn LinearRegression estimator with its default settings.
model2=LinearRegression()

In [10]:
# Fit on the advertising features (X) against sales (Y); as the cell's last
# expression, the fitted estimator's repr is displayed.
model2.fit(X,Y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [11]:
# Fitted slopes, in the same order as feature_cols (TV, radio, newspaper).
print("coefficients :-",model2.coef_)

coefficients [ 0.04576465  0.18853002 -0.00103749]


In [12]:
# Fitted intercept term of the scikit-learn model.
print("intercept :-",model2.intercept_)

intercept :- 2.9388893694594067


In [14]:
# Pair each feature name with its fitted coefficient.
[(name, coef) for name, coef in zip(feature_cols, model2.coef_)]

[('TV', 0.04576464545539761),
 ('radio', 0.18853001691820462),
 ('newspaper', -0.0010374930424762972)]

# feature selection

In [15]:
# Baseline for feature selection: all three channels as predictors.
model1 = smf.ols(
    data=df,
    formula="sales ~ TV + radio + newspaper",
).fit()

In [16]:
# R-squared of the full model (sales ~ TV + radio + newspaper).
model1.rsquared

0.8972106381789522

In [17]:
# Reduced model with TV left out.
# NOTE(review): this rebinds `model2`, which earlier held the scikit-learn
# LinearRegression estimator; from here on it is a statsmodels fit.
model2 = smf.ols(
    data=df,
    formula="sales ~  radio + newspaper",
).fit()

In [18]:
# R-squared with TV dropped (sales ~ radio + newspaper); `model2` here is the
# statsmodels fit from the preceding cell, not the earlier sklearn estimator.
model2.rsquared

0.33270518395032256

In [19]:
# Reduced model with radio left out.
model3 = smf.ols(
    data=df,
    formula="sales ~ TV  + newspaper",
).fit()

In [20]:
# R-squared with radio dropped (sales ~ TV + newspaper).
model3.rsquared

0.6458354938293271

In [21]:
# Reduced model with newspaper left out.
model4 = smf.ols(
    data=df,
    formula="sales ~ TV + radio ",
).fit()

In [23]:
# R-squared with newspaper dropped (sales ~ TV + radio).
model4.rsquared

0.8971942610828957

# model evaluation metrics for regression

In [28]:
# Toy example: four actual values and the corresponding predictions.
y_true = [100, 50, 30, 20]
y_pred = [90, 50, 50, 30]

# Compute each metric once; RMSE is the square root of the MSE.
mae = metrics.mean_absolute_error(y_true, y_pred)
mse = metrics.mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)

print("MEAN ABSOLUTE ERROR :-", mae)
print("MEAN SQUARED ERROR :-", mse)
print("ROOT MEAN SQUARED ERROR :-", rmse)

MEAN ABSOLUTE ERROR :- 10.0
MEAN SQUARED ERROR :- 150.0
ROOT MEAN SQUARED ERROR :- 12.24744871391589


# model evaluation using train/test split

In [46]:
# Hold-out evaluation with all three channels (newspaper included).
X = df.loc[:, ['TV', 'radio', 'newspaper']]
y = df.loc[:, "sales"]

# Seeded split so the partition is repeatable; no test_size is passed,
# so the library default applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# fit() returns the estimator itself, so construction and fitting are chained.
lm2 = LinearRegression().fit(X_train, y_train)

# Predict sales for the held-out rows.
y_pred = lm2.predict(X_test)

# RMSE on the test set: square root of the mean squared error.
test_mse = metrics.mean_squared_error(y_test, y_pred)
print(np.sqrt(test_mse))

1.4046514230328955


In [47]:
# Hold-out evaluation with newspaper excluded (TV and radio only).
X = df.loc[:, ['TV', 'radio']]
y = df.loc[:, "sales"]

# Same seed as the run that includes newspaper; no test_size is passed,
# so the library default applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# fit() returns the estimator itself, so construction and fitting are chained.
lm2 = LinearRegression().fit(X_train, y_train)

# Predict sales for the held-out rows.
y_pred = lm2.predict(X_test)

# RMSE on the test set: square root of the mean squared error.
test_mse = metrics.mean_squared_error(y_test, y_pred)
print(np.sqrt(test_mse))

1.3879034699382888
