In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the advertising dataset from the working directory and preview its first rows.
df = pd.read_csv("advertising.csv")
df.head()

## Problem Statement
Build a model that predicts sales from the money spent on marketing across different platforms (TV, radio and newspaper).

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) for the columns.
df.describe()

In [None]:
# Grid of pairwise plots across the columns of df, for a first look at relationships.
sns.pairplot(df)

In [None]:
# One scatter panel per advertising channel, each plotted against sales.
sns.pairplot(
    data=df,
    x_vars=["TV", "radio", "newspaper"],
    y_vars="sales",
    kind="scatter",
)

In [None]:
# Pairwise correlation matrix of the columns (pandas default method).
df.corr()

In [None]:
# Same correlation matrix, rendered with a colour gradient so strong values stand out.
df.corr().style.background_gradient()

In [None]:
# Baseline model: ordinary least-squares regression on all feature columns.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Features are every column except the last; the target is the last column.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=1)

linreg = LinearRegression()
linreg.fit(xtrain, ytrain)
ypred = linreg.predict(xtest)

# Error metrics on the held-out rows.
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

# r2_score is the coefficient of determination, not a classification accuracy,
# so it is reported under its own name.
print(f"MAE : {mae}\nMSE : {mse}\nRMSE : {rmse}\nR2 : {r2}")

In [None]:
# Intercept term of the fitted linear model.
linreg.intercept_

In [None]:
# Fitted coefficients, one per feature column the model was trained on.
linreg.coef_

In [None]:
# Label each fitted coefficient with the feature column it belongs to.
pd.DataFrame({"Coefficients": linreg.coef_}, index=x.columns)

- For every one-unit increase in TV spend, holding radio and newspaper spend constant, we estimate that sales will increase by about 0.046 units.

In [None]:
# Linear regression assumptions
# 1. Linear relationship
# Plot predicted values against residuals; a patternless cloud around zero
# is what a well-specified linear model should produce.

residual = ytest - ypred

plt.figure()
# x and y must be passed by keyword: seaborn 0.12+ rejects positional x/y vectors.
sns.scatterplot(x=ypred, y=residual)
plt.axhline(0, color="grey", linestyle="--")  # reference line for zero error
plt.xlabel("Predicted sales")
plt.ylabel("Residual (actual - predicted)")
plt.show()

In [None]:
# Distribution of the residuals (check that it is roughly symmetric around zero).
sns.displot(residual)

In [None]:
# Refit without the newspaper feature to avoid multicollinearity.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Drop "newspaper" by name rather than by position, so the selection does not
# silently depend on the column order of the CSV. The target stays the last column.
x = df.iloc[:, :-1].drop(columns="newspaper")
y = df.iloc[:, -1]

# Same 70/30 split and seed as the baseline model, so the scores are comparable.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=1)

linreg = LinearRegression()
linreg.fit(xtrain, ytrain)
ypred = linreg.predict(xtest)

# Error metrics on the held-out rows.
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

# r2_score is the coefficient of determination, not a classification accuracy,
# so it is reported under its own name.
print(f"MAE : {mae}\nMSE : {mse}\nRMSE : {rmse}\nR2 : {r2}")

# Polynomial Regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the features into degree-2 polynomial terms. The transformer is fitted
# on the training split only and then reused, unchanged, on the test split.
pf = PolynomialFeatures(degree=2)
xtrainp, xtestp = pf.fit_transform(xtrain), pf.transform(xtest)

In [None]:
# Polynomial regression: a linear model fitted on the degree-2 expanded features.
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

linreg = LinearRegression()
linreg.fit(xtrainp, ytrain)
ypred = linreg.predict(xtestp)

# Error metrics on the held-out rows.
mae = mean_absolute_error(ytest, ypred)
mse = mean_squared_error(ytest, ypred)
rmse = np.sqrt(mse)
r2 = r2_score(ytest, ypred)

# r2_score is the coefficient of determination, not a classification accuracy,
# so it is reported under its own name.
print(f"MAE : {mae}\nMSE : {mse}\nRMSE : {rmse}\nR2 : {r2}")