In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import PowerTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression

In [None]:
# Read the advertising CSV from the relative input path into `df`,
# then preview the first rows as the cell output.
df=pd.read_csv(r'../input/advertising-dataset/advertising.csv')
df.head()

In [None]:
# (rows, columns) of the raw frame.
df.shape

In [None]:
# Column dtypes, non-null counts and memory usage of the raw frame.
df.info()

**We can see that all the columns are of type float64.**

## Exploring the Numeric Variables: Five-Number Summary

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric column.
df.describe()

## Checking for null values:

In [None]:
# Number of missing values per column.
df.isna().sum()

No null values are present.

## Visualizing the Boxplot

In [None]:
# Draw one box plot per column of the raw frame, each in its own colour,
# on a fixed-size 500x500 figure with a black paper background.
color_discrete_sequence1=['red','green','blue','magenta']
for i, column in enumerate(df.columns):
    # Wrap around the palette so a frame with more columns than colours
    # reuses colours instead of raising IndexError.
    box_color = color_discrete_sequence1[i % len(color_discrete_sequence1)]
    fig = px.box(df, x=column, color_discrete_sequence=[box_color])
    fig.update_layout(
        autosize=False,
        width=500,
        height=500,
        margin=dict(l=50, r=50, b=100, t=100, pad=4),
        paper_bgcolor="Black",
    )
    fig.show()

We can see that there are outliers in the Newspaper column, which need to be removed before further analysis.

## Removing Outliers:

In [None]:
# Tukey fences on the Newspaper column: rows lying more than 1.5 * IQR
# below the first quartile or above the third quartile are dropped.
newspaper = df['Newspaper']
q1 = newspaper.quantile(0.25)
q3 = newspaper.quantile(0.75)
iqr = q3 - q1
lb = q1 - 1.5 * iqr
ub = q3 + 1.5 * iqr

# Build the outlier mask first and negate it, so rows that fail both
# comparisons are kept.
is_outlier = (newspaper < lb) | (newspaper > ub)
new_df = df[~is_outlier]

In [None]:
# Re-draw one box plot per column, this time on the outlier-filtered frame,
# on a fixed-size 500x500 figure with a black paper background.
color_discrete_sequence1=['red','green','blue','magenta']
for i, column in enumerate(new_df.columns):
    # Wrap around the palette so a frame with more columns than colours
    # reuses colours instead of raising IndexError.
    box_color = color_discrete_sequence1[i % len(color_discrete_sequence1)]
    fig = px.box(new_df, x=column, color_discrete_sequence=[box_color])
    fig.update_layout(
        autosize=False,
        width=500,
        height=500,
        margin=dict(l=50, r=50, b=100, t=100, pad=4),
        paper_bgcolor="black",
    )
    fig.show()

As we can see, the outliers have been removed from the Newspaper column.

## Checking the Skewness

In [None]:
# Per-column skewness of the outlier-filtered frame.
new_df.skew()

All skewness values are within the range -0.5 to +0.5 except for the Newspaper column, so we need to correct it.

## Pairplot

In [None]:
# Pairwise scatter grid of the four columns of the outlier-filtered frame,
# with KDE curves requested for the diagonal panels.
pair_cols = ['TV','Radio','Newspaper','Sales']
sns.pairplot(
    new_df,
    x_vars=pair_cols,
    y_vars=pair_cols,
    diag_kind='kde',
)
plt.show()

In [None]:
# Work on a deep copy so the outlier-filtered frame stays untouched.
df1 = new_df.copy(deep=True)

# Power-transform every column (standardisation disabled) and wrap the
# resulting array back into a DataFrame with the original column names.
# The new frame gets a fresh default index rather than df1's index.
# NOTE(review): the transformer is fitted on all rows and on the Sales
# column before the train/test split further down - confirm this is intended.
pt = PowerTransformer(standardize=False)
pt_sc = pd.DataFrame(pt.fit_transform(df1), columns=df1.columns)
pt_sc

## After Power Transformer

In [None]:
# Same pairwise scatter grid as before, now on the power-transformed frame,
# with KDE curves requested for the diagonal panels.
pair_cols = ['TV','Radio','Newspaper','Sales']
sns.pairplot(
    pt_sc,
    x_vars=pair_cols,
    y_vars=pair_cols,
    diag_kind='kde',
)
plt.show()

In [None]:
# Per-column skewness after the power transform.
pt_sc.skew()

The skewness has been corrected using the power transformer.

## Dependent and Independent Variable

In [None]:
# Target is the transformed Sales column; every other column is a feature.
y = pt_sc['Sales']
X = pt_sc.drop(columns=['Sales'])

## Splitting the Data into Train and Test Sets

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split repeatable.
xtrain,xtest,ytrain,ytest=train_test_split(X,y,test_size=0.3,random_state=48)

In [None]:
# Print the shape of each split, one per line, in the order
# xtrain, xtest, ytrain, ytest.
for part in (xtrain, xtest, ytrain, ytest):
    print(part.shape)

# Visualizing the results

## TV Advertising Vs Sales

In [None]:
# Scatter of transformed TV against transformed Sales with a red OLS trendline.
fig = px.scatter(
    pt_sc,
    x=pt_sc['TV'],
    y=pt_sc['Sales'],
    trendline='ols',
    trendline_color_override='red',
)
fig.show()

# Pull the trendline fit results back out of the figure, print the results
# frame, and show the summary of the first fit as the cell output.
results = px.get_trendline_results(fig)
print(results)
results['px_fit_results'].iloc[0].summary()

For the **Advertising** data, the least-squares fit for the regression of Sales on **TV** is shown.

## Radio Advertising Vs Sales

In [None]:
# Scatter of transformed Radio against transformed Sales with a magenta OLS trendline.
fig = px.scatter(
    pt_sc,
    x=pt_sc['Radio'],
    y=pt_sc['Sales'],
    trendline='ols',
    trendline_color_override='magenta',
)
fig.show()

# Pull the trendline fit results back out of the figure, print the results
# frame, and show the summary of the first fit as the cell output.
results = px.get_trendline_results(fig)
print(results)
results['px_fit_results'].iloc[0].summary()

For the **Advertising** data, the least-squares fit for the regression of Sales on **Radio** is shown.

## Newspaper Advertising vs Sales

In [None]:
# Scatter of transformed Newspaper against transformed Sales with a green OLS trendline.
fig = px.scatter(
    pt_sc,
    x=pt_sc['Newspaper'],
    y=pt_sc['Sales'],
    trendline='ols',
    trendline_color_override='green',
)
fig.show()

# Pull the trendline fit results back out of the figure, print the results
# frame, and show the summary of the first fit as the cell output.
results = px.get_trendline_results(fig)
print(results)
results['px_fit_results'].iloc[0].summary()

For the **Advertising** data, the least-squares fit for the regression of Sales on **Newspaper** is shown.

## Fitting Multiple Linear Regression

In [None]:
# Fit the multiple linear regression on the TRAINING split only.
# Fitting on the full X, y would let the model see the held-out rows, so the
# R² later computed on xtest/ytest would not be an out-of-sample score.
model = LinearRegression()
model.fit(xtrain, ytrain)

# Label each coefficient by sign so the bar chart can colour by direction.
colors = ['Positive' if c > 0 else 'Negative' for c in model.coef_]

fig = px.bar(
    x=X.columns, y=model.coef_, color=colors,
    # Explicit mapping: a one-colour sequence would draw both sign
    # categories in the same colour.
    color_discrete_map={'Positive': 'blue', 'Negative': 'red'},
    labels=dict(x='Feature', y='Linear coefficient'),
    title='Weight of each feature for predicting Sales'
)
fig.show()

In [None]:
# Predictions from the fitted model for both splits.
ypred_train = model.predict(xtrain)
ypred_test = model.predict(xtest)

In [None]:
# R² of the model's predictions on the test split.
r2_score_test = r2_score(y_true=ytest, y_pred=ypred_test)
r2_score_test

In [None]:
# R² of the model's predictions on the training split.
# Stored under its own name so it does not overwrite the test-split score
# held in `r2_score_test`.
r2_score_train = r2_score(ytrain, ypred_train)
r2_score_train

In [None]:
# Fitted coefficients, one per feature column of X.
model.coef_

In [None]:
# Fitted intercept term of the linear model.
model.intercept_