# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns

# Reading data

In [None]:
# Load the advertising dataset from the Kaggle input directory.
data_path = '../input/tvradionewspaperadvertising/Advertising.csv'
df = pd.read_csv(data_path)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the columns.
df.describe()

# Plotting

In [None]:
# Show the column labels that the plots below refer to.
df.columns

In [None]:
# One scatter plot of Sales against each advertising channel.
for channel in ('TV', 'Radio', 'Newspaper'):
    sns.scatterplot(data=df, x=channel, y='Sales')
    plt.show()
          

In [None]:
# Pairwise relationships between all columns, with KDE curves on the diagonal.
sns.pairplot(data=df, diag_kind='kde')

In [None]:
# Annotated heatmap of the pairwise correlations between the columns of df.
corr_matrix = df.corr()
plt.figure(dpi=150)
sns.heatmap(data=corr_matrix, annot=True, cmap='viridis', lw=1)

# Using Normal Equation      $\theta=\left(X^{T} X\right)^{-1} \cdot\left(X^{T} y\right)$

In [None]:
# Feature matrix (the three advertising channels) and target vector (Sales).
feature_columns = ['TV', 'Radio', 'Newspaper']
x = df[feature_columns]
y = np.array(df['Sales'])

In [None]:
# Prepend a bias column of ones (x_0 = 1) so that theta[0] acts as the intercept.
# The row count comes from x itself instead of a hard-coded 200, so the cell
# keeps working if the dataset size changes.
x_new = np.c_[np.ones((len(x), 1)), x]

In [None]:
# Uncomment either line to inspect the design matrix or the target vector:
# x_new
# y

**Now we can use the normal equation to find theta values**

In [None]:
# Solve the normal equations (X^T X) theta = X^T y as a linear system rather
# than forming the explicit inverse: the solution is the same, but this is
# cheaper and numerically more stable.
theta = np.linalg.solve(x_new.T @ x_new, x_new.T @ y)

In [None]:
# Fitted parameters: the first entry multiplies the column of ones (intercept),
# the rest follow the column order TV, Radio, Newspaper.
theta

## We can also compute theta using the pseudo-inverse, which is based on Singular Value Decomposition (SVD)
#### **When $X^{T}X$ is not invertible, the normal equation cannot be used, but the pseudo-inverse is always defined and handles these edge cases nicely**

In [None]:
# Least-squares solution via the Moore-Penrose pseudo-inverse of the design matrix.
x_pinv = np.linalg.pinv(x_new)
theta_svd = x_pinv @ y

In [None]:
# Parameters obtained through the pseudo-inverse, for comparison with theta.
theta_svd

# Predictions on new data

In [None]:
# A single new observation; the leading 1 is the bias term x_0.
tv_spend, radio_spend, newspaper_spend = 100, 50, 70
new_data = np.array([[1, tv_spend, radio_spend, newspaper_spend]])

## $h_{\theta}(x)=\theta_{0} x_{0}+\theta_{1} x_{1}+\ldots+\theta_{n} x_{n}$ (with $x_{0} = 1$). Using this formula, we can compute the prediction:

In [None]:
# Hypothesis h = x . theta evaluated for the new observation.
new_prediction = new_data @ theta
new_prediction

## train test split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
split_parts = train_test_split(x, y, test_size=0.3, random_state=42)
x_train, x_test, y_train, y_test = split_parts

## Scaling the data

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

## Now use LinearRegression from Scikit-learn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least-squares regressor with default settings.
linear_model = LinearRegression()

### Fit/Train the Model on the training data

In [None]:
# Fit on the scaled training features; inputs to this model must be scaled with `scaler`.
linear_model.fit(x_train_scaled,y_train)

### Coefficients of linear model

In [None]:
# Coefficients are per scaled feature, so they are not directly comparable with theta above.
linear_model.coef_

# Understanding and utilizing the Model

-----

## Evaluation on the Test Set

In [None]:
# Predict Sales for the held-out rows, using the scaled test features.
predictions = linear_model.predict(x_test_scaled)

In [None]:
# Preview the first few predictions instead of dumping the whole array.
predictions[:5]

### Metrics

Make sure you've viewed the video on these metrics!
The three most common evaluation metrics for regression problems:

**Mean Absolute Error** (MAE) is the mean of the absolute value of the errors:

$$\frac 1n\sum_{i=1}^n|y_i-\hat{y}_i|$$

**Mean Squared Error** (MSE) is the mean of the squared errors:

$$\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2$$

**Root Mean Squared Error** (RMSE) is the square root of the mean of the squared errors:

$$\sqrt{\frac 1n\sum_{i=1}^n(y_i-\hat{y}_i)^2}$$

Comparing these metrics:

- **MAE** is the easiest to understand, because it's the average absolute error.
- **MSE** is more popular than MAE, because MSE "punishes" larger errors, which tends to be useful in the real world.
- **RMSE** is even more popular than MSE, because RMSE is interpretable in the "y" units.

All of these are **loss functions**, because we want to minimize them.

In [None]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [None]:
# Test-set error metrics: MAE, and RMSE as the square root of the MSE.
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
rmse = np.sqrt(mse)

In [None]:
# Mean absolute error on the test split, in the units of Sales.
mae

In [None]:
# Root mean squared error on the test split, in the units of Sales.
rmse

### Plotting residuals

In [None]:
# Distribution of the test-set residuals (predicted minus actual Sales).
# The input is an unnamed array, so the axis labels are set explicitly.
residuals = predictions - y_test
sns.displot(residuals, bins=30, kde=True).set_axis_labels(
    'Residual (predicted - actual Sales)', 'Count'
)

In [None]:
# Residuals against the actual values; points should scatter evenly around the
# dashed zero line if the linear fit is adequate. Both inputs are unnamed
# arrays, so the axes are labelled explicitly.
ax = sns.scatterplot(x=y_test, y=predictions - y_test)
ax.axhline(y=0, c='r', ls='--')
ax.set(xlabel='Actual Sales', ylabel='Residual (predicted - actual Sales)');

### If satisfied with the performance, train the model on the whole data

In [None]:
# Re-select the features and target from the full dataset for the final fit.
final_features = ['TV', 'Radio', 'Newspaper']
x = df[final_features]
y = df['Sales']

In [None]:
# Fresh estimator for the final model, separate from the evaluated linear_model.
final_linear_model = LinearRegression()

In [None]:
# Fit on the full, unscaled feature frame; this model therefore expects raw
# (unscaled) inputs at prediction time.
final_linear_model.fit(x,y)

### Predicting on new data

In [None]:
# A single new observation, built as a DataFrame with the same column names the
# scaler and the final model were fitted on. This keeps the column order
# explicit and avoids scikit-learn's "X does not have valid feature names"
# warning that a bare list would trigger.
new_data = pd.DataFrame([[150, 20, 100]], columns=['TV', 'Radio', 'Newspaper'])

In [None]:
# Standardise the new sample with the scaler fitted on x_train. The result
# matches the input scale of `linear_model` (trained on x_train_scaled);
# `final_linear_model` was fitted on the unscaled x and must not be fed this.
scaled_new_data = scaler.transform(new_data)

In [None]:
# Show the standardised version of the new sample.
scaled_new_data

In [None]:
# Predict from the raw (unscaled) features, matching how final_linear_model was fitted.
final_linear_model.predict(new_data)