# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reading data

In [None]:
# Load the advertising dataset from the Kaggle input directory.
csv_path = '../input/tvradionewspaperadvertising/Advertising.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print the column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics for the numeric columns.
df.describe()

# Plots

In [None]:
# List the column labels; the plotting loop in the next cell hard-codes
# 'TV', 'Radio', 'Newspaper' and 'Sales'.
df.columns

In [None]:
# One scatter plot per advertising channel against Sales, each in its own figure.
for feature in ('TV', 'Radio', 'Newspaper'):
    sns.scatterplot(data=df, x=feature, y='Sales')
    plt.show()
          

In [None]:
# Pairwise relationships between all columns, with KDE curves on the diagonal.
sns.pairplot(data=df, diag_kind='kde')

In [None]:
# Correlation heatmap of all columns, annotated with the coefficient values.
# `linewidths` is seaborn's documented argument for the lines that divide the
# cells; the `lw` shorthand used previously is not a documented heatmap
# parameter.
plt.figure(dpi=150)
sns.heatmap(df.corr(), annot=True, cmap='viridis', linewidths=1)

In [None]:
# Features are every column except the target; the target is the 'Sales' column.
x = df.drop(columns='Sales')
y = df.loc[:, 'Sales']

# Polynomial Regression

**From Preprocessing, import PolynomialFeatures, which will help us transform our original data set by adding polynomial features**

We will go from the equation in the form (shown here as if we only had one x feature):

$$\hat{y} = \beta_0 + \beta_1x_1 + \epsilon $$

and create more features from the original x feature for some *d* degree of polynomial.

$$\hat{y} = \beta_0 + \beta_1x_1 + \beta_2x^2_1 +  ... + \beta_dx^d_1 + \epsilon$$

Then we can call the linear regression model on it, since in reality, we're just treating these new polynomial features x^2, x^3, ... x^d as new features. Obviously we need to be careful about choosing the correct value of *d* , the degree of the model. Our metric results on the test set will help us with this!

**The other thing to note here is that we have multiple X features, not just a single one as in the formula above, so in reality PolynomialFeatures will also take *interaction* terms into account. For example, if an input sample is two-dimensional and of the form [a, b], the degree-2 polynomial features are [1, a, b, a^2, ab, b^2].**

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Degree-2 polynomial expansion without the constant (bias) column.
# NOTE(review): the variable name is misspelled ("conevrter"); it is kept as-is
# because later cells refer to it by this exact name.
polynomial_conevrter = PolynomialFeatures(degree=2, include_bias=False)

In [None]:
# Fit the converter on the raw features and expand them into polynomial and
# interaction terms.
polynomial_features = polynomial_conevrter.fit_transform(x)

In [None]:
# Compare the feature-matrix shape before and after the polynomial expansion.
for feature_matrix in (x, polynomial_features):
    print(feature_matrix.shape)

In [None]:
# Wrap the expanded array in a DataFrame with readable labels for the
# original, squared, and interaction columns.
poly_column_names = [
    'tv', 'rad', 'news',
    'tv**2', 'tv*rad', 'tv*news',
    'rad**2', 'rad*news', 'news**2',
]
data = pd.DataFrame(polynomial_features, columns=poly_column_names)

### We converted the previous data frame into a degree 2 data frame where new columns are added

In [None]:
# Preview the expanded feature frame.
data.head()

## Train | Test Split

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    polynomial_features,
    y,
    test_size=0.3,
    random_state=42,
)

## Scaling the data

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(x_train)
x_train_scaled, x_test_scaled = scaler.transform(x_train), scaler.transform(x_test)

## Model for fitting on Polynomial Data

### Create an instance of the model with parameters

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear regression estimator with scikit-learn's default settings.
model = LinearRegression()

## Fit/Train the Model on the training data

In [None]:
# Fit on the standardized training features only; the test split stays held out.
model.fit(x_train_scaled,y_train)

In [None]:
# Learned coefficients, one per polynomial feature. They are on the
# standardized scale because the model was fit on scaled inputs.
model.coef_


-----

## Evaluation on the Test Set

In [None]:
# Predict on the held-out test split, which was scaled with the statistics
# learned from the training split.
test_predictions = model.predict(x_test_scaled)

In [None]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [None]:
# Test-set error metrics: MAE, and RMSE as the square root of the MSE.
mae = mean_absolute_error(y_true=y_test, y_pred=test_predictions)
test_mse = mean_squared_error(y_true=y_test, y_pred=test_predictions)
rmse = np.sqrt(test_mse)


## These results are better than the plain linear regression model without polynomial features
### see results at https://www.kaggle.com/tharunnayak14/linear-regression-scikit-learn-and-normal-equation

In [None]:
# Mean absolute error on the test split.
mae

In [None]:
# Root mean squared error on the test split.
rmse

## Plotting residuals

In [None]:
# Distribution of the test-set residuals (actual minus predicted).
test_residuals = y_test - test_predictions
sns.displot(test_residuals, bins=30, kde=True)

In [None]:
# Residuals against the actual values. The residual is computed as actual minus
# predicted, the same sign convention as the residual histogram cell, so the
# two plots agree on which direction is over- and under-prediction.
residuals = y_test - test_predictions
sns.scatterplot(x=y_test, y=residuals)
# Zero line: points above it were under-predicted, points below over-predicted.
plt.axhline(y=0, c='r', ls='--')

In [None]:
# Actual (x) versus predicted (y) sales on the test split.
sns.scatterplot(x=y_test,y=test_predictions)

---

## Choosing the best degree for polynomial features

### Adjusting Parameters

Are we satisfied with this performance? Perhaps a higher order would improve performance even more! But how high is too high? It is now up to us to go back and adjust our model and parameters. Let's explore higher-order polynomials in a loop and plot their error. This will lead us nicely into a discussion of overfitting.

Let's use a for loop to do the following:

1. Create different order polynomial X data
2. Split that polynomial data for train/test
3. Fit on the training data
4. Report back the metrics on *both* the train and test results
5. Plot these results and explore overfitting

In [None]:
# Sweep polynomial degrees 1-9 and record the train/test RMSE for each one.
#
# Everything fitted inside the loop uses loop-local names and a fresh scaler,
# so the objects created by earlier cells (`scaler`, `model`, `x_train_scaled`,
# `x_test_scaled`, `y_train`, `y_test`) are not silently replaced by their
# degree-9 counterparts when this cell runs.

# TRAINING ERROR PER DEGREE
train_rmse_errors = []
# TEST ERROR PER DEGREE
test_rmse_errors = []

for d in range(1, 10):

    # CREATE POLY DATA SET FOR DEGREE "d"
    degree_converter = PolynomialFeatures(degree=d, include_bias=False)
    degree_features = degree_converter.fit_transform(x)

    # SPLIT THIS NEW POLY DATA SET
    # Same test_size and random_state for every degree, so all degrees are
    # compared on the same partition of the rows.
    X_train_d, X_test_d, y_train_d, y_test_d = train_test_split(
        degree_features, y, test_size=0.3, random_state=42
    )

    # SCALING
    # A new scaler per degree, fitted on the training rows only.
    degree_scaler = StandardScaler()
    X_train_d_scaled = degree_scaler.fit_transform(X_train_d)
    X_test_d_scaled = degree_scaler.transform(X_test_d)

    # TRAIN ON THIS NEW POLY SET
    degree_model = LinearRegression(fit_intercept=True)
    degree_model.fit(X_train_d_scaled, y_train_d)

    # PREDICT ON BOTH TRAIN AND TEST
    train_pred = degree_model.predict(X_train_d_scaled)
    test_pred = degree_model.predict(X_test_d_scaled)

    # Calculate Errors

    # Errors on Train Set
    train_RMSE = np.sqrt(mean_squared_error(y_train_d, train_pred))

    # Errors on Test Set
    test_RMSE = np.sqrt(mean_squared_error(y_test_d, test_pred))

    # Append errors to lists for plotting later
    train_rmse_errors.append(train_RMSE)
    test_rmse_errors.append(test_RMSE)

In [None]:
# Tabulate train/test RMSE per polynomial degree.
# The degree labels are derived from the number of recorded errors instead of a
# hard-coded "1 ... 9" string, so the table stays valid if the upper bound of
# the sweep changes (the sweep starts at degree 1).
poly = pd.DataFrame({'Train error': train_rmse_errors, 'Test error': test_rmse_errors})
poly['degree'] = [str(degree) for degree in range(1, len(poly) + 1)]
# set_index returns a new frame, so this only shapes what the cell displays;
# `poly` itself keeps 'degree' as a regular column.
poly.set_index('degree')

#### The minimum test error is found at degree 2, so it is a good choice for the degree; this can also be seen in the plots below.

In [None]:
# Smallest test RMSE across the swept degrees.
poly.loc[:, 'Test error'].min()

In [None]:
# Zoomed view: train vs. test RMSE for the first five degrees only.
fig, ax = plt.subplots(dpi=150)
low_degrees = range(1, 6)
ax.plot(low_degrees, train_rmse_errors[:5], label='TRAIN')
ax.plot(low_degrees, test_rmse_errors[:5], label='TEST')
ax.set_xlabel("Polynomial Degree")
ax.set_ylabel("RMSE")
ax.legend()

In [None]:
# Train vs. test RMSE across all nine degrees, on an unclipped y-axis.
fig, ax = plt.subplots(dpi=150)
all_degrees = range(1, 10)
ax.plot(all_degrees, train_rmse_errors, label='TRAIN')
ax.plot(all_degrees, test_rmse_errors, label='TEST')
ax.set_xlabel("Polynomial Degree")
ax.set_ylabel("RMSE")
ax.legend()

In [None]:
# Same curves as the full-range plot, with the y-axis clipped to [0, 100].
fig, ax = plt.subplots(dpi=150)
all_degrees = range(1, 10)
ax.plot(all_degrees, train_rmse_errors, label='TRAIN')
ax.plot(all_degrees, test_rmse_errors, label='TEST')
ax.set_xlabel("Polynomial Degree")
ax.set_ylabel("RMSE")
ax.set_ylim(0, 100)
ax.legend()

## Finalizing Model Choice

In [None]:
# Converter for the chosen degree (2), again without the bias column.
final_poly_converter = PolynomialFeatures(degree=2,include_bias=False)

In [None]:
# Expand the full feature matrix with the final converter. This reassigns the
# `polynomial_features` name that an earlier cell created.
polynomial_features = final_poly_converter.fit_transform(x)

In [None]:
# Refit the shared `scaler` on all rows (no train/test split for the final
# model). The prediction cells further down reuse this fitted scaler.
scaled_polynomial_features = scaler.fit_transform(polynomial_features)

In [None]:
# Inspect the standardized feature matrix.
scaled_polynomial_features

In [None]:
# Fresh estimator for the final model, separate from the evaluation `model`.
final_model = LinearRegression()

In [None]:
# Train the final model on every available row.
final_model.fit(scaled_polynomial_features,y)

## Predicting on new data

In [None]:
# A single new sample to score, as one row of three feature values.
# NOTE(review): presumably ordered TV, Radio, Newspaper to match the columns of
# `x` -- confirm against df.columns.
first_value, second_value, third_value = 150, 20, 100
new_data = [[first_value, second_value, third_value]]

In [None]:
# Expand the new sample with the converter that belongs to the final model, and
# call transform() rather than fit_transform(): inference data must not refit a
# preprocessing step. The sample is wrapped in a DataFrame carrying the training
# column names so it matches the input the converter was fitted on.
new_data_polynomial = final_poly_converter.transform(
    pd.DataFrame(new_data, columns=x.columns)
)

In [None]:
# Inspect the polynomial expansion of the new sample.
new_data_polynomial

In [None]:
# Scale with the already-fitted `scaler`; transform() only, so nothing is refit
# on the new sample.
new_data_scaled = scaler.transform(new_data_polynomial)

In [None]:
# Inspect the scaled sample that will be passed to the model.
new_data_scaled

In [None]:
# Predicted Sales for the new sample from the final model.
final_model.predict(new_data_scaled)