In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns  
%matplotlib inline

In [None]:
# Load the advertising dataset (CSV under the notebook's input directory) into a DataFrame.
df = pd.read_csv('../input/advertising-dataset/advertising.csv')

# Data overview

In [None]:
# Preview the first rows to check column names and value scales.
df.head()

In [None]:
# Column dtypes and non-null counts: a quick check for missing values.
df.info()

In [None]:
# (rows, columns) of the loaded dataset.
df.shape

# Exploratory Data Analysis

In [None]:
# Pairwise scatter plots between all columns, with a KDE curve for each column on the diagonal.
sns.pairplot(df, diag_kind='kde')

In [None]:
# Distribution of the Sales column: count histogram with a KDE curve overlaid.
sns.histplot(df['Sales'] , kde=True)

In [None]:
# Density-normalised view of the Sales distribution.
# sns.distplot is deprecated; histplot with stat='density' and a KDE overlay is the
# replacement given in seaborn's migration guidance (cut=3 matches distplot's KDE extent).
sns.histplot(df['Sales'], kde=True, stat='density', kde_kws={'cut': 3})

In [None]:
# Annotated correlation matrix of the dataset's columns.
# `linewidths` is heatmap's documented keyword for the gap between cells; the previous
# `lw` alias is not a heatmap parameter and only worked by being passed through to matplotlib.
sns.heatmap(df.corr(), annot=True, cmap='rocket', linewidths=4)

# Determine the Features and the Target variables

In [None]:
# Target is the Sales column; the features are every remaining column.
y = df['Sales']
X = df.drop(columns='Sales')

# Preprocessing

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Degree-2 polynomial expansion of the features; include_bias=False omits the constant column.
# NOTE(review): the variable name is misspelled ("ploynomial"); it is left unchanged because a later cell references it.
ploynomialConvertor = PolynomialFeatures(degree=2 , include_bias=False)

In [None]:
# Fit the converter on X and return the expanded feature matrix in one step.
polyFeatures = ploynomialConvertor.fit_transform(X)

In [None]:
# (rows, expanded feature count) after the polynomial transformation.
polyFeatures.shape

# Splitting data into train and test

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(polyFeatures, y, test_size=0.3, random_state=101)

# Train the model 

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Linear regression estimator with default settings.
model = LinearRegression()

In [None]:
# Fit the regression on the training split.
model.fit(X_train , y_train)

# Predicting test data

In [None]:
# Predict the target for the held-out test split.
y_pred = model.predict(X_test)

In [None]:
# Actual vs. predicted values for the first test rows.
# Residuals use the usual convention, observed minus predicted, so a positive
# residual means the model under-predicted.
pd.DataFrame({'Y_Test': y_test, 'y_pred': y_pred, 'Residuals': y_test - y_pred}).head()

# Evaluating the model

In [None]:
from sklearn import metrics

In [None]:
# Error metrics of the polynomial model on the test split.
# scikit-learn metric functions take (y_true, y_pred); MAE/MSE are symmetric so the
# values are unchanged, but the order now matches the API and the linear-model cell.
MAE = metrics.mean_absolute_error(y_test, y_pred)
MSE = metrics.mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(MSE)
pd.DataFrame([MAE, MSE, RMSE], index=['MAE', 'MSE', 'RMSE'], columns=['Metrics'])

In [None]:
# Baseline: plain linear regression on the untransformed features, split with the
# same test_size and random_state as the polynomial model.
XS_train, XS_test, ys_train, ys_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

# fit() returns the estimator itself, so construction and training can be chained.
linearmodel = LinearRegression().fit(XS_train, ys_train)
ys_pred = linearmodel.predict(XS_test)

MAE_linear = metrics.mean_absolute_error(ys_test, ys_pred)
MSE_linear = metrics.mean_squared_error(ys_test, ys_pred)
RMSE_linear = np.sqrt(MSE_linear)

# Polynomial vs. linear test metrics, side by side.
pd.DataFrame(
    {
        'PolyMetrics': [MAE, MSE, RMSE],
        'LinearMetrics': [MAE_linear, MSE_linear, RMSE_linear],
    },
    index=['MAE', 'MSE', 'RMSE'],
)

#  Adjusting Model Parameters

In [None]:
# Train List of RMSE per degree
train_RMSE_list=[]
#Test List of RMSE per degree
test_RMSE_list=[]

for d in range(1,10):
    
    #Preprocessing
    #create poly data set for degree (d)
    polynomial_converter= PolynomialFeatures(degree=d, include_bias=False)
    poly_features= polynomial_converter.fit(X)
    poly_features= polynomial_converter.transform(X)
    
    #Split the dataset
    X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.3, random_state=101)
    
    #Train the Model
    polymodel=LinearRegression()
    polymodel.fit(X_train, y_train)
    
    #Predicting on both Train & Test Data
    y_train_pred=polymodel.predict(X_train)
    y_test_pred=polymodel.predict(X_test)
    
    #Evaluating the Model
    
    #RMSE of Train set
    train_RMSE=np.sqrt(metrics.mean_squared_error(y_train, y_train_pred))
    
    #RMSE of Test Set
    test_RMSE=np.sqrt(metrics.mean_squared_error(y_test, y_test_pred))
    
    #Append the RMSE to the Train and Test List
    
    train_RMSE_list.append(train_RMSE)
    test_RMSE_list.append(test_RMSE)

In [None]:
# Test RMSE for each degree, in loop order (first entry is degree 1).
test_RMSE_list

# Plot the Polynomial degree VS RMSE

In [None]:
# Draw train and test RMSE for degrees 1 through 5 only (the first five entries
# of each list), one line per split.
for curve_label, rmse_values in (
    ('Train RMSE', train_RMSE_list),
    ('Test RMSE', test_RMSE_list),
):
    plt.plot(range(1, 6), rmse_values[:5], label=curve_label)

plt.xlabel('Polynomial Degree')
plt.ylabel('RMSE')
plt.legend()

# Finalizing Model Parameters

In [None]:
#Create Polynomial Features
Final_Polynomial_converter= PolynomialFeatures(degree=3, include_bias=False)

Final_poly_features= Final_Polynomial_converter.fit(X)
Final_poly_features= Final_Polynomial_converter.transform(X)

#Split the DataSet
X_train, X_test, y_train, y_test = train_test_split(Final_poly_features, y, test_size=0.3, random_state=101)

#Train the model
Final_PolyModel=LinearRegression()
Final_PolyModel.fit(X_train, y_train)

# Saving the model 

In [None]:
from joblib import dump , load

In [None]:
# Persist the fitted final regression model to a file in the current working directory.
dump(Final_PolyModel, 'Sales_Poly_Model.joblib')

In [None]:
# Persist the polynomial converter as well: new inputs are passed through it before prediction.
dump(Final_Polynomial_converter, 'Ploynomial_Converter.joblib')

# Deployment

In [None]:
# Reload the saved converter and model from the files written in the saving step.
Load_Converter=load('Ploynomial_Converter.joblib')
Load_PolyModel=load('Sales_Poly_Model.joblib')

In [None]:
#Predict the sale for a campaign with advertising in TV=149, radio=22, newspaper=12
# One row; values are presumably in the same column order as X (TV, radio, newspaper) — confirm against the dataset.
campaign=[[149,22,12]]

In [None]:
# Expand the raw campaign values with the reloaded converter so they match the model's input features.
campaign_poly=Load_Converter.transform(campaign)

In [None]:
# Predict with the model reloaded from disk, not the in-memory Final_PolyModel,
# so this step actually exercises the saved artifact.
Load_PolyModel.predict(campaign_poly)