In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the housing dataset from the relative input directory into a DataFrame.
df = pd.read_csv('../input/usa-housing/USA_Housing.csv')

In [None]:
# Preview the first rows of the loaded frame to check its columns and values.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

Determine the Features and Target Variable

In [None]:
# Features: the five area-level columns used as regression inputs.
X = df[[
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]]
# Target: the column the models below learn to predict.
y = df['Price']

Preprocessing

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Degree-2 expansion of the inputs (original terms, squares, pairwise products).
# include_bias=False omits the constant column; LinearRegression fits an
# intercept by default, so a bias column would be redundant.
polynomial_converter = PolynomialFeatures(
    include_bias=False,
    degree=2,
)

In [None]:
# fit() returns the converter itself, not transformed data, so its result is
# intentionally not bound to `poly_features`; the transform cell that follows
# produces the actual feature matrix.
# The trailing semicolon suppresses the estimator repr in the cell output.
polynomial_converter.fit(X);

In [None]:
# Expand X into its polynomial feature matrix using the fitted converter.
poly_features = polynomial_converter.transform(X)

In [None]:
# Shape after polynomial expansion; compare the column count with X.shape.
poly_features.shape

In [None]:
# Shape of the original (unexpanded) feature frame.
X.shape

Split the Data into Train and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.3,
    random_state=101,
)

Training the Model

In [None]:
from sklearn.linear_model import LinearRegression


In [None]:
# Linear regression on the polynomial training features.
# fit() returns the estimator itself, so construction and fitting are chained.
polymodel = LinearRegression().fit(X_train, y_train)
# Echo the fitted estimator as the cell output.
polymodel

Predicting test data

In [None]:
# Predictions of the polynomial model for the held-out test rows.
y_pred = polymodel.predict(X_test)

In [None]:
# Side-by-side view of actual vs. predicted values and their difference
# (actual minus predicted) for the first four test rows.
pd.DataFrame(
    {
        'y_test': y_test,
        'y_pred': y_pred,
        'Residuals': y_test - y_pred,
    }
).head(4)

Evaluating the model performance

In [None]:
from sklearn import metrics

In [None]:
# Mean absolute error of the polynomial model on the test split.
MAE_Poly = metrics.mean_absolute_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Mean squared error of the polynomial model on the test split.
MSE_Poly = metrics.mean_squared_error(y_true=y_test, y_pred=y_pred)

In [None]:
# Root mean squared error of the polynomial model, derived from MSE_Poly
# (the polynomial model's test-set MSE computed in the preceding cell).
RMSE_Poly = np.sqrt(MSE_Poly)

In [None]:
# Summary table of the polynomial model's test-set errors.
# RMSE is derived from MSE_Poly directly so this cell only needs the two
# metric variables defined above it.
pd.DataFrame(
    [MAE_Poly, MSE_Poly, np.sqrt(MSE_Poly)],
    index=['MAE', 'MSE', 'RMSE'],
    columns=['Metrics'],
)

Compare to Simple Linear Regression

In [None]:
from sklearn.model_selection import train_test_split
# Split the raw (unexpanded) features with the same test_size and random_state
# as the polynomial split, so both models are evaluated on the same rows.
XS_train, XS_test, ys_train, ys_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

In [None]:
from sklearn.linear_model import LinearRegression
# Baseline: plain linear regression on the original five features.
# fit() returns the estimator itself, so construction and fitting are chained.
Simplemodel = LinearRegression().fit(XS_train, ys_train)
# Echo the fitted estimator as the cell output.
Simplemodel

In [None]:
# Score the baseline model on its test split with the same three metrics
# used for the polynomial model.
ys_pred = Simplemodel.predict(XS_test)

MSE_simple = metrics.mean_squared_error(y_true=ys_test, y_pred=ys_pred)
RMSE_simple = np.sqrt(MSE_simple)
MAE_simple = metrics.mean_absolute_error(y_true=ys_test, y_pred=ys_pred)

In [None]:
# One column per model, one row per error metric, for direct comparison.
pd.DataFrame(
    {
        'Poly Metrics': [MAE_Poly, MSE_Poly, RMSE_Poly],
        'Simple Metrics': [MAE_simple, MSE_simple, RMSE_simple],
    },
    index=['MAE', 'MSE', 'RMSE'],
)

Adjusting Model Parameters: Choosing the Polynomial Degree

In [None]:
# Sweep polynomial degrees 1..9 and record the train and test RMSE of a linear
# regression fitted at each degree. Entry i of each list belongs to degree i + 1.
#
# Loop-local names are deliberately distinct from the notebook-level
# polynomial_converter / poly_features / X_train / X_test / y_train / y_test /
# polymodel, so the degree-2 objects built earlier are not silently replaced
# by the degree-9 iteration.
train_RMSE_list = []
test_RMSE_list = []

for d in range(1, 10):
    d_converter = PolynomialFeatures(degree=d, include_bias=False)
    d_features = d_converter.fit_transform(X)

    # Same test_size and random_state on every iteration, so each degree is
    # trained and scored on the same row partition.
    Xd_train, Xd_test, yd_train, yd_test = train_test_split(
        d_features, y, test_size=0.3, random_state=101
    )

    d_model = LinearRegression()
    d_model.fit(Xd_train, yd_train)

    train_RMSE = np.sqrt(metrics.mean_squared_error(yd_train, d_model.predict(Xd_train)))
    test_RMSE = np.sqrt(metrics.mean_squared_error(yd_test, d_model.predict(Xd_test)))
    train_RMSE_list.append(train_RMSE)
    test_RMSE_list.append(test_RMSE)
    

In [None]:
# Training RMSE per degree; entry i corresponds to polynomial degree i + 1.
train_RMSE_list


In [None]:
# Test RMSE per degree; entry i corresponds to polynomial degree i + 1.
test_RMSE_list


In [None]:
# Plot train vs. test RMSE for the first five degrees only.
# The lists are indexed from 0 but were filled for degrees starting at 1, so
# the x values are given explicitly; otherwise the "Polynomial Degree" axis
# would be shifted down by one.
train_shown = train_RMSE_list[:5]
test_shown = test_RMSE_list[:5]
degrees = list(range(1, len(train_shown) + 1))

plt.plot(degrees, train_shown, label='Train RMSE')
plt.plot(degrees, test_shown, label='Test RMSE')

plt.xticks(degrees)  # integer ticks only: degrees are whole numbers
plt.xlabel('Polynomial Degree')
plt.ylabel('RMSE')
plt.legend();

In [None]:
# Final pipeline: degree-2 polynomial expansion followed by linear regression.
Final_polynomial_converter = PolynomialFeatures(degree=2, include_bias=False)
Final_poly_features = Final_polynomial_converter.fit_transform(X)

# NOTE(review): the final model is fitted on the 70% training split only,
# not on all rows — confirm this is intended for the persisted model.
X_train, X_test, y_train, y_test = train_test_split(
    Final_poly_features,
    y,
    test_size=0.3,
    random_state=101,
)

Final_PolyModel = LinearRegression()
Final_PolyModel.fit(X_train, y_train)

In [None]:
from joblib import dump, load

In [None]:
# Persist the fitted final model to the working directory.
# NOTE(review): the filename says "Sales" although the model predicts the
# housing 'Price' column — presumably a leftover name; the load cell below
# reads the same filename, so rename both together if it is changed.
dump(Final_PolyModel, 'Sales_PolyModel.joblib')

In [None]:
# Persist the fitted polynomial converter as well: new inputs must be expanded
# by it before they can be passed to the saved model.
dump(Final_polynomial_converter, 'Poly_Converter.joblib')

In [None]:
# Restore the persisted model and converter from disk.
# joblib files are pickle-based, so only load artifacts from a trusted source.
Load_PolyModel = load('Sales_PolyModel.joblib')
Load_Converter = load('Poly_Converter.joblib')

In [None]:
# A single new observation to score with the persisted pipeline.
# It is built as a one-row DataFrame whose column names match the frame the
# converter was fitted on: this makes the meaning of each value explicit and
# avoids scikit-learn's "X does not have valid feature names" warning that a
# bare list triggers. The names are spelled out (rather than taken from X) so
# the cell also works in a session where only the saved artifacts are loaded.
campaign = pd.DataFrame(
    [[61287.067179, 6.002900, 5, 3, 40000]],
    columns=[
        'Avg. Area Income',
        'Avg. Area House Age',
        'Avg. Area Number of Rooms',
        'Avg. Area Number of Bedrooms',
        'Area Population',
    ],
)

In [None]:
# Expand the new observation with the restored converter so it has the same
# polynomial columns the model was trained on.
campaign_poly = Load_Converter.transform(campaign)

In [None]:
# Predict with the model restored from disk (not the in-memory
# Final_PolyModel), so the save/load round trip is what actually gets used.
Load_PolyModel.predict(campaign_poly)