## Import Required Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Import Data Set

In [None]:
# Location of the housing CSV, relative to the notebook's working directory.
DATA_PATH = '../input/usa-housing/USA_Housing.csv'

# Load the whole file into a single DataFrame used by every later cell.
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows to check that the CSV was parsed as expected.
df.head()

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Print per-column dtypes and non-null counts to spot missing or non-numeric data.
df.info()

## Exploratory Data Analysis

In [None]:
# Pairwise scatter plots of the columns seaborn can plot, with a KDE curve
# (instead of a histogram) on the diagonal.
sns.pairplot(
    data=df,
    diag_kind='kde',
)

In [None]:
# Distribution of the target column.
# `sns.distplot` is deprecated (since seaborn 0.11) and slated for removal;
# `histplot` with a density-normalised histogram plus a KDE overlay is the
# documented replacement for distplot's default output.
sns.histplot(df['Price'],kde=True,stat='density')

In [None]:
# Correlation matrix of the numeric columns, annotated with the coefficients.
# Restricting to numeric dtypes first matters: on pandas >= 2.0 `DataFrame.corr()`
# no longer drops non-numeric columns silently and raises if the frame contains
# any (e.g. a free-text column, if the CSV has one). `select_dtypes` works on
# every pandas version and yields the same matrix the old default produced.
sns.heatmap(df.select_dtypes(include='number').corr(),annot=True,cmap='Greens')

## Determine The Features and Label

In [None]:
# Predictor columns used by the regression models.
feature_columns = [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]

# Feature matrix and the target (label) series.
X = df[feature_columns]
y = df['Price']

## Preprocessing

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the features with all degree-2 terms (squares and pairwise products;
# interaction_only=False keeps the pure powers as well).
polynomial_converter=PolynomialFeatures(degree=2,interaction_only=False)

# fit_transform fits the converter and returns the expanded matrix in one step.
# The previous code first stored the fitted *transformer* in
# `polynomial_features` and then overwrote it with the transformed array,
# which left the name briefly bound to the wrong kind of object.
polynomial_features=polynomial_converter.fit_transform(X)

# Rows are unchanged; columns grow to the number of generated polynomial terms.
polynomial_features.shape

## Split Data to Train & Test

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    polynomial_features,
    y,
    test_size=0.3,
    random_state=101,
)

# Size of the training partition: (rows, polynomial feature columns).
X_train.shape

## Train the Model

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the polynomial-expanded training features.
# `fit` returns the estimator itself, so construction and fitting can be chained.
PolyModel = LinearRegression().fit(X_train, y_train)

# Display the fitted estimator as the cell output.
PolyModel

## Predict Test Data

In [None]:
# Predictions of the fitted model on the held-out rows.
y_pred = PolyModel.predict(X_test)

# Side-by-side view of actual vs. predicted prices and their difference.
comparison = pd.DataFrame(
    {
        'y_test': y_test,
        'y_pred': y_pred,
        'Residuals': (y_test - y_pred),
    }
)
comparison.head()

## Evaluate the Model

In [None]:
from sklearn import metrics

# Error metrics of the polynomial model on the test split.
MAE_Poly = metrics.mean_absolute_error(y_test, y_pred)   # mean absolute error
MSE_Poly = metrics.mean_squared_error(y_test, y_pred)    # mean squared error
RMSE_Poly = np.sqrt(MSE_Poly)                            # root of the MSE

In [None]:
# Collect the three test-set metrics into a one-column table for display.
pd.DataFrame(
    {'Metrics': [MAE_Poly, MSE_Poly, RMSE_Poly]},
    index=['MAE_Poly', 'MSE_Poly', 'RMSE_Poly'],
)

## Adjusting Model Parameters

In [None]:
# Imports are done once, up front: the original re-executed all four import
# statements on every loop iteration, which is redundant.
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Train/test RMSE for each polynomial degree, in degree order (1..9).
Train_RMSE_list=[]
Test_RMSE_list=[]

# NOTE: this loop rebinds polynomial_converter, polynomial_features, X_train,
# X_test, y_train, y_test and PolyModel; after it finishes they refer to the
# last degree tried, not to the degree-2 model fitted in the earlier cells.
for d in range(1,10):
    # Expand the raw features to all polynomial terms up to degree d.
    # fit_transform replaces the former fit-then-transform pair, whose first
    # assignment stored the transformer object under the feature-matrix name.
    polynomial_converter=PolynomialFeatures(degree=d,interaction_only=False)
    polynomial_features=polynomial_converter.fit_transform(X)

    # Split data to Train and Test (same seed each time, so every degree is
    # evaluated on the same rows).
    X_train, X_test, y_train, y_test = train_test_split(polynomial_features, y, test_size=0.3, random_state=101)

    # Train the Model
    PolyModel=LinearRegression()
    PolyModel.fit(X_train,y_train)

    # Predict on both partitions so train and test error can be compared.
    y_train_pred=PolyModel.predict(X_train)
    y_test_pred=PolyModel.predict(X_test)

    # Evaluate the Model just by RMSE
    Train_RMSE=np.sqrt(metrics.mean_squared_error(y_train,y_train_pred))
    Test_RMSE=np.sqrt(metrics.mean_squared_error(y_test,y_test_pred))

    # Record the RMSE pair for this degree.
    Train_RMSE_list.append(Train_RMSE)
    Test_RMSE_list.append(Test_RMSE)

In [None]:
# Train vs. test RMSE as a function of polynomial degree.
# The x values are derived from the number of recorded results (degrees start
# at 1), so the plot stays aligned with the lists if the swept range changes.
degrees=range(1,len(Train_RMSE_list)+1)
plt.plot(degrees,Train_RMSE_list,label='Train_RMSE')
plt.plot(degrees,Test_RMSE_list,label='Test_RMSE')
plt.xlabel('Polynomial Degree',fontsize=15)
# Label fixed: the plotted quantity is RMSE (the original read 'RME').
plt.ylabel('RMSE',fontsize=15)
plt.legend()

## In conclusion, Linear Regression works better than Polynomial Regression on this data set