### Step1: Import all Necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Step2: Import the Dataset

In [None]:
# Load the USA housing CSV into a DataFrame; the path is relative to the
# notebook's working directory.
df= pd.read_csv('../input/usa-housing/USA_Housing.csv')

### Step3: Data Overview

In [None]:
# Preview the first rows (head() defaults to 5) to check that the file parsed as expected.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Column dtypes and non-null counts: a quick check for missing values.
df.info()

### Step4: Exploratory Data Analysis
**Same as this notebook:**
https://www.kaggle.com/mojtaba142/linearregression-usa-house-price

### Step5: Determine the Features and Target Variable

In [None]:
# Feature matrix: the five area-level columns used as model inputs.
X = df[[
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]]

# Target vector: the value the regression is trained to predict.
y = df['Price']

### Step6: Preprocessing

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Expand the base features into every polynomial term up to degree 2
# (the originals, their squares, and all pairwise products).
# include_bias=False leaves out the constant column, since LinearRegression
# fits its own intercept.
polynomial_converter = PolynomialFeatures(degree=2, include_bias=False)

# fit_transform learns the input layout and builds the expanded matrix in one
# call, so poly_features never temporarily holds the transformer object.
poly_features = polynomial_converter.fit_transform(X)

In [None]:
# After the degree-2 expansion of 5 inputs the column count should be 20
# (5 linear + 5 squared + 10 pairwise products).
poly_features.shape


In [None]:
# Shape of the original feature frame, for comparison with the expanded matrix.
X.shape

**Polynomial Features:**

**X1, X2, X3, X4, X5**

**X1^2, X2^2, X3^2, X4^2, X5^2**

**X1X2, X1X3, X1X4, X1X5, X2X3, X2X4, X2X5, X3X4, X3X5, X4X5**

### Step7: Split the Data into Train and Test Sets

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    poly_features,
    y,
    test_size=0.3,
    random_state=101,
)

### Step8: Train the Model

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least squares on the polynomial-expanded training features.
polymodel = LinearRegression()

# fit() is kept as the cell's last expression so the estimator is still displayed.
polymodel.fit(X=X_train, y=y_train)

### Step9: Predicting Test Data

In [None]:
# Predict the target for the held-out test rows with the model fitted in Step 8.
y_pred=polymodel.predict(X_test)

### Step10: Evaluating the Model Performance

In [None]:
from sklearn import metrics

# Test-set error of the degree-2 model.
MAE = metrics.mean_absolute_error(y_test, y_pred)
MSE = metrics.mean_squared_error(y_test, y_pred)
# RMSE is the square root of MSE, which puts the error back in the target's units.
RMSE = np.sqrt(MSE)

# One-column summary table, one row per metric.
pd.DataFrame(
    {'Metrics': [MAE, MSE, RMSE]},
    index=['MAE', 'MSE', 'RMSE'],
)

### Step11: Adjusting Model Parameters

In [None]:
# Sweep the polynomial degree and record the train and test RMSE for each one,
# so the two error curves can be compared to see where added complexity stops
# helping on unseen data.
#
# All names created here are specific to the sweep: the degree-2 objects from
# Steps 6-8 (poly_features, X_train, polymodel, ...) are left untouched, so the
# earlier cells still give the same results if they are re-run afterwards.

# Train RMSE per degree (index 0 is degree 1)
train_RMSE_list = []
# Test RMSE per degree (index 0 is degree 1)
test_RMSE_list = []

# The row partition does not depend on the degree, so split the raw features
# once. Same test_size/random_state as Step 7, hence the same train/test rows.
X_train_raw, X_test_raw, y_train_sweep, y_test_sweep = train_test_split(
    X, y, test_size=0.3, random_state=101
)

for d in range(1, 15):  # degrees 1 through 14

    # Preprocessing: expand the raw features to degree d. The expansion is
    # computed row by row, so transforming each split separately yields the
    # same values as transforming the full matrix and splitting afterwards.
    sweep_converter = PolynomialFeatures(degree=d, include_bias=False)
    X_train_d = sweep_converter.fit_transform(X_train_raw)
    X_test_d = sweep_converter.transform(X_test_raw)

    # Train a fresh model for this degree
    sweep_model = LinearRegression()
    sweep_model.fit(X_train_d, y_train_sweep)

    # Predict on both the train and the test data
    y_train_pred = sweep_model.predict(X_train_d)
    y_test_pred = sweep_model.predict(X_test_d)

    # RMSE on the train set and on the test set
    train_RMSE = np.sqrt(metrics.mean_squared_error(y_train_sweep, y_train_pred))
    test_RMSE = np.sqrt(metrics.mean_squared_error(y_test_sweep, y_test_pred))

    # Append the RMSE values to the train and test lists
    train_RMSE_list.append(train_RMSE)
    test_RMSE_list.append(test_RMSE)

In [None]:
# Training RMSE per degree, in loop order (index 0 is degree 1).
train_RMSE_list

In [None]:
# Test RMSE per degree, in loop order (index 0 is degree 1).
test_RMSE_list

In [None]:
# Compare train and test RMSE across polynomial degrees.
# Only the first 13 entries are drawn, so the last computed degree is left off
# the chart -- presumably to keep the y-axis readable; confirm with the author.
train_curve = train_RMSE_list[:13]
test_curve = test_RMSE_list[:13]

# Derive the x-axis from the data actually plotted so it cannot drift out of
# sync with the slice above (list index 0 corresponds to degree 1).
plot_degrees = range(1, len(train_curve) + 1)

fig, ax = plt.subplots()
ax.plot(plot_degrees, train_curve, label='Train RMSE')
ax.plot(plot_degrees, test_curve, label='Test RMSE')

ax.set_xlabel('Polynomial Degree')
ax.set_ylabel('RMSE')
ax.set_title('Train vs. Test RMSE by Polynomial Degree')
ax.legend();  # trailing semicolon hides the Legend repr in the cell output

## Note: The plot shows that the linear model performs better than the polynomial models in this case