In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn import metrics

%matplotlib inline

In [None]:
# Relative path to the raw CSV that feeds the rest of the notebook.
DATA_PATH = '../input/real-estate-price-prediction/Real estate.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print the column dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# One row per column: True when that column contains at least one null value.
has_null = df.isnull().any()
pd.DataFrame(has_null.tolist(), index=df.columns.tolist())

In [None]:
# Pairwise correlation between the frame's columns, drawn as an annotated heatmap.
corr_matrix = df.corr()
sns.heatmap(corr_matrix, annot=True)

In [None]:
# Separate the regression target (y) from the predictors (x).
target_col = 'Y house price of unit area'

# 'No' is dropped along with the target — presumably a row identifier; confirm against the data.
x = df.drop(columns=[target_col, 'No'])
y = df[target_col]

In [None]:
# Train and test RMSE for every polynomial degree tried, in degree order
# (index 0 holds the result for degree 1).
train_RMSE_list = []
test_RMSE_list = []

for d in range(1, 10):

    # Expand the predictors into polynomial terms up to degree d.
    # fit_transform fits the converter and returns the transformed matrix in
    # one call, so poly_features is only ever bound to the feature matrix.
    polynomial_converter = PolynomialFeatures(degree=d)
    poly_features = polynomial_converter.fit_transform(x)

    # The same test_size and random_state are used on every pass so the
    # degrees are compared under identical split settings.
    X_train, X_test, y_train, y_test = train_test_split(
        poly_features, y, test_size=0.3, random_state=101
    )

    # Fit a linear model on the expanded training features.
    polymodel = LinearRegression()
    polymodel.fit(X_train, y_train)

    # Predict on both splits so train and test error can be compared.
    y_train_pred = polymodel.predict(X_train)
    y_test_pred = polymodel.predict(X_test)

    # RMSE is the square root of the mean squared error.
    train_RMSE = np.sqrt(metrics.mean_squared_error(y_train, y_train_pred))
    test_RMSE = np.sqrt(metrics.mean_squared_error(y_test, y_test_pred))

    # Record this degree's scores.
    train_RMSE_list.append(train_RMSE)
    test_RMSE_list.append(test_RMSE)

In [None]:
# Tabulate RMSE per degree. The degree index is derived from the number of
# recorded results (entry 0 is degree 1), so it stays aligned with the lists
# instead of repeating the fitted range as a literal.
degrees = list(range(1, len(train_RMSE_list) + 1))
rmse_table = pd.DataFrame(
    {'degree': degrees, 'train_RMSE': train_RMSE_list, 'test_RMSE': test_RMSE_list}
).set_index('degree')
display(rmse_table)

# Only the lowest degrees are plotted; the table above still lists every degree.
max_plot_degree = 4
plot_degrees = degrees[:max_plot_degree]

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(plot_degrees, train_RMSE_list[:max_plot_degree], label='Train RMSE')
ax.plot(plot_degrees, test_RMSE_list[:max_plot_degree], label='Test RMSE')

# Degrees are integers, so pin the ticks to them rather than letting
# matplotlib choose fractional positions.
ax.set_xticks(plot_degrees)
ax.set_xlabel('Polynomial Degree')
ax.set_ylabel('RMSE')
ax.legend()
plt.show()

## Create the polynomial feature set for degree 2

In [None]:
# Expand the predictors into polynomial terms up to degree 2.
# fit_transform fits the converter and returns the transformed matrix in one
# call, so poly_features is only ever bound to the feature matrix.
polynomial_converter = PolynomialFeatures(degree=2)
poly_features = polynomial_converter.fit_transform(x)

## Split the dataset



In [None]:
# Hold out a 0.3 fraction of the rows for testing; the fixed random_state
# makes the split repeatable across runs.
split_options = {'test_size': 0.3, 'random_state': 101}
X_train, X_test, y_train, y_test = train_test_split(poly_features, y, **split_options)


## Train the Model


In [None]:
# Fit a linear regression on the training split.
# The fit call is left as the cell's last expression, so the notebook displays the fitted estimator.
polymodel=LinearRegression()
polymodel.fit(X_train, y_train)

## Predicting on both Train & Test Data


In [None]:
# Predict on the training split first, then on the held-out split, with the fitted model.
y_train_pred, y_test_pred = (
    polymodel.predict(features) for features in (X_train, X_test)
)

In [None]:
# Residual for each held-out row: actual target minus predicted target.
test_residuals = y_test.sub(y_test_pred)

In [None]:
# Residuals plotted against the actual test targets. test_residuals inherits
# the target column's name from y_test, so without explicit labels both axes
# would carry the same text.
ax = sns.scatterplot(x=y_test, y=test_residuals)
ax.set(xlabel='Actual Y house price of unit area', ylabel='Residual (actual - predicted)')
# Zero line: points above it were under-predicted, points below were over-predicted.
ax.axhline(y=0, color='r', ls='--')

In [None]:
# Density of the test residuals. The axis is labelled explicitly because
# test_residuals inherits the target column's name from y_test, which would
# otherwise be the only name available for the default label.
ax = sns.kdeplot(test_residuals)
ax.set_xlabel('Residual (actual - predicted)')
ax