# Giving information about the data

In [1]:
import pandas as pd

data = pd.read_csv('./dataset/USA_Housing.csv')

# The head

In [2]:
data.head()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price,Address
0,79545.458574,5.682861,7.009188,4.09,23086.800503,1059034.0,"208 Michael Ferry Apt. 674\nLaurabury, NE 3701..."
1,79248.642455,6.0029,6.730821,3.09,40173.072174,1505891.0,"188 Johnson Views Suite 079\nLake Kathleen, CA..."
2,61287.067179,5.86589,8.512727,5.13,36882.1594,1058988.0,"9127 Elizabeth Stravenue\nDanieltown, WI 06482..."
3,63345.240046,7.188236,5.586729,3.26,34310.242831,1260617.0,USS Barnett\nFPO AP 44820
4,59982.197226,5.040555,7.839388,4.23,26354.109472,630943.5,USNS Raymond\nFPO AE 09386


# Data Description

In [3]:
data.describe()

Unnamed: 0,Avg. Area Income,Avg. Area House Age,Avg. Area Number of Rooms,Avg. Area Number of Bedrooms,Area Population,Price
count,5000.0,5000.0,5000.0,5000.0,5000.0,5000.0
mean,68583.108984,5.977222,6.987792,3.98133,36163.516039,1232073.0
std,10657.991214,0.991456,1.005833,1.234137,9925.650114,353117.6
min,17796.63119,2.644304,3.236194,2.0,172.610686,15938.66
25%,61480.562388,5.322283,6.29925,3.14,29403.928702,997577.1
50%,68804.286404,5.970429,7.002902,4.05,36199.406689,1232669.0
75%,75783.338666,6.650808,7.665871,4.49,42861.290769,1471210.0
max,107701.748378,9.519088,10.759588,6.5,69621.713378,2469066.0


# Columns

In [4]:
data.columns

Index(['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
       'Avg. Area Number of Bedrooms', 'Area Population', 'Price', 'Address'],
      dtype='object')

# Define our X and y

In [5]:
X = data[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
          'Avg. Area Number of Bedrooms', 'Area Population']]
y = data[['Price']]

print(X, y)

      Avg. Area Income  Avg. Area House Age  Avg. Area Number of Rooms  \
0         79545.458574             5.682861                   7.009188   
1         79248.642455             6.002900                   6.730821   
2         61287.067179             5.865890                   8.512727   
3         63345.240046             7.188236                   5.586729   
4         59982.197226             5.040555                   7.839388   
...                ...                  ...                        ...   
4995      60567.944140             7.830362                   6.137356   
4996      78491.275435             6.999135                   6.576763   
4997      63390.686886             7.250591                   4.805081   
4998      68001.331235             5.534388                   7.130144   
4999      65510.581804             5.992305                   6.792336   

      Avg. Area Number of Bedrooms  Area Population  
0                             4.09     23086.800503  
1  

# Importing the libraries

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error,r2_score
from sklearn.preprocessing import PolynomialFeatures

# Train and test

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a polynomial features

In [8]:
poly = PolynomialFeatures(degree = 2)
X_Train_poly = poly.fit_transform(X_train)
X_test_poly = poly.fit_transform(X_test)

# Polynomial Regression

In [9]:
model = LinearRegression()
model.fit(X_Train_poly, y_train)

# Predicting the model

In [11]:
Y_train_predict = model.predict(X_Train_poly)
Y_test_predict = model.predict(X_test_poly)

# Evaluate the model

In [12]:
train_rmse = root_mean_squared_error(y_train, Y_train_predict)
test_rmse = root_mean_squared_error(y_test, Y_test_predict)
train_r2_score = r2_score(y_train, Y_train_predict)
test_r2_score = r2_score(y_test, Y_test_predict)

# Printing the results

In [15]:
print(f"Train RMSE: {train_rmse:.2f}")
print(f"Test RMSE: {test_rmse:.2f}")
print(f"Train R2 Score: {train_r2_score:.2f}")
print(f"Test R2 Score: {test_r2_score:.2f}")

Train RMSE: 101169.53
Test RMSE: 100495.11
Train R2 Score: 0.92
Test R2 Score: 0.92
