In [38]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

In [39]:
# Render every float in displayed frames with two decimal places.
pd.options.display.float_format = '{:.2f}'.format

# Read the house-price CSV from its hard-coded location.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Machine Learning/house_prices.csv",
)

# Summary statistics for all columns, transposed so each column is one row.
df.describe(include="all").T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
id,21613.0,4580301520.86,2876565571.31,1000102.0,2123049194.0,3904930410.0,7308900445.0,9900000190.0
price,21613.0,540182.16,367362.23,75000.0,321950.0,450000.0,645000.0,7700000.0
bedrooms,21613.0,3.37,0.93,0.0,3.0,3.0,4.0,33.0
bathrooms,21613.0,2.11,0.77,0.0,1.75,2.25,2.5,8.0
sqft_living,21613.0,2079.9,918.44,290.0,1427.0,1910.0,2550.0,13540.0
sqft_lot,21613.0,15106.97,41420.51,520.0,5040.0,7618.0,10688.0,1651359.0
floors,21613.0,1.49,0.54,1.0,1.0,1.5,2.0,3.5
waterfront,21613.0,0.01,0.09,0.0,0.0,0.0,0.0,1.0
view,21613.0,0.23,0.77,0.0,0.0,0.0,0.0,4.0
condition,21613.0,3.41,0.65,1.0,3.0,3.0,4.0,5.0


In [40]:
# Assign input and output features: every column except `price` and `id`
# is used as a predictor; `price` is the regression target.
x = df.drop(columns=['price', 'id'])
y = df['price']

# Split the data into training and testing halves. A fixed random_state makes
# the shuffle (and therefore every metric computed downstream) reproducible
# across kernel restarts; without it each run yields a different split.
trainX, testX, trainY, testY = train_test_split(
    x, y, test_size=0.5, random_state=42
)

# Scale the dataset with a standard scaler. The scaler is fitted on the
# training split only and then applied to both splits, so no statistics from
# the test split are used during fitting.
sc = StandardScaler()
scaler = sc.fit(trainX)  # fit() returns the estimator, so `scaler` refers to `sc`
trainX_scaled = scaler.transform(trainX)
testX_scaled = scaler.transform(testX)

In [41]:
# Show the configuration parameters of the fitted StandardScaler.
scaler.get_params()

{'copy': True, 'with_mean': True, 'with_std': True}

In [42]:
# Wrap the scaled training matrix in a DataFrame, reusing the original
# feature names as column labels.
df_scaled = pd.DataFrame(data=trainX_scaled, columns=trainX.columns)

# Per-column summary of the scaled training features.
df_scaled.describe()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,sqft_living15,sqft_lot15
count,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0,10806.0
mean,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,0.0
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-3.73,-2.75,-1.95,-0.35,-0.91,-0.09,-0.3,-3.7,-5.69,-1.81,-0.66,-1.99,-0.45
25%,-0.41,-0.48,-0.71,-0.25,-0.91,-0.09,-0.3,-0.63,-0.56,-0.71,-0.66,-0.72,-0.28
50%,-0.41,0.17,-0.18,-0.19,0.01,-0.09,-0.3,-0.63,-0.56,-0.27,-0.66,-0.21,-0.19
75%,0.7,0.5,0.51,-0.11,0.93,-0.09,-0.3,0.91,0.29,0.53,0.62,0.55,-0.1
max,7.34,7.63,12.46,26.97,3.71,11.65,5.0,2.44,4.56,9.17,10.31,5.55,31.17


In [43]:
# Multi-layer perceptron regressor with three hidden layers (150, 100 and 50
# units), ReLU activations and the Adam solver, capped at max_iter=300.
# random_state pins the weight initialisation and batch shuffling so that
# re-running the notebook reproduces the same fitted model.
mlp_reg = MLPRegressor(
    hidden_layer_sizes=(150, 100, 50),
    max_iter=300,
    activation='relu',
    solver='adam',
    random_state=42,
)
mlp_reg.fit(trainX_scaled, trainY)



In [44]:
# Predict the target for the scaled test features.
y_pred = mlp_reg.predict(testX_scaled)

# Put the true values next to the predictions for a side-by-side look at
# the first ten test rows.
prediction_table = pd.DataFrame(
    data={
        'Actual': testY,
        'Predicted': y_pred,
    }
)
prediction_table.head(n=10)

Unnamed: 0,Actual,Predicted
18735,325000,349750.75
1769,299000,385335.88
18065,220000,481425.27
95,905000,584941.92
7123,519900,512946.42
598,515000,829091.6
15782,579000,550840.32
5143,754300,412336.98
1374,1130000,770388.65
17034,600000,374316.72


In [45]:
# Evaluate the predictions against the true test targets.
mae = metrics.mean_absolute_error(testY, y_pred)
# scikit-learn returns MAPE as a fraction (0.28 means 28 %), not a percentage.
mape = metrics.mean_absolute_percentage_error(testY, y_pred)
# Compute MSE once and derive RMSE from it instead of recomputing the metric.
mse = metrics.mean_squared_error(testY, y_pred)
rmse = np.sqrt(mse)
# r2_score is the coefficient of determination, not a mean, so it is
# labelled accordingly below.
r2 = metrics.r2_score(testY, y_pred)

print('Mean Absolute Error:', mae)
print('Mean Absolute Percentage Error MAPE:', mape)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', rmse)
print('R-Squared:', r2)

Mean Absolute Error: 135113.89992781685
Mean Absolute Percentage Error MAPE: 0.2790722137269022
Mean Squared Error: 42485568691.70071
Root Mean Squared Error: 206120.27724535184
Mean R-Square: 0.6935849085966561
