# Model

In [179]:
import pandas as pd

from sklearn.model_selection import train_test_split

from sklearn.ensemble import GradientBoostingRegressor

from sklearn.metrics import mean_absolute_error

In [180]:
# Read clean_data.csv (path relative to the notebook's working directory)
# into the DataFrame used by every later cell.
# NOTE(review): the preview output further down shows "Unnamed: 0" and
# "Unnamed: 0.1" columns, which look like index columns persisted by earlier
# to_csv calls -- confirm upstream; they are not selected as features.
data = pd.read_csv(filepath_or_buffer="clean_data.csv")

## Removing outliers with interquartile range

In [181]:
# Fences on the target column: a row is kept only when its Price lies within
# 1.5 * IQR below the first quartile and 1.5 * IQR above the third quartile.
Q1, Q3 = (data["Price"].quantile(q) for q in (0.25, 0.75))
IQR = Q3 - Q1

lower_bound, upper_bound = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

# Show both fences, one per line.
print(lower_bound, upper_bound, sep="\n")

# between() is inclusive on both ends, i.e. lower_bound <= Price <= upper_bound.
# Note: this overwrites `data`, so re-running the cell recomputes the fences
# on the already-filtered frame.
data = data[data["Price"].between(lower_bound, upper_bound)]
data.head(20)

-158500.0
797500.0


Unnamed: 0.1,Unnamed: 0,BedroomCount,ConstructionYear,Fireplace,FloodingZone,Furnished,GardenArea,LivingArea,MonthlyCharges,NumberOfFacades,...,ToiletCount,TypeOfProperty,SubtypeOfProperty_num,TypeOfSale_num,PEB_num,Kitchen_num,StateOfBuilding_num,FloodingZone_num,Region_num,Province_num
0,0,1.0,1969,0.0,NO_DATA,0.0,0.0,29.0,0.0,2.0,...,1.0,2,1,0,0,0,4,0,2,10
1,1,13.0,1920,0.0,NO_DATA,0.0,0.0,391.0,0.0,3.0,...,5.0,1,17,0,4,0,4,0,1,1
2,2,4.0,2008,0.0,NON_FLOOD_ZONE,1.0,0.0,111.0,0.0,2.0,...,2.0,1,8,0,6,5,4,10,2,10
3,3,4.0,1987,0.0,NO_DATA,0.0,1.0,167.0,0.0,2.0,...,2.0,1,8,0,2,0,3,0,2,10
4,4,2.0,1972,0.0,NON_FLOOD_ZONE,0.0,0.0,92.0,0.0,2.0,...,1.0,2,3,0,6,0,5,10,2,6
5,5,1.0,1994,0.0,NO_DATA,1.0,0.0,50.0,0.0,2.0,...,1.0,2,3,0,3,7,5,0,3,11
6,6,6.0,1970,1.0,NON_FLOOD_ZONE,0.0,2519.0,425.0,0.0,4.0,...,5.0,1,13,1,5,5,4,10,1,5
7,7,2.0,1987,0.0,NON_FLOOD_ZONE,0.0,0.0,167.0,0.0,3.0,...,0.0,1,8,0,2,0,0,10,2,9
8,8,3.0,2023,0.0,NO_DATA,0.0,0.0,167.0,0.0,3.0,...,0.0,2,3,0,7,0,0,0,2,8
9,9,2.0,1987,0.0,NO_DATA,0.0,0.0,101.0,0.0,2.0,...,0.0,2,3,0,0,0,0,0,2,7


## Splitting the dataset

In [182]:
# Feature matrix: the 15 predictor columns, selected explicitly by name.
X = data.loc[
    :,
    [
        "BedroomCount",
        "TypeOfProperty",
        "SubtypeOfProperty_num",
        "Region_num",
        "Province_num",
        "StateOfBuilding_num",
        "Kitchen_num",
        "PEB_num",
        "TypeOfSale_num",
        "LivingArea",
        "GardenArea",
        "SurfaceOfPlot",
        "ConstructionYear",
        "NumberOfFacades",
        "MonthlyCharges",
    ],
]

# Target vector: the price to be predicted.
y = data.loc[:, "Price"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [183]:
# Fit a gradient-boosted regression tree ensemble (library defaults for all
# hyperparameters) on the training split.
# random_state is pinned because the underlying trees permute candidate
# features at every split, so ties between equally good splits can otherwise
# be broken differently from run to run, changing the fitted model and the
# reported error. 42 matches the seed already used for train_test_split.
model = GradientBoostingRegressor(random_state=42)
model.fit(X_train, y_train)

In [184]:
# Predict prices for the held-out rows.
y_pred = model.predict(X_test)

# Mean absolute error between the true and predicted prices, in the same
# units as the Price column.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(mae)

67714.83309276657
