In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [7]:
# Path to the housing CSV, relative to the current working directory.
dataset_path = "./Housing.csv"
df = pd.read_csv(filepath_or_buffer=dataset_path)

# Render floats without decimals. This is a global pandas display option, so
# it also affects every table shown by later cells.
pd.options.display.float_format = '{:.0f}'.format

# Summary statistics of the numeric columns (last expression -> cell output).
df.describe()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
count,545,545,545,545,545,545
mean,4766729,5151,3,1,2,1
std,1870440,2170,1,1,1,1
min,1750000,1650,1,1,1,0
25%,3430000,3600,2,1,1,0
50%,4340000,4600,3,1,2,0
75%,5740000,6360,3,2,2,1
max,13300000,16200,6,4,4,3


## Encode categorical columns

In [11]:
# Columns stored with the object dtype are treated as categorical.
categorical_cols = df.select_dtypes(include=['object']).columns.to_list()

# Replace every category by an ordinal code, one code set per column.
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = pd.DataFrame(
    ordinal_encoder.fit_transform(df[categorical_cols]),
    columns=categorical_cols,
    # pd.concat(axis=1) aligns on index labels. Reusing df's index guarantees
    # the encoded rows line up with the numeric rows even if df ever stops
    # carrying the default 0..n-1 index (e.g. after filtering or sorting).
    index=df.index,
)

# Numeric columns first, encoded categorical columns appended after them.
numerical_df = df.drop(columns=categorical_cols)
encoded_df = pd.concat([numerical_df, encoded_categorical_cols], axis=1)

# Both inputs share df's index, so the concat must not add or drop rows.
assert len(encoded_df) == len(df), "row misalignment while concatenating encoded columns"

encoded_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking,mainroad,guestroom,basement,hotwaterheating,airconditioning,prefarea,furnishingstatus
0,13300000,7420,4,2,3,2,1,0,0,0,1,1,0
1,12250000,8960,4,4,4,3,1,0,0,0,1,0,0
2,12250000,9960,3,2,2,2,1,0,1,0,0,1,1
3,12215000,7500,4,2,2,3,1,0,1,0,1,1,0
4,11410000,7420,4,1,2,2,1,1,1,0,1,0,0


## Normalize the data

In [13]:
normalizer = StandardScaler()
dataset_arr = normalizer.fit_transform(encoded_df)

## Split the data

In [15]:
X, y = dataset_arr[:, 1:], dataset_arr[:, 0]
test_size = 0.3
random_state = 1
is_shuffle = True
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=is_shuffle)

In [16]:
# Build the three ensemble regressors with their default hyper-parameters.
# They all receive the same seed so a re-run reproduces the same models.
random_forest_regressor = RandomForestRegressor(random_state=random_state)
ada_boost = AdaBoostRegressor(random_state=random_state)
gradient_boost = GradientBoostingRegressor(random_state=random_state)

# Train each model on the training split. The fits are independent of one
# another; the gradient-boosting fit stays last so it remains the cell's
# final expression.
random_forest_regressor.fit(X_train, y_train)
ada_boost.fit(X_train, y_train)
gradient_boost.fit(X_train, y_train)

## Predict on the validation set and report MAE / MSE

In [21]:
def _report_errors(label, regressor, features, targets):
    """Predict with a fitted regressor and print its MAE and MSE.

    Parameters
    ----------
    label : str
        Model name used as the prefix of the printed lines.
    regressor
        Fitted estimator exposing ``predict``.
    features, targets
        Inputs to predict on and the true values to compare against.

    Returns
    -------
    tuple
        ``(predictions, mae, mse)`` so the caller can keep them for later use.
    """
    predictions = regressor.predict(features)
    mae = mean_absolute_error(targets, predictions)
    mse = mean_squared_error(targets, predictions)
    print(f"{label} MAE: {mae}")
    print(f"{label} MSE: {mse}")
    return predictions, mae, mse


# Same evaluation for every model; a separator line is printed between models.
random_y_pred, random_mae, random_mse = _report_errors(
    "Random Forest", random_forest_regressor, X_val, y_val
)
print("==============")

ada_boost_y_pred, ada_boost_mae, ada_boost_mse = _report_errors(
    "Ada Boost", ada_boost, X_val, y_val
)
print("==============")

gradient_boost_y_pred, gradient_boost_mae, gradient_boost_mse = _report_errors(
    "Gradient Boost", gradient_boost, X_val, y_val
)

Random Forest MAE: 0.4610288397546841
Ada Boost MAE: 0.567680019897059
Gradient Boost MAE: 0.4516626127750995
