In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor as GradientRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Path to the housing CSV; the leading './' makes it relative to the
# directory the kernel is running in.
dataset_path = './Housing.csv'

# Read the whole file into a DataFrame using pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=dataset_path)

In [None]:
# Columns pandas stored with the generic `object` dtype are treated as the
# categorical features that need encoding before modelling.
object_typed_df = df.select_dtypes(include=['object'])
categorical_cols = list(object_typed_df.columns)
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [None]:
# Replace each categorical column with numeric category codes
# (the encoder returns a float array, one column per input column).
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(
    df[categorical_cols]
)

# Wrap the encoded array in a DataFrame that carries df's own index.
# pd.concat(axis=1) aligns rows by index label, so a fresh 0..n-1 index would
# misalign rows (and introduce NaNs) whenever df does not have the default
# RangeIndex, e.g. after rows were filtered or dropped upstream.
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    columns=categorical_cols,
    index=df.index,
)

# Numeric columns are kept untouched and placed first, so `price` stays in
# column 0 of the combined frame.
numerical_df = df.drop(columns=categorical_cols)
encoded_df = pd.concat(
    [numerical_df, encoded_categorical_df], axis=1
)
print(encoded_df.head())

      price  area  bedrooms  bathrooms  stories  parking  mainroad  guestroom  \
0  13300000  7420         4          2        3        2       1.0        0.0   
1  12250000  8960         4          4        4        3       1.0        0.0   
2  12250000  9960         3          2        2        2       1.0        0.0   
3  12215000  7500         4          2        2        3       1.0        0.0   
4  11410000  7420         4          1        2        2       1.0        1.0   

   basement  hotwaterheating  airconditioning  prefarea  furnishingstatus  
0       0.0              0.0              1.0       1.0               0.0  
1       0.0              0.0              1.0       0.0               0.0  
2       1.0              0.0              0.0       1.0               1.0  
3       1.0              0.0              1.0       1.0               0.0  
4       1.0              0.0              1.0       0.0               0.0  


In [None]:
# Standardize every column of the encoded frame to zero mean / unit variance.
# NOTE(review): the scaler is fit on all rows and on the `price` column as
# well, i.e. before the train/validation split further down. Statistics from
# the validation rows therefore feed into the scaling, and the target ends up
# in standardized units. Consider fitting on the training rows only.
normalizer = StandardScaler()
normalizer.fit(encoded_df)
dataset_arr = normalizer.transform(encoded_df)
print(dataset_arr)

[[ 4.56636513  1.04672629  1.40341936 ...  1.4726183   1.80494113
  -1.40628573]
 [ 4.00448405  1.75700953  1.40341936 ...  1.4726183  -0.55403469
  -1.40628573]
 [ 4.00448405  2.21823241  0.04727831 ... -0.67906259  1.80494113
  -0.09166185]
 ...
 [-1.61432675 -0.70592066 -1.30886273 ... -0.67906259 -0.55403469
   1.22296203]
 [-1.61432675 -1.03338891  0.04727831 ... -0.67906259 -0.55403469
  -1.40628573]
 [-1.61432675 -0.5998394   0.04727831 ... -0.67906259 -0.55403469
   1.22296203]]


In [None]:
# Column 0 of the scaled array is the (standardized) price, used as the target;
# every remaining column is an input feature.
y = dataset_arr[:, 0]
X = dataset_arr[:, 1:]

In [None]:
# Hold-out settings: 30% of the rows go to validation, shuffled with a fixed
# seed. `random_state` is also reused by the model cells below for seeding.
test_size, random_state, in_shuffle = 0.3, 1, True

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=test_size, random_state=random_state, shuffle=in_shuffle
)

In [None]:
# Random forest with library-default hyperparameters, seeded for repeatability.
# NOTE(review): `regressor` is rebound by the AdaBoost and gradient-boosting
# cells that follow, so this fitted model is replaced before the prediction
# cell runs.
regressor = RandomForestRegressor(random_state=random_state).fit(X_train, y_train)
regressor  # show the fitted estimator as the cell output

In [None]:
# AdaBoost with library-default hyperparameters, seeded for repeatability.
# NOTE(review): `regressor` is rebound by the gradient-boosting cell that
# follows, so this fitted model is replaced before the prediction cell runs.
regressor = AdaBoostRegressor(random_state=random_state).fit(X_train, y_train)
regressor  # show the fitted estimator as the cell output

In [None]:
# Gradient boosting (imported under the alias GradientRegressor) with
# library-default hyperparameters, seeded for repeatability. This is the last
# assignment to `regressor` before the prediction cell.
regressor = GradientRegressor(random_state=random_state).fit(X_train, y_train)
regressor  # show the fitted estimator as the cell output

In [None]:
# Predict the held-out rows with whichever model `regressor` currently
# refers to (the most recently executed model cell).
y_pred = regressor.predict(X=X_val)

In [None]:
# Validation errors. Because y was taken from the standardized array, both
# figures are in standardized price units rather than the original currency.
mae, mse = (
    mean_absolute_error(y_val, y_pred),
    mean_squared_error(y_val, y_pred),
)
print(f'MAE: {mae}')
print(f'MSE: {mse}')

MAE: 0.4516626127750995
MSE: 0.39610445936979427
