In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import datetime

In [4]:
# Load the raw transactions table; the path is resolved relative to the
# notebook's working directory.
csv_path = 'Real_Estate.csv'
data = pd.read_csv(csv_path)

In [6]:
# Parse the transaction date once, store the parsed values back on the frame,
# and derive the calendar year and month as separate numeric columns.
transaction_ts = pd.to_datetime(data['Transaction date'])
data['Transaction date'] = transaction_ts
data['Transaction Year'] = transaction_ts.dt.year
data['Transaction month'] = transaction_ts.dt.month

In [8]:
# The parsed date column is removed here; its year and month parts live on in
# 'Transaction Year' and 'Transaction month'. `data` itself is left untouched.
dataClean = data.drop('Transaction date', axis=1)

In [9]:
# Separate the regression target from the feature columns.
target_column = 'House price of unit area'
Y = dataClean[target_column]
X = dataClean.drop(columns=[target_column])

In [10]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so the test rows do not influence the statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [13]:
# Sanity check: show the (rows, columns) dimensions of the scaled training features.
X_train_scaled.shape

(331, 7)

In [14]:
# Sanity check: show the (rows, columns) dimensions of the scaled test features.
X_test_scaled.shape

(83, 7)

In [15]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, r2_score

In [16]:
# Candidate regressors, keyed by the label that appears in the results table.
# The tree-based estimators are all given the same fixed random_state.
models = dict(
    LinearRegression=LinearRegression(),
    DecisionTreeRegressor=DecisionTreeRegressor(random_state=42),
    RandomForestRegressor=RandomForestRegressor(random_state=42),
    GradientBoostingRegressor=GradientBoostingRegressor(random_state=42),
)

# Evaluation metrics per model, filled in by the training loop.
results = dict()

In [17]:
# Train every candidate model on the scaled training split and score it on the
# held-out test split.
#
# `results` is re-initialised here so that this cell is idempotent: re-running
# it after editing `models` never leaves metrics from an earlier run (for
# example, for a model that has since been removed) in the table.
results = {}

for name, model in models.items():
    # fit the estimator on the standardized training features
    model.fit(X_train_scaled, Y_train)

    # predict the target for the held-out rows
    predictions = model.predict(X_test_scaled)

    # score the predictions against the true test targets
    mae = mean_absolute_error(Y_test, predictions)
    r2 = r2_score(Y_test, predictions)

    # keep both metrics under the model's label
    results[name] = {"MAE": mae, "R²": r2}

# Transpose so each model is a row and each metric is a column.
results_df = pd.DataFrame(results).T
print(results_df)

                                 MAE        R²
LinearRegression            9.518530  0.549566
DecisionTreeRegressor      11.415789  0.176253
RandomForestRegressor       9.804854  0.522600
GradientBoostingRegressor  10.088533  0.475219
