In [2]:
import pandas as pd

# Load the pre-split train/test sets produced by the processing step.
train_data = pd.read_csv('../../data/processed/train.csv')
test_data = pd.read_csv('../../data/processed/test.csv')

# Drop the address column
train_data = train_data.drop(columns=['address'])
test_data = test_data.drop(columns=['address'])

target_column = 'resale_price'

# One-hot encode 'flat_model' against ONE category list taken from the
# training split. Encoding each split independently lets pandas pick the
# categories (and the `drop_first` baseline) per frame, so a value missing
# from either split would yield mismatched columns: a KeyError when the test
# frame is reordered below, or test rows silently encoded against a different
# baseline than the one the model was trained with.
flat_model_categories = train_data['flat_model'].astype('category').cat.categories
flat_model_dtype = pd.CategoricalDtype(categories=flat_model_categories)

# Values that only occur in the test split cannot be represented by the
# training columns; they become all-zero rows (same as the baseline category).
# Surface them instead of letting that happen silently.
unseen_flat_models = set(test_data['flat_model'].dropna()) - set(flat_model_categories)
if unseen_flat_models:
    print(
        "Warning: flat_model values absent from the training data "
        "will be encoded as all zeros:",
        sorted(unseen_flat_models),
    )

train_data = pd.get_dummies(
    train_data.assign(flat_model=train_data['flat_model'].astype(flat_model_dtype)),
    columns=['flat_model'],
    drop_first=True,
)
test_data = pd.get_dummies(
    test_data.assign(flat_model=test_data['flat_model'].astype(flat_model_dtype)),
    columns=['flat_model'],
    drop_first=True,
)

# Every column except the target is a feature (Index.difference returns them sorted).
feature_columns = train_data.columns.difference([target_column]).tolist()

# Reorder the columns of the test data to match train data order
test_data = test_data[feature_columns + [target_column]]

X_train = train_data[feature_columns]
y_train = train_data[target_column]

X_test = test_data[feature_columns]
y_test = test_data[target_column]

In [3]:
from sklearn.ensemble import RandomForestRegressor

# Build the forest with a fixed seed (reproducible across runs) and train it
# in a single expression; `fit` hands back the estimator itself.
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)

# Echo the full hyperparameter set so the run records which defaults were used.
print("Random Forest Model Parameters:", rf_model.get_params())


Random Forest Model Parameters: {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}


In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Score the held-out rows with the fitted forest.
rf_predictions = rf_model.predict(X_test)

# MAE is the average absolute miss; RMSE is the square root of the mean
# squared error, so large misses weigh more. Both share the target's units.
rf_mae = mean_absolute_error(y_true=y_test, y_pred=rf_predictions)
rf_rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=rf_predictions))

# Store the predictions alongside the actual prices for row-level inspection.
test_data['rf_predicted_price'] = rf_predictions

# Report both error measures.
print("\nEvaluation Metrics for Random Forest Regression:")
print("Mean Absolute Error (MAE):", rf_mae)
print("Root Mean Squared Error (RMSE):", rf_rmse)



Evaluation Metrics for Random Forest Regression:
Mean Absolute Error (MAE): 51672.459607081415
Root Mean Squared Error (RMSE): 77904.03342693887
