In [1]:
# Mount Google Drive at /content/drive so later cells can read files stored
# there (the dataset path below points into this mount).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Read the real-estate CSV from the mounted Drive folder
file_path = "/content/drive/MyDrive/Real estate.csv"
real_estate_data = pd.read_csv(file_path)

# Separate the target from the features: every column other than the target
# is used as a model input.
# NOTE(review): if the CSV carries a row-index/ID column, it is currently fed
# to the models as a feature — confirm whether that is intended.
target_column = 'Y house price of unit area'
y = real_estate_data[target_column]  # Target variable
X = real_estate_data.drop(columns=[target_column])  # Features

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

def report_regression_metrics(model_name, y_true, y_pred):
    """Print and return the error metrics for one model's predictions.

    Args:
        model_name: Label used in the printed header ("<model_name> Metrics:").
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        Tuple ``(mse, mae, rmse)`` where ``rmse`` is ``sqrt(mse)``.
    """
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    print(f"{model_name} Metrics:")
    print("Mean Squared Error (MSE):", mse)
    print("Mean Absolute Error (MAE):", mae)
    print("Root Mean Squared Error (RMSE):", rmse)
    return mse, mae, rmse


# Train the decision tree model (fixed seed for reproducible tie-breaking)
decision_tree = DecisionTreeRegressor(random_state=42)
decision_tree.fit(X_train, y_train)

# Make predictions on the test set using decision tree
y_pred_dt = decision_tree.predict(X_test)

# Evaluate the decision tree model
mse_dt, mae_dt, rmse_dt = report_regression_metrics("Decision Tree", y_test, y_pred_dt)
print()  # blank line between the two metric reports

# Train the random forest model (same seed so the run is reproducible)
random_forest = RandomForestRegressor(random_state=42)
random_forest.fit(X_train, y_train)

# Make predictions on the test set using random forest
y_pred_rf = random_forest.predict(X_test)

# Evaluate the random forest model
mse_rf, mae_rf, rmse_rf = report_regression_metrics("Random Forest", y_test, y_pred_rf)

# Compare the performance of the models.
# RMSE is sqrt(MSE), so it always ranks the models the same way MSE does;
# only MSE and MAE are independent votes and are the ones compared here.
rf_better_mse = mse_rf < mse_dt
rf_better_mae = mae_rf < mae_dt
dt_better_mse = mse_dt < mse_rf
dt_better_mae = mae_dt < mae_rf

if rf_better_mse and rf_better_mae:
    print("\nRandom Forest is preferred for analyzing the data.")
elif dt_better_mse and dt_better_mae:
    print("\nDecision Tree is preferred for analyzing the data.")
elif not (rf_better_mse or rf_better_mae or dt_better_mse or dt_better_mae):
    # Neither model is strictly better on any metric: a genuine tie.
    print("\nBoth models perform similarly. Further analysis may be needed.")
else:
    # The metrics do not single out one model; previously this case was
    # reported as "perform similarly", which is not what it means.
    print("\nMSE and MAE do not agree on a better model. Further analysis may be needed.")

Decision Tree Metrics:
Mean Squared Error (MSE): 53.89337349397591
Mean Absolute Error (MAE): 5.360240963855421
Root Mean Squared Error (RMSE): 7.341210628634483

Random Forest Metrics:
Mean Squared Error (MSE): 31.859230951807227
Mean Absolute Error (MAE): 3.8668554216867457
Root Mean Squared Error (RMSE): 5.644398192173123

Random Forest is preferred for analyzing the data.
