In [None]:
# Comparison of MLP vs Classical Models for Housing Price Prediction
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed
from xgboost import XGBRegressor


In [None]:
# Load and preprocess
# Order matters here: the train/test split happens BEFORE any statistic is
# learned (imputation median, scaling mean/std) so that nothing computed from
# the test rows can influence the fitted preprocessing or the models.
df = pd.read_csv("housing.csv")
# One-hot encoding learns no statistics from the data values, so it is safe
# to apply to the whole frame before splitting.
df = pd.get_dummies(df, columns=['ocean_proximity'], drop_first=True)
X = df.drop(columns='median_house_value')
y = df['median_house_value']

# Same test_size / random_state as before, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Work on copies so the column assignments below never write into `X`.
X_train_raw = X_train_raw.copy()
X_test_raw = X_test_raw.copy()

# Median is learned from the training rows only, then reused for the test rows.
imputer = SimpleImputer(strategy='median')
X_train_raw['total_bedrooms'] = imputer.fit_transform(X_train_raw[['total_bedrooms']]).ravel()
X_test_raw['total_bedrooms'] = imputer.transform(X_test_raw[['total_bedrooms']]).ravel()

# Mean/std are learned from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later cell that still expects the full scaled matrix; it is
# transformed with the train-fitted imputer/scaler, never re-fitted.
X_full = X.copy()
X_full['total_bedrooms'] = imputer.transform(X_full[['total_bedrooms']]).ravel()
X_scaled = scaler.transform(X_full)


In [None]:
# Function for evaluation
def evaluate_model(name, y_true, y_pred, results):
    """Score one model's predictions and record the metrics.

    Computes MAE, RMSE and R2 for ``y_pred`` against ``y_true``, stores them
    in ``results`` as a dict with keys ``"Model"``, ``"MAE"``, ``"RMSE"`` and
    ``"R2"``, and prints a short summary.

    Parameters
    ----------
    name : str
        Label identifying the model in the results table and printed summary.
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, same length as ``y_true``.
    results : list of dict
        Accumulator that is mutated in place. If an entry with the same
        ``"Model"`` label already exists it is replaced (keeping its
        position) instead of appending a second row, so re-running a model
        cell does not duplicate that model in the final table or chart.

    Returns
    -------
    None
        Deliberately returns nothing so a cell ending in this call shows only
        the printed summary.
    """
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    metrics = {"Model": name, "MAE": mae, "RMSE": rmse, "R2": r2}

    # Overwrite a previous run of the same model rather than stacking rows.
    for idx, row in enumerate(results):
        if row.get("Model") == name:
            results[idx] = metrics
            break
    else:
        results.append(metrics)

    print(f"{name} Evaluation:\nMAE = {mae:.2f}, RMSE = {rmse:.2f}, R2 = {r2:.4f}\n")
results = []


In [None]:
# Linear Regression
# Fit on the training split, predict the held-out test split, then record
# the metrics under the "Linear Regression" label.
lr = LinearRegression().fit(X_train, y_train)
y_pred_lr = lr.predict(X_test)
evaluate_model(
    name="Linear Regression",
    y_true=y_test,
    y_pred=y_pred_lr,
    results=results,
)


In [None]:
# Random Forest
# random_state is fixed so the fitted forest is repeatable between runs.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
evaluate_model(
    name="Random Forest",
    y_true=y_test,
    y_pred=y_pred_rf,
    results=results,
)


In [None]:
# XGBoost
# Squared-error objective with a fixed random_state, trained on the training
# split and scored on the held-out test split.
xgb = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
).fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
evaluate_model(
    name="XGBoost",
    y_true=y_test,
    y_pred=y_pred_xgb,
    results=results,
)


In [None]:
# MLP
# Seed the Python, NumPy and TensorFlow RNGs before building the network so
# weight initialisation, dropout masks and batch shuffling are repeatable,
# in line with random_state=42 used for the other models.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact reproducibility is required.
set_random_seed(42)

# Two hidden ReLU layers with dropout after each, and a single linear output
# unit for the regression target.
model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer=Adam(0.001), loss='mse', metrics=['mae'])
# The last 10% of the training data is held out by Keras for validation.
model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, verbose=0)
# verbose=0 keeps prediction as quiet as training; flatten() turns the
# (n, 1) network output into a 1-D vector for the metric functions.
y_pred_mlp = model.predict(X_test, verbose=0).flatten()
evaluate_model("MLP", y_test, y_pred_mlp, results)


In [None]:
# Save and plot results
# Persist the metrics table, then draw a grouped bar chart with one group per
# model.
df_results = pd.DataFrame(results)
df_results.to_csv("model_comparison_results.csv", index=False)

# MAE and RMSE are in the units of the target while R2 is at most 1, so on a
# shared y-axis the R2 bars would be flattened to nothing. Error metrics go
# on the left axis and R2 on a secondary right-hand axis.
fig, ax_err = plt.subplots(figsize=(10, 6))
ax_r2 = ax_err.twinx()

bar_width = 0.25
x = np.arange(len(df_results['Model']))
# Explicit colours: each axis has its own colour cycle, so without them the
# first series on both axes would get the same colour.
bars_mae = ax_err.bar(x - bar_width, df_results['MAE'], width=bar_width,
                      label='MAE', color='tab:blue')
bars_rmse = ax_err.bar(x, df_results['RMSE'], width=bar_width,
                       label='RMSE', color='tab:orange')
bars_r2 = ax_r2.bar(x + bar_width, df_results['R2'], width=bar_width,
                    label='R2 Score', color='tab:green')

ax_err.set_xticks(x)
ax_err.set_xticklabels(df_results['Model'], rotation=15)
ax_err.set_ylabel("MAE / RMSE")
ax_r2.set_ylabel("R2 Score")
ax_err.set_title("Model Performance Comparison")
# One legend covering the series from both axes.
ax_err.legend(handles=[bars_mae, bars_rmse, bars_r2])

fig.tight_layout()
fig.savefig("model_comparison_chart.png")
plt.show()
