# In [None]:
# ================================================
# TASK 6: HOUSE PRICE PREDICTION (SINGLE CELL)
# ================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import GradientBoostingRegressor

# Use seaborn's "whitegrid" style for the plots drawn later in this cell.
sns.set(style="whitegrid")

# -----------------------------
# 1️⃣ LOAD DATASET
# Read the CSV into a DataFrame and preview its first rows.
# -----------------------------
DATASET_PATH = "/content/House Price Prediction Dataset.csv"
df = pd.read_csv(DATASET_PATH)
print("Dataset Loaded Successfully!\n")
display(df.head())

# -----------------------------
# 2️⃣ BASIC INFO
# -----------------------------
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

print("\nSummary Stats:")
display(df.describe())

# -----------------------------
# 3️⃣ SELECT FEATURES
# Keep the five predictor columns plus the 'Price' target, then separate
# them into the feature frame X and the target series y.
# -----------------------------
TARGET_COLUMN = 'Price'
FEATURE_COLUMNS = ['Area', 'Bedrooms', 'Bathrooms', 'Floors', 'Location']

df = df[FEATURE_COLUMNS + [TARGET_COLUMN]]
y = df[TARGET_COLUMN]
X = df.drop(columns=TARGET_COLUMN)

print("\nSelected Features:")
display(df.head())

# -----------------------------
# 4️⃣ PREPROCESSING
# Encode categorical feature → Location
# -----------------------------
categorical_features = ['Location']

# Categories not seen during fit are ignored at transform time instead of
# raising an error.
location_encoder = OneHotEncoder(handle_unknown="ignore")

# One-hot encode the categorical column(s); every other column is passed
# through unchanged.
preprocess = ColumnTransformer(
    transformers=[("cat", location_encoder, categorical_features)],
    remainder='passthrough',
)

# -----------------------------
# 5️⃣ TRAIN-TEST SPLIT
# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# partition identical from run to run.
# -----------------------------
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# -----------------------------
# 6️⃣ MODEL TRAINING
# Gradient Boosting Regressor
# -----------------------------
# Seed the estimator as well as the split: without random_state the fitted
# trees can differ between runs, so the reported metrics would not be
# reproducible.
model = GradientBoostingRegressor(random_state=42)
print("\nTraining Model...")
# Fit the encoder on the training rows only, so nothing from the test set
# influences the learned categories.
X_train_encoded = preprocess.fit_transform(X_train)
model.fit(X_train_encoded, y_train)

# -----------------------------
# 7️⃣ PREDICTIONS
# Encode the held-out rows with the already-fitted transformer, then predict.
# -----------------------------
X_test_encoded = preprocess.transform(X_test)
y_pred = model.predict(X_test_encoded)

# -----------------------------
# 8️⃣ METRICS
# MAE and RMSE are both expressed in the same units as the target.
# -----------------------------
mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
# Plain string (no placeholders, so no f-prefix); the pin emoji is restored
# from its mis-encoded form.
print("\n📌 MODEL PERFORMANCE")
print(f"MAE : {mae:,.2f}")
print(f"RMSE: {rmse:,.2f}")

# -----------------------------
# 9️⃣ VISUALIZATION
# -----------------------------
# Figure 1: actual vs. predicted prices on the test set.
_, scatter_ax = plt.subplots(figsize=(7, 5))
scatter_ax.scatter(y_test, y_pred, alpha=0.6)
scatter_ax.set_xlabel("Actual Prices")
scatter_ax.set_ylabel("Predicted Prices")
scatter_ax.set_title("Actual vs Predicted House Prices")
scatter_ax.grid(True)
plt.show()

# Figure 2: distribution of the prediction errors (actual minus predicted),
# drawn as a histogram with a KDE overlay.
prediction_errors = y_test - y_pred
_, error_ax = plt.subplots(figsize=(7, 5))
sns.histplot(prediction_errors, kde=True, ax=error_ax)
error_ax.set_title("Prediction Error Distribution")
error_ax.set_xlabel("Prediction Error")
plt.show()

# Final status line; the check mark is restored from its mis-encoded form.
print("\nTask Completed Successfully ✔")