# Real Estate Price Prediction using Machine Learning

**Type:** Regression

**Use Case:** Real estate agencies / property dealers

**Tools:** Google Colab + Python + ML + Streamlit

# 🔹 FINAL DELIVERABLE
✔ House price prediction model

✔ Linear Regression + Random Forest

✔ CSV upload support

✔ Streamlit Web App

✔ Clean notebook (Colab)

✔ GitHub ready project

In [None]:
!pip  install -U pandas numpy matplotlib seaborn scikit-learn streamlit


### Data Load

In [None]:
import pandas as pd

# Location of the dataset; edit this to match where Housing.csv lives.
# NOTE(review): the path looks like a mounted Google Drive folder - confirm
# Drive is mounted before running this cell.
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/Housing.csv"

df = pd.read_csv(DATA_PATH)

# A notebook cell only renders its LAST expression automatically, so the
# preview has to be printed explicitly or it is silently discarded.
print(df.head())

# check: column dtypes / non-null counts, then missing values per column
df.info()
df.isnull().sum()


### Data Cleaning & Preprocessing

In [None]:
from sklearn.model_selection import train_test_split

# Remove every row that contains at least one missing value.
df = df.dropna()

# Separate the target ("price") from the predictor columns.
y = df["price"]
X = df.drop(columns=["price"])

# Hold out 20% of the rows for evaluation; the fixed seed makes the
# split reproducible across runs.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split



### Preprocessing + Random Forest Pipeline

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import joblib

# X_train / X_test / y_train / y_test are reused from the train-test split
# cell above; repeating the identical seeded split here added nothing.

# Columns
# Select by "number" rather than by "object": text columns that pandas stores
# with a dedicated string dtype (instead of object) are then still routed to
# the one-hot encoder instead of being passed through as raw strings.
numerical_cols = X.select_dtypes(include="number").columns
categorical_cols = X.select_dtypes(exclude="number").columns

# Preprocessor: numeric columns are forwarded unchanged, every other column is
# one-hot encoded. handle_unknown="ignore" makes categories unseen during
# training encode as all-zeros at prediction time instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", "passthrough", numerical_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols)
    ]
)

# Full Pipeline: preprocessing + regressor bundled together so raw feature
# rows can be passed straight to predict().
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("regressor", RandomForestRegressor(
            n_estimators=100,
            random_state=42
        ))
    ]
)

# Train
model.fit(X_train, y_train)

# Save FULL pipeline (encoder + model) so the app can load a single artifact
joblib.dump(model, "/content/house_price_model.pkl")


### Accuracy (R² Score)

In [None]:
from sklearn.metrics import r2_score

# Score the fitted pipeline on the held-out test rows.
rf_pipeline_pred = model.predict(X_test)
pipeline_r2 = r2_score(y_test, rf_pipeline_pred)
print("Random Forest Pipeline R2:", pipeline_r2)

### Random Forest Regressor

In [None]:
from sklearn.base import clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# `preprocessor` is the very object used as a step of `model`, so calling
# fit_transform on it directly would refit part of the already-trained (and
# already-saved) pipeline. Fit an unfitted copy instead.
standalone_preprocessor = clone(preprocessor)

# Apply the preprocessor to transform X_train and X_test
X_train_encoded = standalone_preprocessor.fit_transform(X_train)
X_test_encoded = standalone_preprocessor.transform(X_test)

# Same hyper-parameters as the regressor inside the pipeline.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train_encoded, y_train)

rf_pred = rf.predict(X_test_encoded)

print("Random Forest R2:", r2_score(y_test, rf_pred))

### Quick Prediction Check

In [None]:
# Smoke test: run the pipeline on the first training row, kept as a
# one-row DataFrame so the column names are preserved.
sample = X_train.head(1)
model.predict(sample)


In [None]:
# List the feature column names of X (the frame the model's training rows were taken from).
print(X.columns)


## Gradio Web App (Extra Freelancing Value 🔥)

In [None]:
import gradio as gr
import joblib
import pandas as pd

# Reload the fitted pipeline that the training cell wrote to disk.
MODEL_PATH = "/content/house_price_model.pkl"
model = joblib.load(MODEL_PATH)

def predict_price(
    area, bedrooms, bathrooms, stories,
    mainroad, guestroom, basement,
    hotwaterheating, airconditioning,
    parking, prefarea, furnishingstatus
):
    """Predict a house price from the values entered in the web form.

    Each argument becomes the column of the same name in a one-row
    DataFrame that is passed to the loaded ``model``.

    Args:
        area, bedrooms, bathrooms, stories, parking: numeric inputs.
        mainroad, guestroom, basement, hotwaterheating, airconditioning,
            prefarea: "yes" / "no" choices.
        furnishingstatus: one of the furnishing options offered by the form.

    Returns:
        ``"Estimated Price: <n>"`` with thousands separators and no decimals,
        or a message naming the missing fields when any input is ``None``.
    """
    features = {
        "area": area,
        "bedrooms": bedrooms,
        "bathrooms": bathrooms,
        "stories": stories,
        "mainroad": mainroad,
        "guestroom": guestroom,
        "basement": basement,
        "hotwaterheating": hotwaterheating,
        "airconditioning": airconditioning,
        "parking": parking,
        "prefarea": prefarea,
        "furnishingstatus": furnishingstatus,
    }

    # A form widget left empty arrives as None; report it to the user rather
    # than letting the model fail (or guess) on an incomplete row.
    missing = [name for name, value in features.items() if value is None]
    if missing:
        return "Please provide a value for: " + ", ".join(missing)

    # One-row frame; the keys double as the column names the model expects.
    input_df = pd.DataFrame([features])

    prediction = model.predict(input_df)[0]
    return f"Estimated Price: {prediction:,.0f}"

def _yes_no(label):
    """Build a yes/no radio button with the given label."""
    return gr.Radio(["yes", "no"], label=label)


# Widgets are listed in the same order as predict_price's parameters,
# because the interface passes their values positionally.
form_inputs = [
    gr.Number(label="Area (sqft)"),
    gr.Number(label="Bedrooms"),
    gr.Number(label="Bathrooms"),
    gr.Number(label="Stories"),
    _yes_no("Main Road Access"),
    _yes_no("Guest Room"),
    _yes_no("Basement"),
    _yes_no("Hot Water Heating"),
    _yes_no("Air Conditioning"),
    gr.Number(label="Parking Spaces"),
    _yes_no("Preferred Area"),
    gr.Dropdown(
        ["furnished", "semi-furnished", "unfurnished"],
        label="Furnishing Status",
    ),
]

price_output = gr.Textbox(label="Predicted House Price")

app = gr.Interface(
    fn=predict_price,
    inputs=form_inputs,
    outputs=price_output,
    title="🏠 House Price Prediction App",
    description="Enter complete house details to predict price using Machine Learning",
)

# Start the web UI.
app.launch()
