# House Price Prediction

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
# -----------------------------
# 1. Load the dataset
# -----------------------------
# Point the path below at your own CSV file. The later steps expect the
# file to contain a 'price' column, which is used as the target.
data = pd.read_csv(filepath_or_buffer='house_prices.csv')

In [None]:
# -----------------------------
# 2. Data Cleaning
# -----------------------------
# Rows without a target value cannot be used for supervised training,
# so drop them first.
data = data.dropna(subset=['price'])

# Impute missing numeric values with each column's median. The medians
# are computed once and applied in a single vectorized `fillna` call,
# which returns a new frame instead of assigning column-by-column into
# the filtered result of `dropna`. Columns that have no entry in
# `numeric_medians` (i.e. non-numeric columns) are left untouched.
# NOTE(review): the medians are computed on the full dataset, before the
# train/test split in step 4, so test rows influence the imputed values.
# Consider imputing after the split if that matters for your use case.
num_cols = data.select_dtypes(include=[np.number]).columns
numeric_medians = data[num_cols].median()
data = data.fillna(numeric_medians)

# If you have categorical features, consider filling their missing values
# and one-hot encoding them before feature selection. For example:
# data = pd.get_dummies(data, drop_first=True)


In [None]:
# -----------------------------
# 3. Feature Selection
# -----------------------------
# Keep only the features whose absolute correlation with the target
# 'price' is strictly greater than this threshold.
CORR_THRESHOLD = 0.2

# Correlate numeric/boolean columns only. Since pandas 2.0,
# `DataFrame.corr()` no longer drops non-numeric columns silently and
# raises when a column (e.g. an un-encoded categorical) cannot be
# converted to float. Selecting the dtypes explicitly behaves the same
# on older and newer pandas versions.
# NOTE(review): correlations are computed on the full dataset, before the
# train/test split in step 4, so test rows influence which features are
# selected.
numeric_data = data.select_dtypes(include=['number', 'bool'])
corr_matrix = numeric_data.corr()
target_corr = corr_matrix['price'].abs().sort_values(ascending=False)

# Remove the target itself before thresholding, so a NaN self-correlation
# (constant 'price') cannot make the later lookup of 'price' fail.
feature_corr = target_corr.drop('price')
selected_features = feature_corr[feature_corr > CORR_THRESHOLD].index
print("Selected features:", list(selected_features))

# Fail here with a clear message rather than later inside the scaler or
# the network when nothing passed the threshold.
if selected_features.empty:
    raise ValueError(
        "No feature has an absolute correlation with 'price' above "
        f"{CORR_THRESHOLD}; lower CORR_THRESHOLD or check the data."
    )

# Define input features (X) and target variable (y).
X = data[selected_features]
y = data['price']

In [None]:
# -----------------------------
# 4. Preprocessing: Train-Test Split and Scaling
# -----------------------------
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both partitions, so the test rows do not contribute
# to the scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# -----------------------------
# 5. Build the Neural Network
# -----------------------------
# Simple feed-forward regressor: two ReLU hidden layers (64 and 32 units)
# followed by a single linear output unit.
# The input width is declared with an explicit `Input` object as the
# first element instead of passing `input_dim` to the first Dense layer;
# Keras 3 warns about `input_dim`/`input_shape` layer arguments and
# recommends `Input(shape)` for Sequential models.
n_features = X_train_scaled.shape[1]
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),  # Output layer for regression
])


In [None]:
# Configure training: the Adam optimizer minimizes mean squared error.
model.compile(loss='mean_squared_error', optimizer='adam')

# -----------------------------
# 6. Train the Model
# -----------------------------
# Train for 100 epochs in mini-batches of 32 samples. A validation_split
# of 0.2 sets aside 20% of the training data for validation, and the
# returned `history` object is kept for later inspection.
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)


In [None]:
# -----------------------------
# 7. Evaluate the Model
# -----------------------------
# Run the trained network on the scaled held-out test features, then
# compare its predictions with the true test prices using mean squared
# error.
predictions = model.predict(x=X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
print("Test Mean Squared Error:", mse)