 House Price Prediction using the California Housing Dataset

In [None]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [None]:
# Step 2: Load dataset
from sklearn.datasets import fetch_california_housing

# Build one DataFrame holding the feature columns plus the target,
# which is stored under the column name 'PRICE'.
housing = fetch_california_housing()
df = pd.DataFrame(
    data=housing.data,
    columns=housing.feature_names,
).assign(PRICE=housing.target)

print("Dataset Shape:", df.shape)
print(df.head())

In [None]:
# Step 3: EDA (optional visualization)

# Histogram of the target column with a KDE curve overlaid.
price_fig, price_ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['PRICE'], bins=30, kde=True, ax=price_ax)
price_ax.set_title("Distribution of House Prices")
plt.show()

# Annotated heatmap of the pairwise correlations between all columns
# (features and target).
corr_fig, corr_ax = plt.subplots(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm", ax=corr_ax)
corr_ax.set_title("Feature Correlation Heatmap")
plt.show()

In [None]:
# Step 4: Feature selection & target
# The target (y) is the 'PRICE' column; the features (X) are every
# remaining column of df.

y = df['PRICE']
X = df.drop(columns=['PRICE'])

In [None]:
# Step 5: Train-Test Split
# Hold out 20% of the rows for testing (test_size=0.2). The fixed
# random_state makes the shuffle, and therefore the split, reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Step 6: Feature Scaling
# Fit the scaler on the training features only, then apply that same
# fitted transform to both splits so no test-set statistics are used.

scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Step 7: Model Training
# Create a linear regression estimator and fit it on the scaled
# training features against the training targets.

model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Step 8: Predictions
# Predict the target for the held-out test features with the fitted model.
y_pred = model.predict(X_test)

In [None]:
# Step 9: Evaluation
# Score the test-set predictions against the true targets:
#   - MSE:  mean of the squared errors
#   - RMSE: square root of the MSE, in the same units as the target
#   - R2:   coefficient of determination

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"Root Mean Squared Error: {rmse:.2f}")
# The label previously contained mojibake ("RÂ²"). "\u00b2" is the
# superscript-two character, written as an escape so the source stays
# ASCII and cannot be mis-decoded again.
print(f"R\u00b2 Score: {r2:.2f}")

In [None]:
# Step 10: Comparing Actual vs Predicted
# Put the true test targets next to the model's predictions in one
# table and show its first rows.

actual_vs_predicted = {'Actual': y_test, 'Predicted': y_pred}
comparison = pd.DataFrame(actual_vs_predicted)
print(comparison.head())

In [None]:
# Visualization of Predictions
# Scatter the predicted values against the actual values.
scatter_fig, scatter_ax = plt.subplots(figsize=(8, 6))
scatter_ax.scatter(y_test, y_pred, alpha=0.7, color="blue")
scatter_ax.set(
    xlabel="Actual Prices",
    ylabel="Predicted Prices",
    title="Actual vs Predicted House Prices",
)
plt.show()