<a href="https://colab.research.google.com/github/onseventhflow/house-price-prediction-regression/blob/main/house_price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install scikit-learn tensorflow




In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

In [3]:
# Predict house prices based on 10 numerical features
# Generate synthetic dataset (1000 samples, 10 features)
np.random.seed(42)

x_train = np.random.rand(1000, 10)
y_train = np.random.rand(1000) * 500000   # House prices between 0-500k

x_test = np.random.rand(200, 10)
y_test = np.random.rand(200) * 500000

# Build model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)   # Single output for regression
])

# Compile model
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

# Train model
model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=32,
    verbose=1
)

# Evaluate model
test_loss, test_mae = model.evaluate(x_test, y_test)
print(f"Test MAE: ${test_mae:.2f}")

# Make a prediction
sample_input = np.random.rand(1, 10)
predicted_price = model.predict(sample_input)
print(f"Predicted House Price: ${predicted_price[0][0]:.2f}")

# MAE is around 243K - 245K, which means on average, the predictions are off by nearly half of the actual house price.
# This is very bad because an error of $243K in a price range of $500K means the model is almost random.
# The dataset is completely random, meaning there is no real pattern for the model to learn.
# The model is guessing house prices randomly, which is why MAE is so large.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - loss: 82080382976.0000 - mae: 248890.8281
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 83513565184.0000 - mae: 251576.9062
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 84896489472.0000 - mae: 252251.7500
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 86818447360.0000 - mae: 257159.5625
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 83349413888.0000 - mae: 251876.6875
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 84547911680.0000 - mae: 252302.8125
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 82303762432.0000 - mae: 247729.8438
Epoch 8/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 85053333504

In [4]:
# Example 2 - House Price Prediction (Regression) with Synthetic dataset and Normalization

import tensorflow as tf
from tensorflow.keras import layers, models
import numpy as np

# Generate synthetic dataset (1000 samples, 10 features) in [0,1] range
np.random.seed(42)

x_train = np.random.rand(1000, 10)
y_train = np.random.rand(1000)   # Target values in range 0-1

x_test = np.random.rand(200, 10)
y_test = np.random.rand(200)     # Targets in range 0-1

# Build model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)   # Single output for regression
])

# Compile model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train model
model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=1)

# Evaluate model
test_loss, test_mae = model.evaluate(x_test, y_test)
print(f"Test MAE (Normalized): {test_mae:.4f}")

# Make a prediction
sample_input = np.random.rand(1, 10)   # In [0,1] range
predicted_value = model.predict(sample_input)
print(f"Predicted Value (Normalized): {predicted_value[0][0]:.4f}")

# This is again a bad model because we are still training on random values (just normalized values)


Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - loss: 0.1262 - mae: 0.2917
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.0880 - mae: 0.2536
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0836 - mae: 0.2452
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0861 - mae: 0.2491
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0779 - mae: 0.2394
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0789 - mae: 0.2389
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0793 - mae: 0.2404
Epoch 8/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0811 - mae: 0.2442
Epoch 9/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - loss: 0

In [5]:
# Example 3 - House Price Prediction (Regression) using California Housing Dataset SCIKIT LEARN Dataset
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Load California Housing dataset (CSV version – works in Colab)
df = pd.read_csv(
    "https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.csv"
)

# Use only numerical features (drop categorical column)
x = df.drop(["median_house_value", "ocean_proximity"], axis=1).values
y = df["median_house_value"].values / 100000   # scale to 100,000s

print(
    f"Target Value Range (in 100,000s): "
    f"Min = {y.min():.2f}, Max = {y.max():.2f}, Mean = {y.mean():.2f}"
)

# Split data into train and test sets
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Normalize features – Z-Score Standardization
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Build neural network model
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(x_train.shape[1],)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)   # Single output for house price
])

# Compile model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train model
model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=1)

# Evaluate model
test_loss, test_mae = model.evaluate(x_test, y_test)
print(f"Test MAE: ${test_mae * 100000:.2f}")

# Make a prediction
sample_input = np.expand_dims(x_test[0], axis=0)
predicted_price = model.predict(sample_input)
print(f"Predicted House Price: ${predicted_price[0][0] * 100000:.2f}")


Target Value Range (in 100,000s): Min = 0.15, Max = 5.00, Mean = 2.07
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2.1265 - mae: 1.0002
Epoch 2/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.4004 - mae: 0.4515
Epoch 3/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3661 - mae: 0.4332
Epoch 4/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3612 - mae: 0.4288
Epoch 5/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3525 - mae: 0.4192
Epoch 6/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.3259 - mae: 0.4023
Epoch 7/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3194 - mae: 0.4006
Epoch 8/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3065 - mae: 0.3905
Epoch 9/20
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - los

In [None]:
# #  - House Price Prediction (Regression) with SCIKIT Dataset

# import tensorflow as tf
# from tensorflow.keras import layers, models
# from sklearn.datasets import fetch_california_housing
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# import numpy as np

# # Load California housing dataset
# data = fetch_california_housing()
# x, y = data.data, data.target   # Features and target (median house value in 100,000s)

# print(
#     f"Target Value Range (in 100,000s): "
#     f"Min = {y.min():.2f}, Max = {y.max():.2f}, Mean = {y.mean():.2f}"
# )

# # Split data into train and test sets
# x_train, x_test, y_train, y_test = train_test_split(
#     x, y, test_size=0.2, random_state=42
# )

# # Normalize features for better training stability – Z-Score Standardization / Standard scaling
# scaler = StandardScaler()
# x_train = scaler.fit_transform(x_train)   # Compute mean & std from training data, then scale
# x_test = scaler.transform(x_test)          # Use the same scaling parameters to transform test data

# # We use the same mean and standard deviation (computed from x_train) to scale x_test
# # This ensures that both training and test data follow the same distribution

# # If we include x_test when computing mean and standard deviation, the model gets information from the test set before training
# # This is called data leakage, which can lead to unrealistically good performance and poor generalization to new data
# # Test data should only be used for evaluation after the model is trained

# # In real-world applications, new data arrives after training. We do not get to recompute the mean and std for each new data point
# # The model should be able to handle unseen data using the same scaling applied during training


# # Build model
# model = models.Sequential([
#     layers.Dense(64, activation='relu', input_shape=(x_train.shape[1],)),  # Input features from dataset
#     layers.Dense(32, activation='relu'),
#     layers.Dense(1)   # Single output for regression
# ])

# # Compile model
# model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# # Train model
# model.fit(x_train, y_train, epochs=20, batch_size=32, verbose=1)

# # Evaluate model
# test_loss, test_mae = model.evaluate(x_test, y_test)
# print(f"Test MAE: ${test_mae * 100000:.2f}")   # Convert to actual dollars

# # Make a prediction
# sample_input = np.expand_dims(x_test[0], axis=0)   # Take one test sample
# predicted_price = model.predict(sample_input)
# print(f"Predicted House Price: ${predicted_price[0][0] * 100000:.2f}")  # Convert to actual dollars
