In [67]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [52]:
df = pd.read_csv("../data/house_prices.csv")

df.head(10)

Unnamed: 0,Area,Bedrooms,Age,Price
0,1660,6,24,135800.0
1,2094,3,29,111700.0
2,1930,6,24,138100.0
3,1895,4,1,144500.0
4,2438,2,2,137600.0
5,2969,2,6,161400.0
6,1266,5,20,93700.0
7,2038,5,17,147600.0
8,1130,2,2,96100.0
9,2282,5,9,148900.0


# Features (X) and Target (y)


In [53]:
X = df.drop('Price', axis=1).values
y = df['Price'].values.reshape(-1, 1)

# Scale features & target


In [54]:
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)


# Train/test split


In [55]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Convert to tensors


In [56]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader for batch training


In [57]:
batch_size = 32
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Define Model


In [58]:
class HousePriceModel(nn.Module):
    def __init__(self, input_size):
        super(HousePriceModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

In [59]:
model = HousePriceModel(input_size=X_train.shape[1])

# Loss function, Optimizer and Scheduler

In [65]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001,  weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=10)

# Training loop with batches

In [68]:
max_epochs = 500
patience = 20
best_val_loss = np.inf
epochs_no_improve = 0
best_state = None

for epoch in range(1, max_epochs + 1):
    # Training
    model.train()
    running_loss = 0.0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    train_loss = running_loss / len(train_loader)

    # Validation
    model.eval()
    with torch.no_grad():
        val_preds = model(X_test_t)
        val_loss = criterion(val_preds, y_test_t).item()

    # Scheduler step
    scheduler.step(val_loss)

    # Early stopping
    if val_loss < best_val_loss - 1e-5:
        best_val_loss = val_loss
        epochs_no_improve = 0
        best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
    else:
        epochs_no_improve += 1

    # Print progress
    current_lr = scheduler.get_last_lr()[0]
    if epoch % 20 == 0 or epochs_no_improve == 0:
        print(f"Epoch {epoch:4d} | Train Loss: {train_loss:.5f} | Val Loss: {val_loss:.5f} | LR: {current_lr:.6f}")

    if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch} (best val_loss={best_val_loss:.5f})")
        break

Epoch    1 | Train Loss: 0.04848 | Val Loss: 0.08470 | LR: 0.001000
Epoch    2 | Train Loss: 0.04833 | Val Loss: 0.08388 | LR: 0.001000
Epoch    6 | Train Loss: 0.04731 | Val Loss: 0.08335 | LR: 0.001000
Epoch   10 | Train Loss: 0.04740 | Val Loss: 0.08238 | LR: 0.001000
Epoch   20 | Train Loss: 0.04683 | Val Loss: 0.08361 | LR: 0.001000
Early stopping at epoch 30 (best val_loss=0.08238)


In [69]:
# Restore best model
if best_state is not None:
    model.load_state_dict(best_state)

# Evaluation

In [70]:
model.eval()
with torch.no_grad():
    y_pred_scaled = model(X_test_t)
    y_pred = scaler_y.inverse_transform(y_pred_scaled.numpy())
    y_test_orig = scaler_y.inverse_transform(y_test_t.numpy())


# Mean Absolute Error and R2 Score

In [74]:
mae = mean_absolute_error(y_test_orig, y_pred)
r2 = r2_score(y_test_orig, y_pred)
print(f"MAE: {mae:,.2f}")
print(f"R² Score: {r2:.4f}")

MAE: 8,707.00
R² Score: 0.9181


# Actual and Predicted Prices

In [71]:
results_df = pd.DataFrame({
    "Actual Price": y_test_orig.flatten(),
    "Predicted Price": y_pred.flatten()
})

print(results_df.head(10))

   Actual Price  Predicted Price
0      135400.0    120904.460938
1      100700.0    120271.750000
2      112200.0    116916.296875
3      166100.0    157998.187500
4      192100.0    193395.187500
5      133100.0    126743.265625
6      191700.0    183894.609375
7      103400.0    101458.171875
8       61200.0     79270.187500
9      138600.0    119669.718750


# Wrap Model in a Function

In [79]:
def predict_house_price(area_sqft, bedrooms, age_years, model, scaler_X, scaler_y):
    # Create input array
    X_new = np.array([[area_sqft, bedrooms, age_years]])
    
    # Scale features
    X_scaled = scaler_X.transform(X_new)
    
    # Convert to tensor
    X_t = torch.tensor(X_scaled, dtype=torch.float32)
    
    # Model prediction
    model.eval()
    with torch.no_grad():
        y_scaled_pred = model(X_t).numpy()
    
    # Inverse scale to original price
    price_pred = scaler_y.inverse_transform(y_scaled_pred)
    
    return float(price_pred[0, 0])


# Test Function

In [81]:
predicted_price = predict_house_price(1660, 6, 24, model, scaler_X, scaler_y)
print(f"Predicted house price: {predicted_price:,.2f}")

Predicted house price: 133,209.30
