In [31]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [32]:
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x1b25c846470>

In [33]:
df = pd.read_csv("../data/house_prices.csv")

df.head(10)

Unnamed: 0,Area,Bedrooms,Age,Price
0,1660,6,24,135800.0
1,2094,3,29,111700.0
2,1930,6,24,138100.0
3,1895,4,1,144500.0
4,2438,2,2,137600.0
5,2969,2,6,161400.0
6,1266,5,20,93700.0
7,2038,5,17,147600.0
8,1130,2,2,96100.0
9,2282,5,9,148900.0


# Features (X) and Target (y)


In [34]:
X = df.drop('Price', axis=1).values
y = df['Price'].values.reshape(-1, 1)

# Scale features & target


In [35]:
scaler_X = StandardScaler()
scaler_y = StandardScaler()

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)


# Train/test split


In [36]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

# Convert to tensors


In [37]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

# Create DataLoader for batch training


In [38]:
batch_size = 32
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Define Model


In [39]:
class HousePriceModel(nn.Module):
    def __init__(self, input_size):
        super(HousePriceModel, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.model(x)

In [40]:
model = HousePriceModel(input_size=X_train.shape[1])

# Loss function, Optimizer and Scheduler

In [41]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001,  weight_decay=1e-5)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=10)

# Training loop with batches

In [None]:
max_epochs = 500
patience = 20
best_val_loss = np.inf
epochs_no_improve = 0
best_state = None

for epoch in range(1, max_epochs + 1):
    # Training
    model.train()
    running_loss = 0.0
    for xb, yb in train_loader:
        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    train_loss = running_loss / len(train_loader)

    # Validation
    model.eval()
    with torch.no_grad():
        val_preds = model(X_test)
        val_loss = criterion(val_preds, y_test).item()

    # Scheduler step
    scheduler.step(val_loss)

    # Early stopping
    if val_loss < best_val_loss - 1e-5:
        best_val_loss = val_loss
        epochs_no_improve = 0
        best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
    else:
        epochs_no_improve += 1

    # Print progress
    current_lr = scheduler.get_last_lr()[0]
    if epoch % 20 == 0 or epochs_no_improve == 0:
        print(f"Epoch {epoch:4d} | Train Loss: {train_loss:.5f} | Val Loss: {val_loss:.5f} | LR: {current_lr:.6f}")

    if epochs_no_improve >= patience:
        print(f"Early stopping at epoch {epoch} (best val_loss={best_val_loss:.5f})")
        break

Epoch    1 | Train Loss: 0.76769 | Val Loss: 0.52170 | LR: 0.001000
Epoch    2 | Train Loss: 0.32449 | Val Loss: 0.12773 | LR: 0.001000
Epoch    3 | Train Loss: 0.09334 | Val Loss: 0.08489 | LR: 0.001000
Epoch    4 | Train Loss: 0.07304 | Val Loss: 0.07601 | LR: 0.001000
Epoch    5 | Train Loss: 0.06962 | Val Loss: 0.07590 | LR: 0.001000
Epoch    7 | Train Loss: 0.06995 | Val Loss: 0.07386 | LR: 0.001000
Epoch    9 | Train Loss: 0.06591 | Val Loss: 0.07310 | LR: 0.001000
Epoch   11 | Train Loss: 0.06541 | Val Loss: 0.07263 | LR: 0.001000
Epoch   19 | Train Loss: 0.06334 | Val Loss: 0.07254 | LR: 0.001000
Epoch   20 | Train Loss: 0.06340 | Val Loss: 0.07337 | LR: 0.001000
Epoch   23 | Train Loss: 0.06242 | Val Loss: 0.07236 | LR: 0.001000
Epoch   34 | Train Loss: 0.06278 | Val Loss: 0.07207 | LR: 0.001000
Epoch   40 | Train Loss: 0.06178 | Val Loss: 0.07384 | LR: 0.001000
Early stopping at epoch 54 (best val_loss=0.07207)


In [43]:
# Restore best model
if best_state is not None:
    model.load_state_dict(best_state)

# Evaluation

In [44]:
model.eval()
with torch.no_grad():
    y_pred_scaled = model(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled.numpy())
    y_test_orig = scaler_y.inverse_transform(y_test.numpy())


# Mean Absolute Error and R2 Score

In [45]:
mae = mean_absolute_error(y_test_orig, y_pred)
r2 = r2_score(y_test_orig, y_pred)
print(f"MAE: {mae:,.2f}")
print(f"R² Score: {r2:.4f}")

MAE: 7,978.75
R² Score: 0.9283


# Actual and Predicted Prices

In [46]:
results_df = pd.DataFrame({
    "Actual Price": y_test_orig.flatten(),
    "Predicted Price": y_pred.flatten()
})

print(results_df.head(10))

   Actual Price  Predicted Price
0      135400.0    119407.234375
1      100700.0    118551.062500
2      112200.0    114936.656250
3      166100.0    152684.625000
4      192100.0    196820.515625
5      133100.0    131305.015625
6      191700.0    183348.937500
7      103400.0    100937.242188
8       61200.0     79463.984375
9      138600.0    117636.804688
