In [13]:
import json
import os
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader


In [None]:
# Project root. Set the PROPERTY_VALUATION_DIR environment variable to point at your
# checkout instead of editing this cell; the fallback is the original author's path.
BASE_DIR = Path(
    os.environ.get("PROPERTY_VALUATION_DIR", "/Users/prabh/Desktop/ML/property_valuation")
).expanduser()

# Everything else is derived from BASE_DIR so only one location ever needs changing.
DATA_DIR = BASE_DIR / "data"
PROCESSED_DIR = DATA_DIR / "processed"
IMAGES_DIR = DATA_DIR / "images"
OUTPUT_DIR = BASE_DIR / "outputs"

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [15]:
# Load the preprocessed test split and report how many rows it contains.
test_df = pd.read_csv(PROCESSED_DIR.joinpath("test_processed.csv"))
print("Test samples:", test_df.shape[0])



Test samples: 5396


In [16]:
# Restore the previously saved tabular model from the outputs directory.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that you produced yourself or otherwise trust.
xgb_model = joblib.load(OUTPUT_DIR.joinpath("xgboost_model.pkl"))


In [17]:
# Tabular columns handed to the XGBoost model.
# NOTE(review): presumably these must match, in name and order, the columns the model
# was trained on — confirm against the training notebook.
FEATURES = [
    'bedrooms',
    'bathrooms',
    'sqft_living',
    'sqft_lot',
    'floors',
    'sqft_above',
    'sqft_basement',
    'sqft_living15',
    'sqft_lot15',
    'grade',
    'condition',
    'view',
    'waterfront',
    'yr_renovated',
    'yr_built',
    'house_age',
    'renovated_flag',
]

# Model input: only the feature columns, in the order listed above.
X_test = test_df[FEATURES]


In [18]:
# Baseline prediction from the tabular model, one value per test row.
# These values are later summed with the CNN residuals and passed through np.expm1,
# so they are presumably in log1p(price) space — TODO confirm against training.
xgb_preds = xgb_model.predict(X_test)


In [19]:
# Attach the baseline predictions as a new "xgb_pred" column (test_df itself is left
# untouched) and persist the result so the image dataset can read it from disk.
test_with_xgb = test_df.assign(xgb_pred=xgb_preds)

save_path = PROCESSED_DIR / "test_with_xgb.csv"
test_with_xgb.to_csv(save_path, index=False)
print("Saved:", save_path)


Saved: \Users\prabh\Desktop\ML\property_valuation\data\processed\test_with_xgb.csv


In [20]:
# Re-read the file that was just written, so this frame holds exactly what is on disk —
# the same CSV path is handed to PropertyDataset further down.
# NOTE(review): the final step adds CNN residuals to this frame's "xgb_pred" column by
# position, which assumes the dataset yields samples in this file's row order — TODO confirm
# in src.dataset.
test_with_xgb = pd.read_csv(PROCESSED_DIR / "test_with_xgb.csv")


In [21]:
from src.dataset import PropertyDataset

# DataLoader comes from the notebook's top import cell; it is not re-imported here.

# Number of samples per forward pass during inference (tunable).
INFERENCE_BATCH_SIZE = 32

# Dataset built from the CSV that carries the "xgb_pred" column plus the image folder.
# The split/mode/xgb_pred_col arguments are interpreted by src.dataset.PropertyDataset.
test_residual_ds = PropertyDataset(
    PROCESSED_DIR / "test_with_xgb.csv",
    IMAGES_DIR,
    split="test",
    mode="residual",
    xgb_pred_col="xgb_pred",
)

# shuffle=False keeps batches in dataset order. The CNN outputs are later added to
# test_with_xgb["xgb_pred"] by position, so the order must not be randomized.
test_loader = DataLoader(
    test_residual_ds,
    batch_size=INFERENCE_BATCH_SIZE,
    shuffle=False,
)


In [23]:
from src.models import DualZoomResidualCNN

# torch comes from the notebook's top import cell; it is not re-imported here.

# Build the network on the selected device, then restore the trained weights.
residual_model = DualZoomResidualCNN().to(DEVICE)
residual_model.load_state_dict(
    torch.load(
        OUTPUT_DIR / "adaptive_fusion_final.pth",
        map_location=DEVICE,   # remap saved tensors onto the device chosen in the config cell
        weights_only=True,     # restrict unpickling to tensors/plain containers (safer load)
    )
)

# Switch to evaluation mode so layers such as BatchNorm use their stored running statistics.
# As the last expression of the cell, this also displays the module structure.
residual_model.eval()


DualZoomResidualCNN(
  (image_encoder): DualImageEncoder(
    (encoder16): CNNImageEncoder(
      (backbone): Sequential(
        (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
        (4): Sequential(
          (0): BasicBlock(
            (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (relu): ReLU(inplace=True)
            (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
          (1): BasicBlock(
            (conv1): Conv2d(64, 64, kernel_size=(

In [24]:
# Run the residual CNN over the whole test loader and collect its outputs.
# NOTE(review): assumes the model emits one value per sample — the count check below
# fails loudly if that does not hold.
residual_batches = []

with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for img16, img18, _ in test_loader:  # third item of each batch is not used here
        preds = residual_model(img16.to(DEVICE), img18.to(DEVICE))
        # Flatten per batch so (B, 1), (B,) and scalar (batch of one) outputs all
        # concatenate cleanly; a single global squeeze() would collapse a one-sample
        # result to a 0-d array.
        residual_batches.append(preds.reshape(-1).cpu().numpy())

cnn_residuals = np.concatenate(residual_batches)

# The residuals are combined with the tabular predictions by position, so there must be
# exactly one per dataset item.
n_expected = len(test_loader.dataset)
if cnn_residuals.shape[0] != n_expected:
    raise ValueError(
        f"Expected {n_expected} CNN residuals (one per test sample), "
        f"got {cnn_residuals.shape[0]}"
    )


In [25]:
# Final prediction = tabular baseline + CNN correction, added element-wise by position.
final_log_price = np.add(test_with_xgb["xgb_pred"].to_numpy(), cnn_residuals)

# Invert the log1p transform (expm1(x) = exp(x) - 1) to obtain the final price values.
final_price = np.expm1(final_log_price)


In [26]:
# Two-column submission table: the row identifier and its predicted price.
final_output = test_with_xgb[["id"]].assign(predicted_price=final_price)

# Write the submission file into the outputs directory, without the index column.
save_path = OUTPUT_DIR / "24114066_final.csv"
final_output.to_csv(save_path, index=False)
print("Final predictions saved to:", save_path)

Final predictions saved to: \Users\prabh\Desktop\ML\property_valuation\outputs\24114066_final.csv
