In [4]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import pandas as pd
import numpy as np

class MultimodalDataset(Dataset):
    def __init__(self, csv_file, img_dir, transform=None):
        # Handle both string paths and DataFrames
        self.data = pd.read_csv(csv_file) if isinstance(csv_file, str) else csv_file
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        tab_features = torch.tensor(row.drop(['id', 'price_log'], errors='ignore').values, dtype=torch.float32)
        label = torch.tensor(row['price_log'], dtype=torch.float32)
        
        img_path = os.path.join(self.img_dir, f"{int(row['id'])}.jpg")
        image = Image.open(img_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, tab_features, label

image_transforms = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.5), # New: Helps generalization
    transforms.RandomRotation(15),           # New: Handles satellite tilt
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [5]:
import torch.nn as nn
from torchvision import models

class PricePredictor(nn.Module):
    def __init__(self, num_tabular_cols):
        super(PricePredictor, self).__init__()
        # 1. Change the model name
        self.resnet = models.resnet50(weights='DEFAULT') 
        self.vision_features = nn.Sequential(*list(self.resnet.children())[:-1])
        self.tabular_branch = nn.Sequential(
            nn.Linear(num_tabular_cols, 32),
            nn.ReLU(),
            nn.Linear(32, 16)
        )
        self.regressor = nn.Sequential(
            nn.Linear(2048 + 16, 64),
            nn.ReLU(),
            nn.Linear(64, 1) 
        )

    def forward(self, img, tab):
        v_feat = self.vision_features(img).view(img.size(0), -1)
        t_feat = self.tabular_branch(tab)
        combined = torch.cat((v_feat, t_feat), dim=1)
        return self.regressor(combined)

In [6]:
import joblib

# 1. Load the scaler and count features correctly
scaler = joblib.load('data/scaler.pkl')
num_features = len(scaler.feature_names_in_)
print(f"Scaler loaded. Model will use {num_features} tabular features.")

# 2. Setup DataLoaders
train_dataset = MultimodalDataset('data/processed_train.csv', 'house_images', image_transforms)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# 3. Initialize Model once
model = PricePredictor(num_tabular_cols=num_features)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# 4. Setup Optimizer and NEW Scheduler for better accuracy
criterion = nn.MSELoss() 
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) # Lower LR for stability
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)

print(f"Ready to train on {device}!")

Scaler loaded. Model will use 13 tabular features.
Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to C:\Users\tanis/.cache\torch\hub\checkpoints\resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:26<00:00, 3.82MB/s]


Ready to train on cpu!


In [7]:
from tqdm import tqdm

# Increase epochs to 10 for better accuracy if your time allows
epochs = 10 

for epoch in range(epochs):
    model.train()
    total_loss = 0
    
    # Progress bar setup
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}", leave=True)
    
    for images, tabular, labels in loop:
        # Move data to GPU/CPU
        images, tabular, labels = images.to(device), tabular.to(device), labels.to(device).view(-1, 1)
        
        # Zero gradients
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images, tabular)
        loss = criterion(outputs, labels)
        
        # Backward pass & Optimization
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        
        # Update progress bar with live loss
        loop.set_postfix(loss=loss.item())
    
    # Update the Learning Rate
    scheduler.step()
    current_lr = optimizer.param_groups[0]['lr']
    
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1} Complete. Avg Loss: {avg_loss:.4f} | LR: {current_lr}")

Epoch 1/10: 100%|██████████| 807/807 [13:07<00:00,  1.03it/s, loss=0.721]  


Epoch 1 Complete. Avg Loss: 8.1726 | LR: 0.0001


Epoch 2/10: 100%|██████████| 807/807 [09:31<00:00,  1.41it/s, loss=0.189] 


Epoch 2 Complete. Avg Loss: 0.2694 | LR: 5e-05


Epoch 3/10: 100%|██████████| 807/807 [11:17<00:00,  1.19it/s, loss=0.248]   


Epoch 3 Complete. Avg Loss: 0.1364 | LR: 5e-05


Epoch 4/10: 100%|██████████| 807/807 [14:08<00:00,  1.05s/it, loss=0.0625]  


Epoch 4 Complete. Avg Loss: 0.1121 | LR: 2.5e-05


Epoch 5/10: 100%|██████████| 807/807 [10:37<00:00,  1.27it/s, loss=0.219] 


Epoch 5 Complete. Avg Loss: 0.0929 | LR: 2.5e-05


Epoch 6/10: 100%|██████████| 807/807 [11:16<00:00,  1.19it/s, loss=0.119] 


Epoch 6 Complete. Avg Loss: 0.0852 | LR: 1.25e-05


Epoch 7/10: 100%|██████████| 807/807 [10:13<00:00,  1.32it/s, loss=0.0485]


Epoch 7 Complete. Avg Loss: 0.0769 | LR: 1.25e-05


Epoch 8/10: 100%|██████████| 807/807 [09:12<00:00,  1.46it/s, loss=0.118] 


Epoch 8 Complete. Avg Loss: 0.0736 | LR: 6.25e-06


Epoch 9/10: 100%|██████████| 807/807 [09:30<00:00,  1.41it/s, loss=0.0512]


Epoch 9 Complete. Avg Loss: 0.0687 | LR: 6.25e-06


Epoch 10/10: 100%|██████████| 807/807 [09:48<00:00,  1.37it/s, loss=0.069] 

Epoch 10 Complete. Avg Loss: 0.0669 | LR: 3.125e-06





In [8]:
# 1. Load the processed test data created in the Preprocessing Notebook
test_df = pd.read_csv('data/processed_test.csv')

# 2. Create the Test Dataset object
test_dataset = MultimodalDataset(
    csv_file=test_df, 
    img_dir='house_images', 
    transform=image_transforms
)

# 3. Create the Test Loader
# We use shuffle=False because we want the results in a predictable order
test_loader = DataLoader(
    test_dataset, 
    batch_size=16, 
    shuffle=False
)

print(f"Test Loader ready with {len(test_df)} houses!")

Test Loader ready with 3229 houses!


In [9]:
model.eval() # Set model to evaluation mode
all_preds = []
all_actuals = []

with torch.no_grad(): # Disable gradient calculation to save memory
    for images, tabular, labels in tqdm(test_loader, desc="Evaluating"):
        images, tabular = images.to(device), tabular.to(device)
        
        # Get model output
        outputs = model(images, tabular)
        
        # Convert log-prices back to actual dollars
        all_preds.extend(np.expm1(outputs.cpu().numpy()).flatten())
        all_actuals.extend(np.expm1(labels.numpy()).flatten())

print("Variables all_preds and all_actuals are now defined!")

Evaluating: 100%|██████████| 202/202 [01:21<00:00,  2.46it/s]

Variables all_preds and all_actuals are now defined!





In [10]:
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

# 1. Convert lists to numpy arrays
preds_array = np.array(all_preds)
actuals_array = np.array(all_actuals)

# 2. Calculate R2 Score (Accuracy)
r2 = r2_score(actuals_array, preds_array)

# 3. Calculate MAE (Average Error)
mae = np.mean(np.abs(preds_array - actuals_array))

# 4. Calculate RMSE (Penalty for large errors)
rmse = np.sqrt(mean_squared_error(actuals_array, preds_array))

print(f"--- FINAL MODEL PERFORMANCE ---")
print(f"R2 Score: {r2:.4f}")
print(f"Mean Absolute Error (MAE): ${mae:,.2f}")
print(f"Root Mean Squared Error (RMSE): ${rmse:,.2f}")

--- FINAL MODEL PERFORMANCE ---
R2 Score: 0.6955
Mean Absolute Error (MAE): $114,119.39
Root Mean Squared Error (RMSE): $186,368.32


In [11]:
# Save the weights to a file
torch.save(model.state_dict(), 'multimodal_house_model_v1.pth')
print("Model weights saved successfully!")

Model weights saved successfully!
