In [20]:
import torch
import torch.nn as nn
import torchvision.models as models
import pandas as pd
import os
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class MultiModalModel(nn.Module):
    """Two-branch regressor that fuses image features with tabular features.

    The image branch is a CNN backbone whose classification head (``fc``) is
    replaced by ``nn.Identity`` so that it emits its pooled feature vector.
    The tabular branch is a one-layer MLP. Both feature vectors are
    concatenated and mapped by a small fusion head to one value per sample
    (the predicted log price).

    Args:
        tabular_input_dim: Number of tabular features per sample.
        backbone: Optional image backbone exposing a linear ``fc`` head (as
            torchvision ResNets do). Its ``fc`` attribute is replaced in
            place. When omitted, a ResNet18 with the default pretrained
            weights is created.
    """

    def __init__(self, tabular_input_dim, backbone=None):
        super().__init__()

        # Branch 1: Image (ResNet18 unless a backbone is supplied)
        if backbone is None:
            backbone = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
        self.cnn = backbone
        # Read the feature width *before* dropping the classification layer so
        # the fusion head always matches the backbone actually in use.
        cnn_out_features = self.cnn.fc.in_features
        self.cnn.fc = nn.Identity()  # Remove classification layer

        # Branch 2: Tabular (Multilayer Perceptron)
        tab_out_features = 64
        self.mlp = nn.Sequential(
            nn.Linear(tabular_input_dim, tab_out_features),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # Fusion: merge CNN and MLP outputs (512 + 64 for the default ResNet18)
        self.fusion = nn.Sequential(
            nn.Linear(cnn_out_features + tab_out_features, 128),
            nn.ReLU(),
            nn.Linear(128, 1),  # Output: Predicted Log Price
        )

    def forward(self, image, tabular):
        """Return a ``(batch, 1)`` prediction for a batch of images and tabular rows.

        Args:
            image: Batch of images accepted by the CNN backbone.
            tabular: Tensor of shape ``(batch, tabular_input_dim)``.
        """
        img_feats = self.cnn(image)
        tab_feats = self.mlp(tabular)
        # Concatenate along the feature axis; both branches share the batch axis.
        combined = torch.cat((img_feats, tab_feats), dim=1)
        return self.fusion(combined)

In [21]:
from torch.utils.data import DataLoader

# Training split: tabular rows come from the processed CSV and the matching
# pictures from the `house_images` folder.
# NOTE(review): `MultimodalDataset` and `transform` are not defined in this
# cell; both must already exist in the kernel when it runs.
train_ds = MultimodalDataset(
    csv_file="data/processed_train.csv",
    img_dir="house_images",
    transform=transform,
)

# Shuffled mini-batches of 32 samples; `train_loader` is consumed by the
# training loop.
train_loader = DataLoader(dataset=train_ds, batch_size=32, shuffle=True)

In [22]:
class MultimodalDataset(Dataset):
    """Dataset pairing each CSV row with the image file named after its ``id``.

    The CSV must contain an ``id`` column (used to locate
    ``<img_dir>/<id>.jpg``) and a ``price_log`` column (the regression
    target). Every remaining column is treated as a tabular feature, in CSV
    column order.

    Args:
        csv_file: Path to the CSV file.
        img_dir: Directory containing one ``<id>.jpg`` per row.
        transform: Optional callable applied to the loaded RGB PIL image.

    Attributes set in ``__init__``:
        data: The DataFrame read from ``csv_file``.
        features: Float tensor of shape ``(rows, num_features)``.
        targets: Float tensor of shape ``(rows, 1)``.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

        # Convert the tabular part once instead of on every __getitem__ call.
        # Going through a float64 matrix also copes with mixed column dtypes
        # (e.g. bool one-hot columns), which a per-row `.values` would turn
        # into an object array that torch.tensor cannot convert.
        feature_frame = self.data.drop(columns=['id', 'price_log'])
        self.features = torch.tensor(
            feature_frame.to_numpy(dtype='float64'), dtype=torch.float
        )
        self.targets = torch.tensor(
            self.data['price_log'].to_numpy(dtype='float64'), dtype=torch.float
        ).unsqueeze(1)

    @property
    def num_features(self):
        """Number of tabular feature columns (everything but id/price_log)."""
        return self.features.shape[1]

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, tabular, target)`` for row ``idx``.

        ``tabular`` has shape ``(num_features,)`` and ``target`` has shape
        ``(1,)``; both are float tensors.
        """
        # Load image via ID
        sample_id = int(self.data['id'].iloc[idx])
        img_path = os.path.join(self.img_dir, f"{sample_id}.jpg")
        # The context manager releases the file handle as soon as the pixel
        # data has been converted.
        with Image.open(img_path) as img:
            image = img.convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Features and target (precomputed in __init__)
        return image, self.features[idx], self.targets[idx]

In [24]:
from tqdm import tqdm

# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the tabular branch from the data itself instead of a hard-coded count,
# so a change in the processed CSV columns cannot silently mismatch the model.
# Each dataset item is (image, tabular, target); tabular is a 1-D tensor.
tabular_input_dim = train_ds[0][1].shape[0]
model = MultiModalModel(tabular_input_dim=tabular_input_dim).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# Training Loop
epochs = 20

for epoch in range(epochs):
    model.train()
    total_loss = 0.0   # sum of per-sample losses seen this epoch
    samples_seen = 0

    # Wrap train_loader with tqdm for a progress bar
    loop = tqdm(train_loader, total=len(train_loader), leave=True)
    loop.set_description(f"Epoch [{epoch+1}/{epochs}]")

    for images, tabular, targets in loop:
        images, tabular, targets = images.to(device), tabular.to(device), targets.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(images, tabular)
        loss = criterion(outputs, targets)

        # Backward pass
        loss.backward()
        optimizer.step()

        # Read the scalar once: every .item() call forces a device sync.
        batch_loss = loss.item()
        batch_size = targets.size(0)

        # Weight by batch size so the (usually smaller) last batch does not
        # skew the epoch average.
        total_loss += batch_loss * batch_size
        samples_seen += batch_size

        # Update progress bar
        loop.set_postfix(loss=batch_loss)

    # Guard against an empty loader to avoid a ZeroDivisionError.
    avg_loss = total_loss / max(samples_seen, 1)
    print(f"Epoch {epoch+1} complete. Average Loss: {avg_loss:.4f}")

# Save the weights
torch.save(model.state_dict(), "data/multimodal_model.pth")
print("Model saved successfully!")

Epoch [1/20]: 100%|██████████| 402/402 [01:40<00:00,  4.00it/s, loss=0.721] 


Epoch 1 complete. Average Loss: 12.9002


Epoch [2/20]: 100%|██████████| 402/402 [01:38<00:00,  4.07it/s, loss=0.564] 


Epoch 2 complete. Average Loss: 0.1198


Epoch [3/20]: 100%|██████████| 402/402 [01:27<00:00,  4.61it/s, loss=0.236] 


Epoch 3 complete. Average Loss: 0.0739


Epoch [4/20]: 100%|██████████| 402/402 [01:31<00:00,  4.39it/s, loss=0.383] 


Epoch 4 complete. Average Loss: 0.0552


Epoch [5/20]: 100%|██████████| 402/402 [01:31<00:00,  4.41it/s, loss=0.37]  


Epoch 5 complete. Average Loss: 0.0479


Epoch [6/20]: 100%|██████████| 402/402 [01:27<00:00,  4.59it/s, loss=0.0358]


Epoch 6 complete. Average Loss: 0.0431


Epoch [7/20]: 100%|██████████| 402/402 [01:26<00:00,  4.63it/s, loss=0.00401]


Epoch 7 complete. Average Loss: 0.0363


Epoch [8/20]: 100%|██████████| 402/402 [01:51<00:00,  3.59it/s, loss=0.117] 


Epoch 8 complete. Average Loss: 0.0366


Epoch [9/20]: 100%|██████████| 402/402 [01:25<00:00,  4.68it/s, loss=0.197] 


Epoch 9 complete. Average Loss: 0.0339


Epoch [10/20]: 100%|██████████| 402/402 [01:28<00:00,  4.57it/s, loss=0.629] 


Epoch 10 complete. Average Loss: 0.0334


Epoch [11/20]: 100%|██████████| 402/402 [01:28<00:00,  4.52it/s, loss=0.00106]


Epoch 11 complete. Average Loss: 0.0361


Epoch [12/20]: 100%|██████████| 402/402 [01:16<00:00,  5.25it/s, loss=0.024] 


Epoch 12 complete. Average Loss: 0.0299


Epoch [13/20]: 100%|██████████| 402/402 [01:18<00:00,  5.15it/s, loss=0.0687] 


Epoch 13 complete. Average Loss: 0.0283


Epoch [14/20]: 100%|██████████| 402/402 [01:22<00:00,  4.87it/s, loss=0.115]  


Epoch 14 complete. Average Loss: 0.0298


Epoch [15/20]: 100%|██████████| 402/402 [01:22<00:00,  4.89it/s, loss=0.0105] 


Epoch 15 complete. Average Loss: 0.0298


Epoch [16/20]: 100%|██████████| 402/402 [01:27<00:00,  4.60it/s, loss=0.0641] 


Epoch 16 complete. Average Loss: 0.0256


Epoch [17/20]: 100%|██████████| 402/402 [01:25<00:00,  4.72it/s, loss=0.0924] 


Epoch 17 complete. Average Loss: 0.0280


Epoch [18/20]: 100%|██████████| 402/402 [01:24<00:00,  4.78it/s, loss=0.00074]


Epoch 18 complete. Average Loss: 0.0277


Epoch [19/20]: 100%|██████████| 402/402 [01:21<00:00,  4.93it/s, loss=0.128]  


Epoch 19 complete. Average Loss: 0.0252


Epoch [20/20]: 100%|██████████| 402/402 [01:22<00:00,  4.85it/s, loss=0.00985]

Epoch 20 complete. Average Loss: 0.0273
Model saved successfully!





In [25]:
# numpy is used below (expm1 / sqrt); import it here so the cell does not
# depend on `np` having leaked into the kernel from somewhere else.
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error

# 1. Prepare Test Loader
# shuffle=False keeps predictions aligned with the row order of the test CSV.
test_ds = MultimodalDataset("data/processed_test.csv", "house_images", transform)
test_loader = DataLoader(test_ds, batch_size=32, shuffle=False)

model.eval()
all_preds = []
all_targets = []

# 2. Inference Loop
# no_grad: gradients are not needed for evaluation.
with torch.no_grad():
    for images, tabular, targets in tqdm(test_loader, desc="Evaluating"):
        # Targets stay on the CPU; only the model inputs are moved.
        images, tabular = images.to(device), tabular.to(device)

        outputs = model(images, tabular)

        # Move to CPU and store (one length-1 array per sample)
        all_preds.extend(outputs.cpu().numpy())
        all_targets.extend(targets.numpy())

# 3. Convert back from Log-Space to Real Prices
# We use np.expm1 because we used np.log1p during preprocessing
real_preds = np.expm1(all_preds)
real_targets = np.expm1(all_targets)

# 4. Calculate Metrics (on the real-price scale, not the log scale)
rmse = np.sqrt(mean_squared_error(real_targets, real_preds))
r2 = r2_score(real_targets, real_preds)

print("\n--- Model Performance ---")
print(f"RMSE: ${rmse:,.2f}")
print(f"R² Score: {r2:.4f}")

Evaluating: 100%|██████████| 101/101 [00:59<00:00,  1.70it/s]


--- Model Performance ---
RMSE: $168,006.53
R² Score: 0.7587





In [None]:
import torch.nn.functional as F
import cv2

def generate_gradcam(model, img_tensor, tab_tensor, target_layer=None):
    """Compute a Grad-CAM heatmap for a single (image, tabular) sample.

    The gradient of the model output with respect to the activation of
    ``target_layer`` is averaged over the spatial axes to get one weight per
    channel; the weighted sum of the activation channels is rectified,
    upsampled to the input resolution and min-max scaled to ``[0, 1]``.

    Args:
        model: Module called as ``model(image_batch, tabular_batch)``. It is
            switched to eval mode. Its parameter gradients are not modified.
        img_tensor: One image without batch axis, e.g. ``(C, H, W)``.
        tab_tensor: One tabular feature vector without batch axis.
        target_layer: Module whose output is a 4-D ``(N, C, h, w)`` activation
            map. Defaults to ``model.cnn.layer4[-1]``, the last residual block
            of the ResNet image branch.

    Returns:
        ``numpy.ndarray`` of shape ``(H, W)`` with values in ``[0, 1]``
        (all zeros when the rectified map is flat).

    Raises:
        RuntimeError: If ``target_layer`` was not executed during the forward
            pass.
    """
    model.eval()

    # 1. Target the last convolutional block of the vision branch
    if target_layer is None:
        target_layer = model.cnn.layer4[-1]

    # Run on whatever device the model lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else img_tensor.device

    # The image must require grad so the activation is part of the autograd
    # graph even if the model parameters are frozen.
    image = img_tensor.detach().unsqueeze(0).to(device).requires_grad_(True)
    tabular = tab_tensor.detach().unsqueeze(0).to(device)

    # 2. Hook to capture the activation of the target layer
    activations = []

    def save_activation(module, inputs, output):
        activations.append(output)

    handle = target_layer.register_forward_hook(save_activation)
    try:
        # enable_grad: callers may invoke this from inside a no_grad block.
        with torch.enable_grad():
            # 3. Forward pass
            output = model(image, tabular)
            if not activations:
                raise RuntimeError(
                    "target_layer was not executed during the forward pass"
                )
            feature_maps = activations[-1]

            # 4. Gradient of the prediction w.r.t. the captured activation.
            # autograd.grad returns it directly and leaves .grad of the model
            # parameters untouched.
            gradients = torch.autograd.grad(output.sum(), feature_maps)[0]
    finally:
        # Always detach the hook so repeated calls do not accumulate hooks.
        handle.remove()

    # 5. Calculate channel weights and heatmap
    weights = gradients.mean(dim=(2, 3), keepdim=True)
    cam = torch.relu((weights * feature_maps).sum(dim=1, keepdim=True)).detach()

    # 6. Resize to the input resolution and normalize
    cam = F.interpolate(
        cam, size=tuple(image.shape[-2:]), mode="bilinear", align_corners=False
    )
    cam = cam[0, 0]
    cam = cam - cam.min()
    peak = cam.max()
    if peak > 0:  # a flat map would otherwise divide by zero and yield NaNs
        cam = cam / peak
    return cam.cpu().numpy()