In [38]:
# Imports
import numpy as np
import pandas as pd
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset

from torchvision import transforms, models
from PIL import Image

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [39]:
# Load train.csv & build fusion_df
train_df = pd.read_csv("../data/raw/train.csv")
train_df["id"] = train_df["id"].astype(int)

IMAGE_DIR = Path("../data/images")

image_ids = sorted([
    int(float(p.stem)) for p in IMAGE_DIR.glob("*.png")
])

fusion_df = (
    train_df[train_df["id"].isin(image_ids)]
    .drop_duplicates(subset="id")
    .sort_values("id")
    .reset_index(drop=True)
)

assert fusion_df.shape[0] == len(image_ids)
assert (fusion_df["id"].values == image_ids).all()

fusion_df.shape


(5980, 21)

In [40]:
# Build ordered image paths
image_paths = [
    IMAGE_DIR / f"{pid}.0.png" if (IMAGE_DIR / f"{pid}.0.png").exists()
    else IMAGE_DIR / f"{pid}.png"
    for pid in fusion_df["id"].values
]

len(image_paths), image_paths[:3]


(5980,
 [PosixPath('../data/images/1200019.0.png'),
  PosixPath('../data/images/3600057.0.png'),
  PosixPath('../data/images/11200290.0.png')])

In [41]:
# Dataset + transforms
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

class SatelliteImageDataset(Dataset):
    def __init__(self, image_paths, transform):
        self.image_paths = image_paths
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = Image.open(self.image_paths[idx]).convert("RGB")
        return self.transform(img)


In [42]:
# Generate image embeddings (DETERMINISTIC)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

resnet = models.resnet18(
    weights=models.ResNet18_Weights.IMAGENET1K_V1
)
resnet.fc = nn.Identity()
resnet.to(device)
resnet.eval()

for p in resnet.parameters():
    p.requires_grad = False

dataset = SatelliteImageDataset(image_paths, image_transform)
loader = DataLoader(dataset, batch_size=32, shuffle=False, num_workers=0)

all_embeddings = []

with torch.no_grad():
    for batch in loader:
        batch = batch.to(device)
        emb = resnet(batch)
        all_embeddings.append(emb.cpu().numpy())

X_image = np.vstack(all_embeddings)
X_image.shape


(5980, 512)

In [43]:
np.save("../data/processed/image_embeddings_fusion.npy", X_image)


In [44]:
# Prepare tabular features + target
target = "price"
drop_cols = ["id", "date", target]

X_tabular = fusion_df.drop(columns=drop_cols)
y = np.log1p(fusion_df[target].values)

scaler = StandardScaler()
X_tabular_scaled = scaler.fit_transform(X_tabular)

X_tabular_scaled.shape, y.shape


((5980, 18), (5980,))

In [45]:
# Early fusion
X_fusion = np.hstack([X_tabular_scaled, X_image])
X_fusion.shape


(5980, 530)

In [46]:
# Train / validation split
X_train, X_val, y_train, y_val = train_test_split(
    X_fusion,
    y,
    test_size=0.2,
    random_state=42
)

X_train.shape, X_val.shape


((4784, 530), (1196, 530))

In [47]:
# Convert to PyTorch tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)


In [48]:
# Define Fusion MLP
class FusionMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        return self.net(x)


In [49]:
# Train Fusion Model
model = FusionMLP(X_train.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

train_loader = DataLoader(
    TensorDataset(X_train_t, y_train_t),
    batch_size=64,
    shuffle=True,
    num_workers=0
)

val_loader = DataLoader(
    TensorDataset(X_val_t, y_val_t),
    batch_size=64,
    shuffle=False,
    num_workers=0
)

epochs = 15

for epoch in range(epochs):
    model.train()
    train_loss = 0.0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * xb.size(0)

    train_loss /= len(train_loader.dataset)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb)
            loss = criterion(preds, yb)
            val_loss += loss.item() * xb.size(0)

    val_loss /= len(val_loader.dataset)

    print(
        f"Epoch {epoch+1}/{epochs} | "
        f"Train MSE: {train_loss:.4f} | "
        f"Val MSE: {val_loss:.4f}"
    )


Epoch 1/15 | Train MSE: 35.0159 | Val MSE: 3.6175
Epoch 2/15 | Train MSE: 2.6681 | Val MSE: 1.8531
Epoch 3/15 | Train MSE: 1.4142 | Val MSE: 1.0903
Epoch 4/15 | Train MSE: 0.9055 | Val MSE: 0.7709
Epoch 5/15 | Train MSE: 0.6583 | Val MSE: 0.6074
Epoch 6/15 | Train MSE: 0.5378 | Val MSE: 0.5616
Epoch 7/15 | Train MSE: 0.4810 | Val MSE: 0.4840
Epoch 8/15 | Train MSE: 0.4345 | Val MSE: 0.4511
Epoch 9/15 | Train MSE: 0.4061 | Val MSE: 0.4706
Epoch 10/15 | Train MSE: 0.3848 | Val MSE: 0.4318
Epoch 11/15 | Train MSE: 0.3723 | Val MSE: 0.4045
Epoch 12/15 | Train MSE: 0.3494 | Val MSE: 0.3729
Epoch 13/15 | Train MSE: 0.3379 | Val MSE: 0.3640
Epoch 14/15 | Train MSE: 0.3378 | Val MSE: 0.3992
Epoch 15/15 | Train MSE: 0.3206 | Val MSE: 0.3477


In [50]:
# Generate validation predictions
model.eval()

with torch.no_grad():
    val_preds_log = model(X_val_t.to(device)).cpu().numpy().ravel()

val_preds_log.shape


(1196,)

In [51]:
# Convert back to price scale
val_preds_price = np.expm1(val_preds_log)
val_true_price = np.expm1(y_val)


In [53]:
# Compute RMSE & RÂ²
from sklearn.metrics import mean_squared_error, root_mean_squared_error, r2_score

# In scikit-learn >= 1.4, use root_mean_squared_error directly
rmse = root_mean_squared_error(
    val_true_price,
    val_preds_price
)

r2 = r2_score(
    val_true_price,
    val_preds_price
)

print(f"RMSE: {rmse}, R2: {r2}")

RMSE: 426696.0662099379, R2: -0.29377725218330886
