In [3]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from torchvision import models, transforms
from PIL import Image

import numpy as np

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [5]:
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),   # ResNet input size
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [6]:
class SatelliteImageDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        self.df = dataframe.reset_index(drop=True)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.loc[idx, "image_path"]
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        return image


In [8]:
import pandas as pd
from pathlib import Path

# Load the image-linked dataset
image_df = pd.read_csv("../data/processed/train_with_images.csv")

# IMPORTANT: id is float -> filenames are like 12345.0.png
image_df["id"] = image_df["id"].astype(float)

# Build correct image paths
IMAGE_DIR = Path("../data/images")

image_df["image_path"] = image_df["id"].apply(
    lambda x: IMAGE_DIR / f"{x}.png"
)

# Keep only rows where image actually exists
image_df["image_exists"] = image_df["image_path"].apply(lambda x: x.exists())
image_df = image_df[image_df["image_exists"]].reset_index(drop=True)

image_df.shape


(5998, 23)

In [9]:
image_df[["id", "price", "image_path"]].head()


Unnamed: 0,id,price,image_path
0,9543000000.0,139950,../data/images/9543000205.0.png
1,3353400000.0,174000,../data/images/3353400120.0.png
2,2976801000.0,150000,../data/images/2976800749.0.png
3,7335400000.0,219500,../data/images/7335400020.0.png
4,7883601000.0,157500,../data/images/7883600700.0.png


In [10]:
image_dataset = SatelliteImageDataset(
    dataframe=image_df,
    transform=image_transform
)

image_loader = DataLoader(
    image_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=2
)

len(image_dataset)


5998

In [14]:
# Remove classification head
resnet.fc = nn.Identity()

resnet


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [15]:
for param in resnet.parameters():
    param.requires_grad = False


In [16]:
resnet = resnet.to(device)
resnet.eval()


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [18]:
image_loader = DataLoader(
    image_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0   # IMPORTANT FIX
)


In [19]:
all_embeddings = []

resnet.eval()

with torch.no_grad():
    for batch in image_loader:
        batch = batch.to(device)
        embeddings = resnet(batch)
        all_embeddings.append(embeddings.cpu().numpy())


In [20]:
image_embeddings = np.vstack(all_embeddings)
image_embeddings.shape


(5998, 512)

In [22]:
np.save(
    "../data/processed/image_embeddings.npy",
    image_embeddings
)

print("Saved image_embeddings.npy")


Saved image_embeddings.npy


In [23]:
image_df[["id", "price"]].to_csv(
    "../data/processed/image_targets.csv",
    index=False
)

print("Saved image_targets.csv")


Saved image_targets.csv


In [27]:
import numpy as np

X = np.load("../data/processed/image_embeddings.npy")
X.shape


(5998, 512)

In [25]:
import pandas as pd
import numpy as np

targets_df = pd.read_csv("../data/processed/image_targets.csv")

y = np.log1p(targets_df["price"].values)

y.shape


(5998,)

In [28]:
print("X samples:", X.shape[0])
print("y samples:", y.shape[0])


X samples: 5998
y samples: 5998


In [29]:
# Trainâ€“Validation split
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

X_train.shape, X_val.shape


((4798, 512), (1200, 512))

In [30]:
print("Train:", X_train.shape, y_train.shape)
print("Val:", X_val.shape, y_val.shape)


Train: (4798, 512) (4798,)
Val: (1200, 512) (1200,)


In [31]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader


In [32]:
X_train_t = torch.tensor(X_train, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

X_val_t = torch.tensor(X_val, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.float32).unsqueeze(1)

X_train_t.shape, y_train_t.shape


(torch.Size([4798, 512]), torch.Size([4798, 1]))

In [33]:
train_ds = TensorDataset(X_train_t, y_train_t)
val_ds = TensorDataset(X_val_t, y_val_t)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=64, shuffle=False)


In [34]:
class ImageMLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, x):
        return self.net(x)


In [35]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = ImageMLP().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

model


ImageMLP(
  (net): Sequential(
    (0): Linear(in_features=512, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=1, bias=True)
  )
)

In [36]:
epochs = 10

for epoch in range(epochs):
    model.train()
    train_loss = 0.0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * xb.size(0)

    train_loss /= len(train_loader.dataset)

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb)
            loss = criterion(preds, yb)
            val_loss += loss.item() * xb.size(0)

    val_loss /= len(val_loader.dataset)

    print(f"Epoch {epoch+1}/{epochs} | Train MSE: {train_loss:.4f} | Val MSE: {val_loss:.4f}")


Epoch 1/10 | Train MSE: 16.0059 | Val MSE: 2.2615
Epoch 2/10 | Train MSE: 1.5760 | Val MSE: 1.1770
Epoch 3/10 | Train MSE: 0.9394 | Val MSE: 0.8487
Epoch 4/10 | Train MSE: 0.7353 | Val MSE: 0.7230
Epoch 5/10 | Train MSE: 0.6556 | Val MSE: 0.6907
Epoch 6/10 | Train MSE: 0.6044 | Val MSE: 0.6929
Epoch 7/10 | Train MSE: 0.5603 | Val MSE: 0.6086
Epoch 8/10 | Train MSE: 0.5342 | Val MSE: 0.6155
Epoch 9/10 | Train MSE: 0.5118 | Val MSE: 0.6022
Epoch 10/10 | Train MSE: 0.4950 | Val MSE: 0.5720


In [37]:
model.eval()

with torch.no_grad():
    val_preds_log = []
    val_targets_log = []

    for xb, yb in val_loader:
        xb = xb.to(device)
        preds = model(xb)

        val_preds_log.append(preds.cpu().numpy())
        val_targets_log.append(yb.cpu().numpy())

val_preds_log = np.vstack(val_preds_log).ravel()
val_targets_log = np.vstack(val_targets_log).ravel()

val_preds_log.shape, val_targets_log.shape


((1200,), (1200,))

In [38]:
val_preds_price = np.expm1(val_preds_log)
val_targets_price = np.expm1(val_targets_log)


In [40]:
from sklearn.metrics import root_mean_squared_error, r2_score

# In newer versions, use the dedicated RMSE function
rmse = root_mean_squared_error(
    val_targets_price,
    val_preds_price
)

r2 = r2_score(
    val_targets_price,
    val_preds_price
)

rmse, r2

(517667.21875, -0.7984820604324341)