In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor

Importing dataset for training

In [6]:
df = pd.read_csv("processed_train.csv")

IMG = "img_path"
TARGET = "log_price"

X_tab = df.drop(columns=[IMG, TARGET])
y = df[TARGET].values

In [7]:
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42
)

Training only on the Tabular data

In [13]:
X_train = train_df.drop(columns=[IMG_COL, TARGET])
y_train = train_df[TARGET].values

X_val = val_df.drop(columns=[IMG_COL, TARGET])
y_val = val_df[TARGET].values

In [14]:
tabular_model = GradientBoostingRegressor(
    n_estimators=300,
    learning_rate=0.05,
    max_depth=4,
    random_state=42
)

tabular_model.fit(X_train, y_train)

In [15]:
y_pred = tabular_model.predict(X_val)

mse  = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)
r2   = r2_score(y_val, y_pred)

print("ðŸ“Š Tabular-Only Model (Validation)")
print(f"MSE  : {mse:.4f}")
print(f"RMSE : {rmse:.4f}")
print(f"RÂ²   : {r2:.4f}")

ðŸ“Š Tabular-Only Model (Validation)
MSE  : 0.0451
RMSE : 0.2124
RÂ²   : 0.8388


Model Architecture for Tabular+Image data

In [8]:
class PropertyDataset(Dataset):
    def __init__(self, df):
        self.df = df.reset_index(drop=True)

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        img = Image.open(row["img_path"]).convert("RGB")
        img = self.transform(img)

        tab = torch.from_numpy(
        row.drop(["img_path", "log_price"])
            .to_numpy(dtype=np.float32)
        )

        target = torch.tensor(row["log_price"], dtype=torch.float32)

        return img, tab, target


In [9]:
class ImageEncoder(nn.Module):
    def __init__(self):
        super().__init__()
        base = models.resnet18(pretrained=True)
        self.encoder = nn.Sequential(*list(base.children())[:-1])

    def forward(self, x):
        x = self.encoder(x)
        return x.flatten(1)

In [10]:
class TabularEncoder(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64)
        )

    def forward(self, x):
        return self.net(x)

In [11]:
class FusionModel(nn.Module):
    def __init__(self, tab_dim):
        super().__init__()
        self.img_enc = ImageEncoder()
        self.tab_enc = TabularEncoder(tab_dim)

        self.regressor = nn.Sequential(
            nn.Linear(512 + 64, 128),
            nn.ReLU(),
            nn.Linear(128, 1)
        )

    def forward(self, img, tab):
        img_feat = self.img_enc(img)
        tab_feat = self.tab_enc(tab)
        x = torch.cat([img_feat, tab_feat], dim=1)
        return self.regressor(x).squeeze(1)

In [12]:
train_dataset = PropertyDataset(train_df)
val_dataset = PropertyDataset(val_df)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [13]:
model = FusionModel(tab_dim=X_tab.shape[1]).to(device)

for p in model.img_enc.parameters():
    p.requires_grad = False

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-4
)
criterion = nn.MSELoss()



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 44.7M/44.7M [00:00<00:00, 174MB/s]


Model training

In [None]:
EPOCHS = 25
best_rmse = float("inf")

for epoch in range(EPOCHS):

    model.train()
    train_loss = 0.0

    for i, (img, tab, y) in enumerate(train_loader):
        img = img.to(device, non_blocking=True)
        tab = tab.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)

        preds = model(img, tab)
        loss = criterion(preds, y)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        if i % 50 == 0:
            rmse = torch.sqrt(loss.detach()).item()
            print(
                f"Epoch {epoch+1}/{EPOCHS} "
                f"| Train Step {i}/{len(train_loader)} "
                f"| MSE {loss.item():.4f} | RMSE {rmse:.4f}"
            )

    avg_train_mse = train_loss / len(train_loader)

    model.eval()
    val_preds = []
    val_targets = []

    with torch.no_grad():
        for img, tab, y in val_loader:
            img = img.to(device, non_blocking=True)
            tab = tab.to(device, non_blocking=True)
            y = y.to(device, non_blocking=True)

            preds = model(img, tab)

            val_preds.append(preds.cpu())
            val_targets.append(y.cpu())

    val_preds = torch.cat(val_preds).numpy()
    val_targets = torch.cat(val_targets).numpy()

    val_mse = mean_squared_error(val_targets, val_preds)
    val_rmse = np.sqrt(val_mse)
    val_r2 = r2_score(val_targets, val_preds)

    print(
        f"Epoch {epoch+1} DONE | "
        f"Train MSE {avg_train_mse:.4f} | "
        f"Val RMSE {val_rmse:.4f} | "
        f"Val RÂ² {val_r2:.4f}"
    )

    if val_rmse < best_rmse:
        best_rmse = val_rmse
        torch.save(model.state_dict(), "best_multimodal_model.pth")
        print(f"ðŸ’¾ Best model saved (Val RMSE = {best_rmse:.4f})")

Epoch 1/25 | Train Step 0/403 | MSE 1.5032 | RMSE 1.2261
Epoch 1/25 | Train Step 50/403 | MSE 1.0622 | RMSE 1.0306
Epoch 1/25 | Train Step 100/403 | MSE 0.8462 | RMSE 0.9199
Epoch 1/25 | Train Step 150/403 | MSE 0.3657 | RMSE 0.6048
Epoch 1/25 | Train Step 200/403 | MSE 0.4639 | RMSE 0.6811
Epoch 1/25 | Train Step 250/403 | MSE 7.4353 | RMSE 2.7268


Computing final prediction on test.csv

In [None]:
class PropertyTestDataset(Dataset):
    def __init__(self, df):
        self.df = df.reset_index(drop=True)

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            )
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        img = Image.open(row["img_path"]).convert("RGB")
        img = self.transform(img)

        tab = torch.from_numpy(
        row.drop(["img_path", "id"])
            .to_numpy(dtype=np.float32)
        )

        id = torch.tensor(row["id"])

        return img, tab, id

In [None]:
test_df = pd.read_csv("processed_test.csv")

test_dataset = PropertyTestDataset(test_df)
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

Loading the model

In [None]:
model = FusionModel(tab_dim=X_tab.shape[1])
model.load_state_dict(torch.load("best_multimodal_model.pth", map_location=device))
model.to(device)
model.eval()



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 44.7M/44.7M [00:00<00:00, 94.7MB/s]


FusionModel(
  (img_enc): ImageEncoder(
    (encoder): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (4): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, 

In [None]:
from tqdm import tqdm

ids = []
log_prices = []

with torch.no_grad():
    for img, tab, batch_ids in tqdm(
        test_loader,
        desc="Running inference",
        total=len(test_loader)
    ):
        img = img.to(device)
        tab = tab.to(device)

        preds = model(img, tab)

        ids.extend(batch_ids.cpu().numpy().astype(np.int64))
        log_prices.extend(preds.cpu().numpy())

prices = np.exp(log_prices)

Running inference: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 169/169 [25:37<00:00,  9.10s/it]


Saving the final prediction into csv

In [None]:
submission = pd.DataFrame({
    "id": ids,
    "predicted_price": prices
})

submission.to_csv("predictions.csv", index=False)
print("âœ… predictions.csv saved")

âœ… predictions.csv saved


In [None]:
for epoch in range(10):
    model.train()
    total_loss = 0

    for i, (img, tab, y) in enumerate(loader):
        img = img.to(device)
        tab = tab.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        preds = model(img, tab)
        loss = criterion(preds, y)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        if i % 50 == 0:
            rmse = torch.sqrt(loss).item()
            print(
                f"Epoch {epoch+1} | Step {i}/{len(loader)} "
                f"| MSE {loss.item():.4f} | RMSE {rmse:.4f}"
            )

    print(
        f"Epoch {epoch+1} DONE | "
        f"Avg MSE {total_loss/len(loader):.4f}"
    )

Epoch 1 | Step 0/504 | MSE 708.6321 | RMSE 26.6201
Epoch 1 | Step 50/504 | MSE 3.0765 | RMSE 1.7540
Epoch 1 | Step 100/504 | MSE 4.0734 | RMSE 2.0183
Epoch 1 | Step 150/504 | MSE 0.6237 | RMSE 0.7897
Epoch 1 | Step 200/504 | MSE 16.7131 | RMSE 4.0882
Epoch 1 | Step 250/504 | MSE 2.7380 | RMSE 1.6547
Epoch 1 | Step 300/504 | MSE 2.9168 | RMSE 1.7079
Epoch 1 | Step 350/504 | MSE 9.6010 | RMSE 3.0986
Epoch 1 | Step 400/504 | MSE 3.1432 | RMSE 1.7729
Epoch 1 | Step 450/504 | MSE 1.3039 | RMSE 1.1419
Epoch 1 | Step 500/504 | MSE 3.1857 | RMSE 1.7849
Epoch 1 DONE | Avg MSE 37.8903
Epoch 2 | Step 0/504 | MSE 0.6819 | RMSE 0.8258
Epoch 2 | Step 50/504 | MSE 1.2383 | RMSE 1.1128
Epoch 2 | Step 100/504 | MSE 0.2326 | RMSE 0.4823
Epoch 2 | Step 150/504 | MSE 0.2466 | RMSE 0.4966
Epoch 2 | Step 200/504 | MSE 0.1921 | RMSE 0.4383
Epoch 2 | Step 250/504 | MSE 0.1185 | RMSE 0.3442
Epoch 2 | Step 300/504 | MSE 0.1092 | RMSE 0.3304
Epoch 2 | Step 350/504 | MSE 0.0783 | RMSE 0.2798
Epoch 2 | Step 400/50

In [None]:
torch.save(model.state_dict(), "multimodal_model.pth")
print("Model saved")

âœ… Model saved


In [22]:
model = FusionModel(tab_dim=X_tab.shape[1]).to(device)
model.load_state_dict(torch.load("multimodal_model.pth"))

for p in model.img_enc.parameters():
    p.requires_grad = False

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-4
)
criterion = nn.MSELoss()