# Tabular MLP Baseline Model
## Satellite Imagery–Based Property Valuation

Objective:
- Train a neural network baseline using only tabular features
- Establish a fair benchmark for multimodal (image + tabular) comparison


In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


In [3]:
BASE_PATH = "/content/drive/MyDrive/IIT_Roorkee_Project/data/"

X_train = pd.read_csv(BASE_PATH + "X_train.csv")
X_val   = pd.read_csv(BASE_PATH + "X_val.csv")
y_train = pd.read_csv(BASE_PATH + "y_train.csv").values.ravel()
y_val   = pd.read_csv(BASE_PATH + "y_val.csv").values.ravel()

print(X_train.shape, X_val.shape)


(12967, 14) (3242, 14)


In [4]:

X_train = X_train.drop(columns=['id'])
X_val   = X_val.drop(columns=['id'])


In [5]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled   = scaler.transform(X_val)


In [6]:
X_train_t = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_t = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)

X_val_t = torch.tensor(X_val_scaled, dtype=torch.float32)
y_val_t = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)


In [7]:
train_ds = TensorDataset(X_train_t, y_train_t)
val_ds   = TensorDataset(X_val_t, y_val_t)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=64, shuffle=False)


In [8]:
class TabularMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Dropout(0.3),

            nn.Linear(128, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Dropout(0.2),

            nn.Linear(64, 1)
        )

    def forward(self, x):
        return self.net(x)


In [9]:
device = "cuda" if torch.cuda.is_available() else "cpu"

model = TabularMLP(input_dim=X_train.shape[1]).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)


In [10]:
EPOCHS = 60

for epoch in range(EPOCHS):
    model.train()
    train_loss = 0

    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)

        optimizer.zero_grad()
        preds = model(xb)
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    if (epoch+1) % 10 == 0:
        print(f"Epoch {epoch+1}/{EPOCHS}, Train MSE: {train_loss/len(train_loader):.4f}")


Epoch 10/60, Train MSE: 1.5191
Epoch 20/60, Train MSE: 0.8586
Epoch 30/60, Train MSE: 0.6924
Epoch 40/60, Train MSE: 0.6129
Epoch 50/60, Train MSE: 0.5569
Epoch 60/60, Train MSE: 0.4919


In [11]:
model.eval()
with torch.no_grad():
    val_preds = []
    val_true = []

    for xb, yb in val_loader:
        xb = xb.to(device)
        preds = model(xb).cpu().numpy()
        val_preds.extend(preds)
        val_true.extend(yb.numpy())

val_preds = np.array(val_preds).ravel()
val_true  = np.array(val_true).ravel()

rmse = np.sqrt(mean_squared_error(val_true, val_preds))
r2   = r2_score(val_true, val_preds)

print(f"MLP Baseline RMSE: {rmse:.4f}")
print(f"MLP Baseline R²  : {r2:.4f}")


MLP Baseline RMSE: 0.1866
MLP Baseline R²  : 0.8738


In [12]:
torch.save(model.state_dict(), BASE_PATH + "mlp_tabular_baseline.pth")

import joblib
joblib.dump(scaler, BASE_PATH + "tabular_scaler.pkl")


['/content/drive/MyDrive/IIT_Roorkee_Project/data/tabular_scaler.pkl']

### Tabular MLP Baseline Results

- Model: Multi-Layer Perceptron (MLP)
- Input: Structured tabular features only
- Purpose: Establish a neural baseline compatible with CNN fusion

This model serves as the primary benchmark for evaluating the added value
of satellite imagery in the multimodal regression pipeline.
