<a href="https://colab.research.google.com/github/prernakukreja5/Satellite_imagery_housing_price_prediction/blob/main/multimodal_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

# MULTIMODAL PIPELINE (800 LOCALITIES)
# TABULAR (XGBoost) + IMAGE (CNN embeddings)
# ============================================

import numpy as np
import pandas as pd
import torch

from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

# --------------------------------------------
# 1️⃣ LOAD TABULAR DATA
# --------------------------------------------
train_df = pd.read_csv("train(1)(train(1)).csv")

# Tabular predictors, in the column order used to build the feature matrix.
FINAL_FEATURES = ["sqft_living", "bathrooms", "grade", "lat", "yr_built", "waterfront", "view"]

# Seeded draw of SAMPLE_SIZE rows, renumbered 0..SAMPLE_SIZE-1.
# The original note says this seed reproduces the SAME 800 localities
# (random_state=42) — presumably the ones the image embeddings were built
# from; verify against the embedding-generation notebook.
SAMPLE_SIZE = 800
train_sample_df = train_df.sample(n=SAMPLE_SIZE, random_state=42)
train_sample_df = train_sample_df.reset_index(drop=True)

# Feature matrix and raw price target as plain NumPy arrays.
X_tab = train_sample_df.loc[:, FINAL_FEATURES].to_numpy()
y = train_sample_df.loc[:, "price"].to_numpy()

# LOG TARGET (CRITICAL): every model below is trained and scored on log1p(price).
y_log = np.log1p(y)

# --------------------------------------------
# 2️⃣ LOAD IMAGE EMBEDDINGS
# --------------------------------------------
# map_location="cpu" lets tensors that were saved from a GPU be restored on a
# CPU-only runtime; .detach() makes .numpy() safe even if the saved tensors
# still carry autograd history.
# NOTE(review): torch.load unpickles the file — only load embedding files you
# produced yourself or otherwise trust.
X_img_tr = torch.load("image_embeddings_train.pt", map_location="cpu").detach().numpy()  # (3200, 512)
X_img_val = torch.load("image_embeddings_val.pt", map_location="cpu").detach().numpy()   # (800, 512)

# --------------------------------------------
# 3️⃣ POOL IMAGE EMBEDDINGS (5 IMAGES / LOCALITY)
# --------------------------------------------
# 3200 = 640 localities × 5 images
# 800  = 160 localities × 5 images

def pool_embeddings(X, n_per_loc=5):
    """Mean-pool consecutive groups of embedding rows into one row per group.

    Rows are grouped purely by position: the first ``n_per_loc`` rows form
    group 0, the next ``n_per_loc`` rows form group 1, and so on.

    Parameters
    ----------
    X : array of shape (n_rows, dim)
        Per-image embeddings; ``n_rows`` must be a multiple of ``n_per_loc``.
    n_per_loc : int, default 5
        Number of consecutive rows averaged together.

    Returns
    -------
    array of shape (n_rows // n_per_loc, dim)
        The per-group mean embedding.

    Raises
    ------
    ValueError
        If ``n_per_loc`` is not positive, or ``n_rows`` is not a multiple of
        ``n_per_loc``.
    """
    if n_per_loc <= 0:
        raise ValueError(f"n_per_loc must be a positive integer, got {n_per_loc}")

    n_rows = X.shape[0]
    # Fail with an actionable message instead of numpy's generic reshape error.
    if n_rows % n_per_loc != 0:
        raise ValueError(
            f"number of embedding rows ({n_rows}) is not a multiple of "
            f"n_per_loc ({n_per_loc})"
        )

    # (n_groups, n_per_loc, dim) -> average over the images of each group.
    return X.reshape(n_rows // n_per_loc, n_per_loc, X.shape[1]).mean(axis=1)

X_img_tr_pooled  = pool_embeddings(X_img_tr)   # (640, 512) expected
X_img_val_pooled = pool_embeddings(X_img_val)  # (160, 512) expected

# --------------------------------------------
# 4️⃣ MATCH TABULAR SPLIT (640 / 160)
# --------------------------------------------
# Take the split point from the pooled embeddings instead of hard-coding 640,
# and fail early if the tabular sample and the embeddings disagree on the
# number of localities (otherwise the mismatch only surfaces at fusion time).
n_tr_loc = X_img_tr_pooled.shape[0]
n_val_loc = X_img_val_pooled.shape[0]
if n_tr_loc + n_val_loc != X_tab.shape[0]:
    raise ValueError(
        f"tabular sample has {X_tab.shape[0]} rows but the pooled embeddings "
        f"cover {n_tr_loc} + {n_val_loc} localities"
    )

# Purely positional split.
# NOTE(review): assumes tabular row i corresponds to pooled embedding row i
# (train rows first, then validation) — confirm against how the embedding
# files were generated.
X_tab_tr  = X_tab[:n_tr_loc]
X_tab_val = X_tab[n_tr_loc:]

y_tr_log  = y_log[:n_tr_loc]
y_val_log = y_log[n_tr_loc:]

# --------------------------------------------
# 5️⃣ SCALE TABULAR
# --------------------------------------------
# Scaling statistics are fitted on the training rows only and then applied
# unchanged to the validation rows.
scaler = StandardScaler()
X_tab_tr  = scaler.fit_transform(X_tab_tr)
X_tab_val = scaler.transform(X_tab_val)

def _fit_and_report(model, X_tr, y_tr, X_val, y_val, title):
    """Fit ``model``, score it on the validation split and print a short report.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
    X_tr, y_tr : training features and targets
    X_val, y_val : validation features and targets
    title : str
        Heading printed above the metrics.

    Returns
    -------
    tuple ``(preds, rmse, r2)``
        Validation predictions, their root-mean-squared error and their R²
        against ``y_val``. The metrics are in whatever scale ``y_val`` uses
        (log1p(price) for both calls below).
    """
    model.fit(X_tr, y_tr)
    preds = model.predict(X_val)

    rmse = np.sqrt(mean_squared_error(y_val, preds))
    r2 = r2_score(y_val, preds)

    print(title)
    print("RMSE (log):", rmse)
    print("R²:", r2)
    print("-" * 50)
    return preds, rmse, r2


# --------------------------------------------
# 6️⃣ TABULAR-ONLY MODEL (XGBOOST)
# --------------------------------------------
# Baseline trained on the scaled tabular features alone.
tab_model = XGBRegressor(
    n_estimators=700,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    objective="reg:squarederror"
)

tab_preds, rmse_tab, r2_tab = _fit_and_report(
    tab_model, X_tab_tr, y_tr_log, X_tab_val, y_val_log, "TABULAR ONLY"
)

# --------------------------------------------
# 7️⃣ MULTIMODAL EARLY FUSION
# --------------------------------------------
# Early fusion: pooled image embedding columns are appended to the tabular
# columns, row by row.
# NOTE(review): this yields far more columns than the tabular-only model sees
# for the same number of training rows, so the fusion model may overfit —
# consider reducing the embedding dimensionality before fusing.
X_fused_tr  = np.hstack([X_tab_tr,  X_img_tr_pooled])
X_fused_val = np.hstack([X_tab_val, X_img_val_pooled])

# --------------------------------------------
# 8️⃣ MULTIMODAL MODEL (XGBOOST)
# --------------------------------------------
fusion_model = XGBRegressor(
    n_estimators=900,
    max_depth=5,
    learning_rate=0.03,
    subsample=0.75,
    colsample_bytree=0.75,
    random_state=42,
    objective="reg:squarederror"
)

fusion_preds, rmse_fusion, r2_fusion = _fit_and_report(
    fusion_model, X_fused_tr, y_tr_log, X_fused_val, y_val_log,
    "MULTIMODAL (TABULAR + IMAGE)"
)

# --------------------------------------------
# 9️⃣ FINAL COMPARISON TABLE
# --------------------------------------------
# One row per model; both metrics were computed against the log1p(price)
# validation targets.
results = pd.DataFrame(
    [
        ("Tabular Only", rmse_tab, r2_tab),
        ("Multimodal", rmse_fusion, r2_fusion),
    ],
    columns=["Model", "RMSE (log)", "R² (log)"],
)

print(results)


TABULAR ONLY
RMSE (log): 0.22427020168056988
R²: 0.8085852079973207
--------------------------------------------------
MULTIMODAL (TABULAR + IMAGE)
RMSE (log): 0.2552774772956034
R²: 0.7519967641099627
--------------------------------------------------
          Model  RMSE (log)  R² (log)
0  Tabular Only    0.224270  0.808585
1    Multimodal    0.255277  0.751997
