<a href="https://colab.research.google.com/github/kush1305/Satellite-Imagery-Based-Property-Valuation-/blob/main/preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision tqdm --quiet

import os
import torch
import torchvision.transforms as T
import torchvision.models as models
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm

# Run configuration: every path below is an empty placeholder and must be
# filled in before the rest of the script is executed.
TRAIN_CSV = ""      # input CSV loaded with pd.read_csv; its "id" column selects the image files
IMAGE_DIR = ""      # directory searched for one "<id>.png" image per CSV row
OUTPUT_NPY = ""     # target of np.save for the feature matrix (np.save appends ".npy" if absent)
OUTPUT_CSV = ""     # target CSV holding the input rows plus the cnn_* feature columns

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Table of training rows; loaded before the model so a bad path fails early.
df = pd.read_csv(TRAIN_CSV)

# ImageNet-pretrained ResNet-18 with its last child module removed, so the
# network emits pooled features instead of class logits.
resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
_backbone_layers = list(resnet.children())[:-1]
resnet = torch.nn.Sequential(*_backbone_layers)
resnet.to(device)
resnet.eval()  # inference mode: freezes batch-norm statistics

# Preprocessing: fixed 224x224 resize, tensor conversion, then per-channel
# normalisation with the usual ImageNet mean / std.
_IMAGENET_MEAN = [0.485, 0.456, 0.406]
_IMAGENET_STD = [0.229, 0.224, 0.225]
transform = T.Compose(
    [
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(mean=_IMAGENET_MEAN, std=_IMAGENET_STD),
    ]
)

# Extract one flattened feature vector per row of `df`, in row order, so that
# row i of `features` belongs to row i of `df`. A missing or unreadable image
# is deliberately allowed to raise: silently skipping it would misalign the
# feature matrix with the table it is later concatenated to.
features = []

with torch.no_grad():  # inference only, no autograd bookkeeping needed
    for pid in tqdm(df["id"]):
        # The context manager releases the file handle deterministically
        # instead of relying on Pillow's implicit close / garbage collection.
        with Image.open(os.path.join(IMAGE_DIR, f"{pid}.png")) as raw:
            # convert() returns an independent, fully loaded RGB copy.
            img = transform(raw.convert("RGB")).unsqueeze(0).to(device)
        # Batch of one -> flat 1-D vector, moved to CPU as a NumPy array.
        f = resnet(img).view(-1).cpu().numpy()
        features.append(f)

if features:
    features = np.vstack(features)
else:
    # np.vstack raises ValueError on an empty list. Probe the backbone once
    # with a dummy input so an empty CSV still yields a well-formed
    # (0, n_features) matrix with the same width real images would produce.
    with torch.no_grad():
        _probe = resnet(torch.zeros(1, 3, 224, 224, device=device))
    features = np.empty((0, _probe.numel()), dtype=np.float32)

# Persist the raw feature matrix on its own.
np.save(OUTPUT_NPY, features)

# Label the feature columns cnn_0 .. cnn_{k-1}, then attach them to the input
# rows side by side; the input index is reset so both frames share 0..n-1.
_feature_width = features.shape[1]
_cnn_columns = [f"cnn_{i}" for i in range(_feature_width)]
cnn_df = pd.DataFrame(features, columns=_cnn_columns)

_rows = df.reset_index(drop=True)
final_df = pd.concat([_rows, cnn_df], axis="columns")
final_df.to_csv(OUTPUT_CSV, index=False)

print("Done.")
