<a href="https://colab.research.google.com/github/shreyammb/INNOV8TIGERS/blob/main/predict_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the raster / ML packages this notebook imports; only rasterio is pinned here.
!pip install rasterio==1.3.9 numpy pandas scikit-image scikit-learn xgboost lightgbm joblib tqdm shapely

# Pin NumPy to 1.26.4 in a second step (the install above leaves it unpinned),
# then print the version of the NumPy module that this session actually imports.
# NOTE(review): if NumPy was already imported before the pin, a runtime restart
# may be needed for the pinned version to take effect — confirm in Colab.
!pip install numpy==1.26.4
import numpy as np
print("NumPy version:", np.__version__)

In [None]:
import os, math, json
import numpy as np, pandas as pd
import rasterio
from rasterio.warp import reproject, Resampling as WarpResampling
from skimage.feature import local_binary_pattern, canny
from skimage.filters import threshold_otsu
from skimage.morphology import remove_small_objects, opening, closing, square
from scipy.ndimage import uniform_filter
from skimage.util import img_as_ubyte
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, f1_score
import xgboost as xgb, lightgbm as lgb
from joblib import Parallel, delayed
from tqdm import tqdm
from google.colab import drive

In [None]:
# Mount Google Drive so the input rasters and the output folder are reachable.
drive.mount('/content/drive')

# Input rasters: the multi-band base image and the label mask.
BASE_TIF = "/content/drive/MyDrive/HYD/S2_BOM_V2.tif"
MASK_TIF = "/content/drive/MyDrive/HYD/sample_mask.tif"

# Output folder; created up front so later cells can write into it.
OUT_DIR = "/content/drive/MyDrive/HYD/out_slum"
os.makedirs(OUT_DIR, exist_ok=True)

# Echo the resolved locations for a quick sanity check.
for _label, _location in (("Base:", BASE_TIF), ("Mask:", MASK_TIF), ("Out dir:", OUT_DIR)):
    print(_label, _location)

# Window sizes / radii for the texture feature families.
LBP_RADII = [1, 3]
EDGE_WINDOWS = [3, 7]
ROOF_WINDOWS = [3, 7]
LACUNARITY_WINDOWS = [9, 21]
RANK_GLCM_WINDOW = 7

# Hyper-parameters for the three model families.
RF_PARAMS = {
    "n_estimators": 200,
    "max_depth": 12,
    "class_weight": "balanced",
    "random_state": 42,
}
XGB_PARAMS = {
    "n_estimators": 300,
    "max_depth": 4,
    "learning_rate": 0.05,
    "eval_metric": "logloss",
}
LGB_PARAMS = {
    "n_estimators": 500,
    "num_leaves": 31,
    "learning_rate": 0.05,
}

In [None]:
def read_raster_meta(path):
    """Return band names and a copy of the metadata for the raster at ``path``.

    No pixel data is read. A band without a description is named
    ``Band_<n>``, where ``n`` is its 1-based position.

    Returns:
        ``(names, meta)``: the list of band names and the metadata dict.
    """
    with rasterio.open(path) as dataset:
        meta = dataset.meta.copy()
        # ``descriptions`` holds one entry (str or None) per band.
        labels = [
            desc or f"Band_{pos}"
            for pos, desc in enumerate(dataset.descriptions, start=1)
        ]
    return labels, meta

def read_full_raster(path):
    """Load every band of the raster at ``path`` into memory as float32.

    A band without a description is named ``Band_<n>``, where ``n`` is its
    1-based position.

    Returns:
        ``(arr, names, meta)``: the pixel array as returned by
        ``src.read()`` cast to float32, the list of band names, and a copy
        of the dataset metadata dict.
    """
    with rasterio.open(path) as dataset:
        meta = dataset.meta.copy()
        # ``descriptions`` holds one entry (str or None) per band.
        labels = [
            desc or f"Band_{pos}"
            for pos, desc in enumerate(dataset.descriptions, start=1)
        ]
        cube = dataset.read().astype(np.float32)
    return cube, labels, meta

def find_idx_by_sub(substr, band_names=None):
    """Return the index of the first band name containing ``substr``.

    The comparison is case-insensitive and is a plain substring test, so a
    short key can match a longer name (e.g. ``"B8"`` also matches ``"B8A"``);
    the first match in list order wins.

    Args:
        substr: Text to look for inside each band name.
        band_names: Optional sequence of band names to search. When omitted,
            the module-level ``names`` list is used, looked up at call time,
            so results follow whatever ``names`` currently holds.

    Returns:
        The 0-based position of the first matching name, or ``None`` when no
        name contains ``substr``.
    """
    candidates = names if band_names is None else band_names
    needle = substr.upper()
    return next(
        (pos for pos, label in enumerate(candidates) if needle in label.upper()),
        None,
    )

In [None]:
# Load the base raster, append the road-distance layers, then sanitise the
# band stack: drop constant bands, clip bands with extreme value ranges, and
# drop bands that end up entirely NaN. `arr` / `names` / `bands` always
# describe the same, current band stack.
print("Loading raster...")
arr, names, meta = read_full_raster(BASE_TIF)
bands, H, W = arr.shape
print(f"Raster shape: bands={bands}, H={H}, W={W}")
print("Band names (in order):")
for i, nm in enumerate(names, 1):
    print(f"{i:02d}. {nm}")

# Road-distance layers, read directly at the base raster's (H, W) shape.
roads_tif = "/content/drive/MyDrive/BOM_WEAKSUP/road_distance_stack.tif"
with rasterio.open(roads_tif) as src:
    # Cast to float32 so the concatenation below cannot upcast the whole stack.
    roads_arr = src.read(out_shape=(src.count, H, W)).astype(np.float32, copy=False)
    roads_meta = src.meta

roads_names = [
    "dist_primary",
    "dist_secondary",
    "dist_tertiary",
    "dist_unclassified",
    "dist_residential"
]

# The names above are hard-coded; fail loudly if the file disagrees instead of
# silently mislabelling bands (or hitting an IndexError further down).
if roads_arr.shape[0] != len(roads_names):
    raise ValueError(
        f"{roads_tif} has {roads_arr.shape[0]} band(s) but "
        f"{len(roads_names)} road band names are configured"
    )

arr = np.concatenate([arr, roads_arr], axis=0)
names.extend(roads_names)
bands = arr.shape[0]

print("After adding roads bands:", arr.shape)

# Per-band value range over all pixels.
flat_view = arr.reshape(bands, -1)
mins = flat_view.min(axis=1)
maxs = flat_view.max(axis=1)
for i in range(bands):
    print(f"{i+1:02d}. {names[i]}  min={mins[i]:.6g}  max={maxs[i]:.6g}")

# A band whose min and max coincide carries no information.
const_idx = [i for i in range(bands) if np.isclose(mins[i], maxs[i])]
print("Constant bands (will be dropped):", [names[i] for i in const_idx])
const_lookup = set(const_idx)
keep_idx = [i for i in range(bands) if i not in const_lookup]
arr = arr[keep_idx, :, :]
names = [names[i] for i in keep_idx]
# Reuse the ranges computed above rather than rescanning every band.
band_lo = mins[keep_idx]
band_hi = maxs[keep_idx]
bands = arr.shape[0]
print("Kept bands:", len(names))

# |max/min| per band; a zero minimum with a non-zero maximum counts as infinite.
ratios = []
for lo, hi in zip(band_lo, band_hi):
    if lo == 0:
        r = np.inf if hi != 0 else 1.0
    else:
        r = abs(hi / lo)
    ratios.append(r)

# Flag bands with a non-finite or huge ratio, or an enormous maximum.
crazy = [
    i for i, r in enumerate(ratios)
    if (not np.isfinite(r)) or r > 1e6 or abs(band_hi[i]) > 1e12
]
if crazy:
    print("Bands with extreme ranges detected (will be clipped/logged/dropped):")
    for i in crazy:
        print(f" - {names[i]}  min={band_lo[i]:.6g}  max={band_hi[i]:.6g}")

# Clip flagged bands to their 0.5th..99.5th percentile range; a band whose
# percentiles are unusable is blanked to NaN so the next step removes it.
for i in crazy:
    p_lo, p_hi = np.nanpercentile(arr[i], [0.5, 99.5])
    if not np.isfinite(p_lo) or not np.isfinite(p_hi) or p_hi == p_lo:
        print("  dropping band (nonfinite percentiles):", names[i])
        arr[i, :, :] = np.nan
    else:
        print(f"  clipping band {names[i]} to [{p_lo:.3g}, {p_hi:.3g}]")
        arr[i, :, :] = np.clip(arr[i, :, :], p_lo, p_hi)

nan_bands = [i for i in range(arr.shape[0]) if np.all(np.isnan(arr[i]))]
if nan_bands:
    print("Dropping bands with all NaNs:", [names[i] for i in nan_bands])
    nan_lookup = set(nan_bands)
    keep = [i for i in range(arr.shape[0]) if i not in nan_lookup]
    arr = arr[keep, :, :]
    names = [names[i] for i in keep]

bands = arr.shape[0]
print("Final band list used:")
for i, nm in enumerate(names, 1):
    print(f"{i:02d}. {nm}")

# Build the key -> index map only now, so every index refers to the final
# `arr` / `names` (building it before the drops above leaves stale indices).
# NOTE(review): matching is by substring, so 'B8' resolves to whichever of
# B8 / B8A appears first in `names`, and 'B4' can hit e.g. 'B4_ASM' — confirm
# the band order makes this pick the intended bands.
band_map = {}
for key in [
    'B2','B3','B4','B8A','B8','B11','B12',
    'CSSI1','CSSI2','NDVI','NDBI','BSI','MNDWI','EVI',
    'GLCM','LBP','B4_ASM','B4_CONTRAST'
]:
    idx = find_idx_by_sub(key)
    if idx is not None:
        band_map[key] = idx
print("Detected band_map (0-based indices):", band_map)

In [None]:
import rasterio
from rasterio.mask import mask
import geopandas as gpd
from shapely.geometry import box

# Crop the base and road rasters to the label mask's footprint, warp the mask
# onto the cropped base grid, and derive a [0, 1]-normalised intensity image.
with rasterio.open(MASK_TIF) as ms:
    mask_arr = ms.read(1)
    mask_meta = ms.meta.copy()
    bounds = ms.bounds

with rasterio.open(BASE_TIF) as src:
    # NOTE(review): assumes the mask bounds are expressed in the base raster's
    # CRS — confirm; otherwise the bounds need transforming first.
    window = src.window(*bounds)
    arr_crop_base = src.read(window=window)
    transform_crop = src.window_transform(window)
    # Capture the CRS while the dataset is open: it is the CRS of the
    # destination grid that `transform_crop` describes.
    base_crs = src.crs

with rasterio.open(roads_tif) as src:
    # Derive the window from the same bounds on this dataset rather than
    # reusing the base raster's pixel window, which is only valid if both
    # rasters share one pixel grid; then read at the base crop's shape.
    roads_window = src.window(*bounds)
    arr_crop_roads = src.read(
        window=roads_window,
        out_shape=(src.count, arr_crop_base.shape[1], arr_crop_base.shape[2]),
    )

arr_crop = np.concatenate([arr_crop_base, arr_crop_roads], axis=0)

# Warp the mask onto the cropped base grid; nearest-neighbour keeps labels discrete.
target = np.zeros((arr_crop.shape[1], arr_crop.shape[2]), dtype=np.uint8)
reproject(
    source=mask_arr,
    destination=target,
    src_transform=mask_meta['transform'],
    src_crs=mask_meta['crs'],
    dst_transform=transform_crop,
    dst_crs=base_crs,
    resampling=WarpResampling.nearest
)

# Any non-zero mask value counts as a positive label.
mask_aligned = (target != 0).astype(np.uint8)

print("Mask aligned:", mask_aligned.shape, np.unique(mask_aligned))
print("Cropped satellite array:", arr_crop.shape)

# NOTE(review): mask_aligned is uint8 and therefore never NaN, so this selects
# every pixel of the crop; kept as-is because code outside this cell may rely
# on ys / xs covering the full grid.
ys, xs = np.where(~np.isnan(mask_aligned))
print("Found labeled pixels:", len(ys))

# Pick the band used as the intensity image: the first of B4 / B8A / B8 / B3
# found in the *current* `names`, so the index is valid for the current `arr`.
b4_idx = None
for k in ['B4','B8A','B8','B3']:
    name_found = [i for i, nm in enumerate(names) if k in nm.upper()]
    if name_found:
        b4_idx = name_found[0]
        break
if b4_idx is None:
    # No preferred band available: fall back to the per-pixel mean of all bands.
    intensity = np.nanmean(arr, axis=0)
else:
    intensity = arr[b4_idx, :, :]

# Stretch between the 1st and 99th percentiles (NaNs ignored) and clamp to [0, 1].
imin, imax = np.nanpercentile(intensity, [1, 99])
int_norm = (intensity - imin) / (imax - imin + 1e-9)
int_norm = np.clip(int_norm, 0.0, 1.0)