In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd
from scipy import ndimage

from shapely.geometry import shape

from rasterio.features import shapes

import rasterio

In [None]:
# space_object_form_pipeline_fixed.py



def spatial_objects_formation(
    excavationmaps,
    mine_boundary_gdf,
    x_pt_mon,
    min_area_m2=1000.0,
):
    """
    Vectorize binary excavation rasters into polygons clipped to the mine boundary.

    Parameters
    ----------
    excavationmaps : dict
        Maps a timestamp to a 2-D raster (dims ``(y, x)``) exposing ``.values``;
        cells equal to 1 are treated as excavation, everything else as background.
    mine_boundary_gdf : geopandas.GeoDataFrame
        Mine polygon(s). Must carry a CRS.
    x_pt_mon : mapping of band name -> raster (e.g. an xarray.Dataset)
        Monitoring cube. Its ``"NDVI"`` entry supplies the CRS and affine
        transform that are applied to every excavation raster.
    min_area_m2 : float, default 1000.0
        Clipped polygons with a smaller area are dropped.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per clipped polygon, in the mine CRS, with ``time``, the columns
        contributed by the overlay with ``mine_boundary_gdf``, ``geometry`` and
        ``area_m2``. An empty frame (same CRS, ``time``/``geometry``/``area_m2``
        columns) is returned when no raster contains excavation cells.

    Raises
    ------
    ValueError
        If ``mine_boundary_gdf`` has no CRS or an excavation raster is not 2-D.
    """
    ref_da = x_pt_mon["NDVI"]

    # Trust the CRS recorded on the reference band. Only fall back to the
    # project default when none is recorded, and never modify the caller's cube.
    raster_crs = ref_da.rio.crs
    if raster_crs is None:
        raster_crs = "EPSG:32644"
    raster_transform = ref_da.rio.transform()

    if mine_boundary_gdf.crs is None:
        raise ValueError("mine_boundary_gdf must have a valid CRS.")
    mine_crs = mine_boundary_gdf.crs

    records = []
    for t, da in excavationmaps.items():
        values = da.values
        if values.ndim != 2:
            raise ValueError("excavation DataArray must be 2D (y, x).")

        # An explicit comparison keeps NaN / nodata cells as background instead
        # of relying on an undefined float -> uint8 cast.
        excavated = (values == 1).astype(np.uint8)

        records.extend(
            {"time": t, "geometry": shape(geom)}
            for geom, val in shapes(excavated, transform=raster_transform)
            if val == 1
        )

    if not records:
        # Same CRS and area column as the populated result, so downstream code
        # does not need a special case for "nothing found".
        return gpd.GeoDataFrame(
            {"time": [], "geometry": [], "area_m2": []},
            geometry="geometry",
            crs=mine_crs,
        )

    # Bring the polygons into the mine CRS, then clip them to the mine boundary.
    gdf = gpd.GeoDataFrame(records, crs=raster_crs).to_crs(mine_crs)
    gdf = gpd.overlay(gdf, mine_boundary_gdf, how="intersection")

    # Measure areas in a projected CRS. For a geographic mine CRS the areas are
    # taken from a temporary UTM copy, so the returned geometries are never
    # round-tripped through another projection.
    # NOTE(review): a projected mine CRS is assumed to use metres - confirm.
    if mine_crs.is_projected:
        areas = gdf.geometry.area
    else:
        metric_crs = mine_boundary_gdf.estimate_utm_crs()
        areas = gdf.to_crs(metric_crs).geometry.area

    gdf = gdf.assign(area_m2=areas)

    # Keep only objects that reach the minimum area.
    return gdf[gdf["area_m2"] >= min_area_m2]


In [None]:
# Spatial object formation
#
# Example integration snippet inside pipeline_upto_space.
#
# The monitoring cube (xarray.Dataset) provides the CRS and affine transform:
# spatial_objects_formation reads them from the cube's "NDVI" band, so the cube
# is a required argument of the call below.
# NOTE(review): assumes the cube is bound to `x_pt_mon` in an earlier cell -
# confirm the variable name before running.

objectsgdf = spatial_objects_formation(
    excavationmaps=excavation_maps,
    mine_boundary_gdf=mine_sel,  # from your shapefile
    x_pt_mon=x_pt_mon,
    min_area_m2=2000.0,
)


In [None]:

def spatial_objects_degrees(
    excavationmaps,
    mine_boundary_gdf,
    min_pixels=3,
    overlap_thresh=0.3,
    confidence_days=15
):
    """
    Extract bounding-box excavation objects per timestep and track their start date.

    For every timestep (processed in sorted key order) the cells equal to 1 are
    grouped into connected components with ``scipy.ndimage.label``. Each
    component with at least ``min_pixels`` cells becomes one rectangle covering
    the full extent of its pixels, which is then clipped to the mine boundary.

    Parameters
    ----------
    excavationmaps : dict
        Maps a timestamp to a 2-D raster exposing the rioxarray ``.rio``
        accessor (resolution, transform, crs).
    mine_boundary_gdf : geopandas.GeoDataFrame
        Mine polygon(s) used to clip the rectangles.
    min_pixels : int, default 3
        Components with fewer cells are discarded.
    overlap_thresh : float, default 0.3
        A rectangle inherits the ``start_date`` of the best-overlapping object
        of the previously stored timestep when the intersection covers at least
        this fraction of the rectangle's own area.
    confidence_days : int, default 15
        Number of days of persistence that maps to a confidence of 1.0.

    Returns
    -------
    geopandas.GeoDataFrame
        Concatenation of the clipped objects of all timesteps with columns
        ``date``, ``start_date``, ``pixel_count``, ``area_ha``, ``confidence``,
        ``geometry`` plus whatever columns the overlay with
        ``mine_boundary_gdf`` contributes. ``pixel_count`` and ``area_ha``
        describe the whole component, before clipping.

    Notes
    -----
    The output CRS is the CRS recorded on the rasters; when none is recorded the
    historical default ``EPSG:32644`` is assumed. ``area_ha`` is derived from
    the raster resolution and is only meaningful when that resolution is
    expressed in metres.
    """
    fallback_crs = "EPSG:32644"
    ordered_times = sorted(excavationmaps.keys())

    # Prefer the CRS the rasters declare; the hard-coded default is only used
    # when no raster carries one.
    declared_crs = next(
        (
            excavationmaps[t].rio.crs
            for t in ordered_times
            if excavationmaps[t].rio.crs is not None
        ),
        None,
    )
    raster_crs = declared_crs if declared_crs is not None else fallback_crs

    all_objects = []
    previous_objects = gpd.GeoDataFrame(
        {"geometry": [], "start_date": []},
        geometry="geometry",
        crs=raster_crs
    )

    # Clip in the raster CRS. The boundary is only reprojected when the raster
    # CRS is actually known; with the assumed default the boundary is used
    # untouched, as before.
    boundary = mine_boundary_gdf
    if (
        declared_crs is not None
        and mine_boundary_gdf.crs is not None
        and mine_boundary_gdf.crs != previous_objects.crs
    ):
        boundary = mine_boundary_gdf.to_crs(previous_objects.crs)

    for t in ordered_times:
        exc_map = excavationmaps[t]

        mask_binary = (exc_map == 1).values
        n_pixels_total = int(mask_binary.sum())
        if n_pixels_total == 0:
            continue

        print(f"{t}: {n_pixels_total} pixels")

        # Pixel area in hectares (resolution assumed to be in metres).
        res_x, res_y = exc_map.rio.resolution()
        pixel_area_ha = abs(res_x * res_y) / 10_000

        # The transform is constant for the whole raster, so fetch it once.
        transform = exc_map.rio.transform()

        labeled, num_features = ndimage.label(mask_binary)

        # One pass over the label image gives every component's size and
        # bounding slices, instead of rescanning the array once per label.
        component_sizes = np.bincount(labeled.ravel(), minlength=num_features + 1)
        bounding_slices = ndimage.find_objects(labeled)

        polygons = []

        for label_id, (row_slice, col_slice) in enumerate(bounding_slices, start=1):
            pixel_count = int(component_sizes[label_id])

            if pixel_count < min_pixels:
                continue

            # Slice stops are exclusive, so the upper-left corner of cell
            # (stop, stop) is the outer lower-right corner of the component.
            # Using pixel edges rather than pixel centres keeps single-row or
            # single-column components from collapsing to zero-area polygons.
            top, bottom = row_slice.start, row_slice.stop
            left, right = col_slice.start, col_slice.stop

            ring = [
                rasterio.transform.xy(transform, top, left, offset="ul"),
                rasterio.transform.xy(transform, top, right, offset="ul"),
                rasterio.transform.xy(transform, bottom, right, offset="ul"),
                rasterio.transform.xy(transform, bottom, left, offset="ul"),
            ]
            ring.append(ring[0])  # close the ring explicitly

            poly = shape({
                "type": "Polygon",
                "coordinates": [ring]
            })

            # ---------- START_DATE LOGIC ----------
            # A rectangle that sufficiently overlaps an object from the
            # previously stored timestep continues that object.
            start_date = t

            if not previous_objects.empty:
                poly_area = poly.area

                if poly_area > 0:
                    overlap_ratio = (
                        previous_objects.geometry.intersection(poly).area / poly_area
                    )
                    if (overlap_ratio >= overlap_thresh).any():
                        matched_idx = overlap_ratio.idxmax()
                        start_date = previous_objects.loc[matched_idx, "start_date"]
            # -------------------------------------

            # ---------- CONFIDENCE (temporal persistence) ----------
            duration_days = (pd.to_datetime(t) - pd.to_datetime(start_date)).days
            confidence = np.clip(duration_days / confidence_days, 0.0, 1.0)
            # ------------------------------------------------------

            polygons.append({
                "date": t,
                "start_date": start_date,
                "geometry": poly,
                "pixel_count": pixel_count,
                "area_ha": pixel_count * pixel_area_ha,
                "confidence": confidence
            })

        if not polygons:
            continue

        gdf_date = gpd.GeoDataFrame(
            polygons,
            geometry="geometry",
            crs=raster_crs
        )

        gdf_clipped = gpd.overlay(gdf_date, boundary, how="intersection")

        all_objects.append(gdf_clipped)

        # Update memory for next timestep
        previous_objects = gdf_clipped[["geometry", "start_date"]].copy()

        print(f"  → {len(gdf_clipped)} objects (≥{min_pixels} pixels)")

    if not all_objects:
        return gpd.GeoDataFrame(
            {
                "date": [],
                "start_date": [],
                "pixel_count": [],
                "area_ha": [],
                "confidence": [],
                "geometry": []
            },
            geometry="geometry",
            crs=raster_crs
        )

    # Use pandas directly rather than reaching through geopandas' namespace.
    return pd.concat(all_objects, ignore_index=True)


In [None]:
# Run the extraction over every excavation map, keeping components of two or
# more pixels, then report the total and preview the first ten rows.
objectsgdf = spatial_objects_degrees(
    excavationmaps=excavationmaps,
    mine_boundary_gdf=mine_sel,
    min_pixels=2,
)
print(f"\n✅ TOTAL: {len(objectsgdf)} excavation objects!")
print(
    objectsgdf[["date", "pixel_count"]].head(10)
)