In [47]:
import os
from concurrent.futures import ProcessPoolExecutor
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from shapely.geometry import box

def ensure_crs(gdf, target_crs):
    """Return ``gdf`` expressed in ``target_crs``.

    The input object is handed back untouched when its ``crs`` already
    equals ``target_crs``; otherwise the result of ``gdf.to_crs(target_crs)``
    is returned.
    """
    return gdf.to_crs(target_crs) if gdf.crs != target_crs else gdf

def clip_raster_to_bbox(raster_path, bbox_geom, output_path):
    """Clip a raster to a single geometry and write the result as a GeoTIFF.

    Args:
        raster_path: Path of the raster to read.
        bbox_geom: Geometry to clip to. It is passed to ``mask`` unchanged
            (no reprojection happens here), so it is expected to already be
            in the raster's CRS.
        output_path: Destination file. Its parent directory is created when
            missing; a bare filename (no directory part) is written relative
            to the current working directory.

    Errors raised by ``rasterio.open`` or ``mask`` are not caught here.
    """
    with rasterio.open(raster_path) as src:
        clipped, transform = mask(src, [bbox_geom], crop=True)
        out_meta = src.meta.copy()

    # Start from the source metadata and override only what the crop changed.
    # `clipped` is indexed as (bands, rows, cols).
    out_meta.update({
        "driver": "GTiff",
        "height": clipped.shape[1],
        "width": clipped.shape[2],
        "transform": transform
    })

    # os.path.dirname() returns "" for a bare filename and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with rasterio.open(output_path, "w", **out_meta) as dest:
        dest.write(clipped)

def process_single_polygon(id, sg_bounds, sg_crs, tif_files, tilename, vboxes_dir):
    """Clip every raster in ``tif_files`` to one polygon's bounding box.

    Args:
        id: Identifier of the polygon; used to name the output folder ``ID<id>``.
        sg_bounds: Bounds unpacked into ``shapely.geometry.box``
            (presumably ``(minx, miny, maxx, maxy)``).
        sg_crs: CRS that ``sg_bounds`` is expressed in.
        tif_files: Iterable of raster paths to clip.
        tilename: Name of the sub-folder created under ``vboxes_dir``.
        vboxes_dir: Root output directory.

    Each clip is written to ``<vboxes_dir>/<tilename>/ID<id>/<raster basename>``.
    """
    # The output folder is the same for every raster, so build it once.
    out_dir = os.path.join(vboxes_dir, tilename, f'ID{id}')

    for tif_path in tif_files:
        # Only the CRS is needed here; release the handle before
        # clip_raster_to_bbox opens the same file again.
        with rasterio.open(tif_path) as src:
            raster_crs = src.crs

        if sg_crs != raster_crs:
            # NOTE(review): after reprojection the box may no longer be
            # axis-aligned in the raster CRS - confirm whether the clip should
            # use its envelope instead.
            sg_bbox = gpd.GeoSeries([box(*sg_bounds)], crs=sg_crs).to_crs(raster_crs).geometry.values[0]
        else:
            sg_bbox = box(*sg_bounds)

        out_filename = os.path.join(out_dir, os.path.basename(tif_path))
        clip_raster_to_bbox(tif_path, sg_bbox, out_filename)

def clip_tifs_by_vbox_rois(vpath, tif_files, tilename="mytile", vboxes_dir="vboxes", max_workers=4, parallel=True):
    """Clip a set of rasters to the bounding box of every feature in a vector file.

    Args:
        vpath: Path of the vector file, read with ``geopandas.read_file``.
        tif_files: Iterable of raster paths; each raster is clipped once per feature.
        tilename: Sub-folder name used under ``vboxes_dir``.
        vboxes_dir: Root output directory (created if missing).
        max_workers: Number of worker processes when ``parallel`` is true.
        parallel: Run one task per feature in a process pool when true,
            otherwise process the features sequentially in this process.

    Features whose geometry is missing or empty are skipped because they have
    no usable bounds. Any exception raised while processing a feature
    propagates to the caller in both the serial and the parallel mode.
    """
    os.makedirs(vboxes_dir, exist_ok=True)
    gdf = gpd.read_file(vpath)
    sg_crs = gdf.crs

    # Materialise once: the same collection is shared by every task, so a
    # one-shot iterator would be exhausted after the first feature.
    tif_files = list(tif_files)

    tasks = []
    for poly_id, geom in gdf.geometry.items():
        if geom is None or geom.is_empty:
            continue
        tasks.append((poly_id, geom.bounds, sg_crs, tif_files, tilename, vboxes_dir))

    if parallel and tasks:
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            # Submit the module-level function directly: a lambda cannot be
            # pickled and therefore cannot be sent to worker processes.
            futures = [executor.submit(process_single_polygon, *task) for task in tasks]
            # Collect every result so worker failures are re-raised here
            # instead of being silently dropped.
            for future in futures:
                future.result()
    else:
        for task in tasks:
            process_single_polygon(*task)


In [48]:
tilename = "N13E103"

# All rasters of a tile live in one folder; build that prefix once instead of
# repeating it for every layer.
tile_dir = f"/media/ljp238/12TBWolf/BRCHIEVE/TILES12/{tilename}"
ldem_fn = f"{tile_dir}/{tilename}_ldem.tif"
esa_fn = f"{tile_dir}/{tilename}_esawc_x.tif"
s1_fn = f"{tile_dir}/{tilename}_s1.tif"
s2_fn = f"{tile_dir}/{tilename}_s2.tif"
edem_fn = f"{tile_dir}/{tilename}_edem_egm.tif"
eth_fn = f"{tile_dir}/{tilename}_etchm.tif"
wsfbh = f"{tile_dir}/{tilename}_wsfbh.tif"

# Vector file with the ROI polygons for this tile; adjust the name as needed.
vpath = f"/home/ljp238/Downloads/vboxes/{tilename}_ROIs.gpkg"

# TODO: add more variables (e.g. eth, hem).
tif_files = [ldem_fn, esa_fn, s1_fn, s2_fn, edem_fn, eth_fn, wsfbh]

vboxes_dir = "vboxes"
os.makedirs(vboxes_dir, exist_ok=True)
cpus = 12

In [None]:
# Parallel variant (parallel=True is the default), currently disabled:
# cpus = 12
# clip_tifs_by_vbox_rois(vpath=vpath, tif_files=tif_files,
#                      tilename=tilename, vboxes_dir=vboxes_dir,
#                      max_workers=cpus)

In [50]:
# Serial run: the ROI polygons are processed one after another in this process.
clip_tifs_by_vbox_rois(
    vpath=vpath,
    tif_files=tif_files,
    tilename=tilename,
    vboxes_dir=vboxes_dir,
    parallel=False,
)

- run 
- clean [x]
- zip 
- upload to both OneDrive and Gmail
- push to Git
