In [39]:
import geopandas as gpd
import rasterio
from rasterio.crs import CRS
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterio.crs import CRS
from rasterstats import zonal_stats
from rasterio.mask import mask
import numpy as np
import pandas as pd
import networkx as nx
from shapely.ops import unary_union
from shapely.strtree import STRtree
from shapely.geometry import mapping
import tempfile
import os
import sys
import logging

# Add the project root to sys.path so we can import from Code.utils everywhere
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from Code.utils.utility import load_config, resolve_path
from Code.utils.spatial_utility import load_and_reproject

# Load configuration
config = load_config()

for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)


logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s: %(message)s',
    handlers=[
        logging.FileHandler("trim_debug.log", mode='w'),  # overwrite each time
        logging.StreamHandler()  # optional: show in notebook output
    ]
)

logger = logging.getLogger(__name__)


In [40]:
def save_masked_raster(array, transform, meta, out_path):
        meta_out = meta.copy()
        meta_out.update({
            'driver': 'AAIGrid',
            'dtype': array.dtype,
            'height': array.shape[0],
            'width': array.shape[1],
            'transform': transform,
            'crs': meta['crs'],
            'count': 1
        })
        logger.info(f"💾 Writing to: {out_path}")
        with rasterio.open(out_path, 'w', **meta_out) as dst:
            dst.write(array, 1)

def trim_aei_to_study_area():
    import time
    logger.info(f"⏱️ Function entry time: {time.time()}")
    ssa_arid_shp_fp = resolve_path(config['SSA_Arid_by_Country_shp_path'])
    study_area = load_and_reproject(ssa_arid_shp_fp, target_crs="EPSG:3857")
    study_area_union = study_area.geometry.unary_union

    aei_years = [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015]

    for year in aei_years:
        logger.info(f"🔁 Processing year: {year}")
        raster_key = f"Africa_AEI_{year}_asc_path"
        irrig_raster_path = resolve_path(config[raster_key])
        output_key = f"Irrigation_Arid_SSA_{year}_asc_path"
        output_path = resolve_path(config[output_key])

        with rasterio.open(irrig_raster_path) as src:
            meta = src.meta.copy()
            if src.crs is None:
                logger.info(f"⚠️ Warning: AEI raster for {year} missing CRS. Forcing EPSG:3857.")
                meta['crs'] = CRS.from_epsg(3857)

            masked, out_transform = mask(
                src,
                [study_area_union],
                crop=True,
                all_touched=True,
                nodata=src.nodata
            )

        array = masked[0]
        assert array.ndim == 2, f"Expected 2D array, got shape {array.shape}"
        logger.info(f"📐 Masked array shape: {array.shape}")
        
        meta.update({
            "transform": out_transform,
            "height": array.shape[0],
            "width": array.shape[1]
        })

        save_masked_raster(array, out_transform, meta, output_path)
        logger.info(f"✅ Trimmed AEI raster for {year} (all_touched) saved to: {output_path}")


In [41]:
trim_aei_to_study_area()

2025-07-16 17:54:30,579 INFO: ⏱️ Function entry time: 1752713670.579795
2025-07-16 17:54:38,525 INFO: 🔁 Processing year: 1980
2025-07-16 17:54:38,525 INFO: 🔁 Processing year: 1980
2025-07-16 17:54:40,994 INFO: 📐 Masked array shape: (2160, 4320)
2025-07-16 17:54:40,994 INFO: 📐 Masked array shape: (2160, 4320)
2025-07-16 17:54:40,995 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1980.asc
2025-07-16 17:54:40,995 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1980.asc
2025-07-16 17:54:44,644 INFO: ✅ Trimmed AEI raster for 1980 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1980.asc
2025-07-16 17:54:44,644 INFO: ✅ Trimmed AEI raster for 1980 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1980.asc
2025-07-16 17:54:44,645 INFO: 🔁 Processing year: 1985
2025-07-16 17:54:44,645 INFO: 🔁 Processing year: 1985
2025-07-16

In [25]:
# --- Merge Overlapping Command Areas and Save as Shapefile ---

# Load command area shapefile
ca = load_and_reproject(resolve_path(config['No_Crop_Vectorized_Command_Area_shp_path']), target_crs="EPSG:3857")

# Build an undirected graph where nodes are polygon indices, edges mean overlap
G = nx.Graph()
G.add_nodes_from(range(len(ca)))
for i, geom1 in enumerate(ca.geometry):
    for j in range(i+1, len(ca)):
        geom2 = ca.geometry.iloc[j]
        if geom1.intersects(geom2):
            G.add_edge(i, j)

# Find connected components (groups of overlapping polygons)
groups = list(nx.connected_components(G))

# Merge polygons in each group
merged_geoms = []
n_merged = []
merged_gdw_ids = []
for group in groups:
    group_indices = list(group)
    group_df = ca.iloc[group_indices]
    merged_geom = unary_union(group_df.geometry)
    merged_geoms.append(merged_geom)
    n_merged.append(len(group_df))
    merged_gdw_ids.append(list(group_df['GDW_ID']))

CA_No_Overlap = gpd.GeoDataFrame({
    'geometry': merged_geoms,
    'n_merged': n_merged,
    'merged_GDW_IDs': [",".join(map(str, ids)) for ids in merged_gdw_ids]
}, crs=ca.crs)

# Check for overlaps in CA_No_Overlap
geoms = list(CA_No_Overlap.geometry)
overlap_found = False
for i, geom in enumerate(geoms):
    matches = [j for j, other in enumerate(geoms) if i != j and geom.intersects(other)]
    if matches:
        overlap_found = True
        print(f"Overlap found for geometry {i} (overlaps with: {matches})")
        break
if not overlap_found:
    output_path = resolve_path(config['No_Crop_Vectorized_CA_UniLayer_shp_path'])
    CA_No_Overlap.to_file(output_path, driver='ESRI Shapefile')
    print(f"Saved non-overlapping command areas to: {output_path}")
else:
    print('Overlaps detected in CA_No_Overlap!')

  CA_No_Overlap.to_file(output_path, driver='ESRI Shapefile')


Saved non-overlapping command areas to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp
