In [10]:
import logging
import os
import sys
import tempfile
import time

import geopandas as gpd
import networkx as nx
import numpy as np
import pandas as pd
import rasterio
from rasterio.crs import CRS
from rasterio.io import MemoryFile
from rasterio.mask import mask
from rasterio.warp import calculate_default_transform, reproject, Resampling
from rasterstats import zonal_stats
from shapely.geometry import mapping
from shapely.ops import unary_union
from shapely.strtree import STRtree

# Add the project root to sys.path so we can import from Code.utils everywhere.
# The root is taken to be two directories above the current working directory,
# so this cell must be run with the notebook's own folder as the CWD.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from Code.utils.spatial_utility import load_and_reproject
from Code.utils.utility import load_config, resolve_path

# Load configuration
config = load_config()

# Reset root logging so re-running this cell does not stack handlers.
# Each removed handler is also closed: a FileHandler keeps its file open
# until close() is called, so removing it alone would leak the descriptor
# for trim_debug.log on every re-run.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
    handler.close()

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s: %(message)s',
    handlers=[
        logging.FileHandler("trim_debug.log", mode='w'),  # overwrite each time
        logging.StreamHandler()  # optional: show in notebook output
    ]
)

logger = logging.getLogger(__name__)

### Cropping Irrigation Rasters to Study Area
This section processes irrigation rasters to focus on the arid regions of Sub-Saharan Africa (SSA). It involves reprojecting the study area shapefile, masking rasters to the study area, and saving the trimmed rasters for analysis.

In [11]:
def save_masked_raster(array, transform, meta, out_path):
    """Write a 2D array to ``out_path`` as a single-band GeoTIFF.

    Parameters
    ----------
    array : 2D array whose dtype and shape define the output band.
    transform : affine transform stored with the output raster.
    meta : mapping of raster metadata used as the base profile; it is not
        modified. Its ``'crs'`` entry is required and is carried over.
    out_path : destination path handed to ``rasterio.open`` in write mode.
    """
    n_rows = array.shape[0]
    n_cols = array.shape[1]

    # Start from the caller's metadata and override only what the output needs.
    profile = {
        **meta,
        'driver': 'GTiff',
        'dtype': array.dtype,
        'height': n_rows,
        'width': n_cols,
        'transform': transform,
        'crs': meta['crs'],
        'count': 1,
    }

    logger.info(f"💾 Writing to: {out_path}")
    with rasterio.open(out_path, 'w', **profile) as dst:
        dst.write(array, 1)

def _mask_to_geometry(dataset, geometry):
    """Crop and mask an open raster dataset to one geometry.

    Returns ``(masked, out_transform, bounds)``: the masked array and affine
    transform from ``rasterio.mask.mask`` plus the bounds of ``dataset``,
    read while the dataset is still open.
    """
    masked, out_transform = mask(
        dataset,
        [geometry],
        crop=True,
        all_touched=True,
        nodata=dataset.nodata
    )
    return masked, out_transform, dataset.bounds


def trim_aei_to_study_area():
    """Crop each AEI raster to the SSA arid study area and save it as GeoTIFF.

    For every year in the hard-coded list, the input path is looked up in
    ``config`` under ``Africa_AEI_<year>_asc_path`` and the output path under
    ``Irrigation_Arid_SSA_<year>_tif_path``. The study-area shapefile is
    loaded in EPSG:4326 and dissolved into a single geometry used as the mask
    (``crop=True``, ``all_touched=True``).

    If a raster has no CRS, or reports EPSG:3857 while its left/bottom bounds
    look like degrees, its CRS is overridden to EPSG:4326 before masking.

    Returns nothing; results are written to disk and progress is logged.
    """
    logger.info(f"⏱️ Function entry time: {time.time()}")

    # Reproject study area to match raster CRS (EPSG:4326)
    ssa_arid_shp_fp = resolve_path(config['SSA_Arid_by_Country_shp_path'])
    study_area = load_and_reproject(ssa_arid_shp_fp, target_crs="EPSG:4326")
    logger.info(f"Study area CRS: {study_area.crs}")
    study_area_union = study_area.geometry.unary_union

    aei_years = [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015]

    for year in aei_years:
        logger.info(f"🔁 Processing year: {year}")
        raster_key = f"Africa_AEI_{year}_asc_path"
        irrig_raster_path = resolve_path(config[raster_key])
        output_key = f"Irrigation_Arid_SSA_{year}_tif_path"
        output_path = resolve_path(config[output_key])

        # Load AEI raster and assign CRS if missing
        with rasterio.open(irrig_raster_path) as src_file:
            bounds = src_file.bounds
            meta = src_file.meta.copy()

            crs_needs_override = (
                src_file.crs is None or
                (src_file.crs.to_epsg() == 3857 and -180 <= bounds.left <= 180 and -90 <= bounds.bottom <= 90)
            )

            if crs_needs_override:
                logger.info("⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.")
                meta['crs'] = CRS.from_epsg(4326)

                # Re-open the raster in memory with corrected CRS. The context
                # managers release the in-memory file and both dataset handles
                # once masking is done, so nothing accumulates across years.
                with MemoryFile() as mem:
                    with mem.open(**meta) as patched_src:
                        patched_src.write(src_file.read(1), 1)
                    with mem.open() as src:
                        # Mask to study area (EPSG:4326)
                        masked, out_transform, raster_bounds = _mask_to_geometry(src, study_area_union)
            else:
                # Mask to study area (EPSG:4326)
                masked, out_transform, raster_bounds = _mask_to_geometry(src_file, study_area_union)

        # Bounds were captured while the dataset was open; it is closed here.
        print("Raster bounds:", raster_bounds)
        print("Study area bounds:", study_area_union.bounds)

        array = masked[0]
        assert array.ndim == 2, f"Expected 2D array, got shape {array.shape}"
        logger.info(f"📐 Masked array shape: {array.shape}")

        meta.update({
            "transform": out_transform,
            "height": array.shape[0],
            "width": array.shape[1]
        })

        # Min/Max are computed over every cell, including cells filled with
        # the nodata value by the mask.
        print("Min:", np.min(array))
        print("Max:", np.max(array))

        save_masked_raster(array, out_transform, meta, output_path)
        logger.info(f"✅ Trimmed AEI raster for {year} (all_touched) saved to: {output_path}")
        
# Run the trim for every AEI year listed inside the function; each result is
# written to the output path resolved from the config.
trim_aei_to_study_area()

2025-07-17 17:36:06,128 INFO: ⏱️ Function entry time: 1752798966.1288295
2025-07-17 17:36:08,789 INFO: Study area CRS: EPSG:4326
2025-07-17 17:36:08,789 INFO: Study area CRS: EPSG:4326
2025-07-17 17:36:15,826 INFO: 🔁 Processing year: 1980
2025-07-17 17:36:15,826 INFO: 🔁 Processing year: 1980
2025-07-17 17:36:16,040 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:16,040 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:20,937 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:20,937 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:20,939 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1980.tif
2025-07-17 17:36:20,939 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1980.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 5679.718


2025-07-17 17:36:22,063 INFO: ✅ Trimmed AEI raster for 1980 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1980.tif
2025-07-17 17:36:22,065 INFO: 🔁 Processing year: 1985
2025-07-17 17:36:22,065 INFO: 🔁 Processing year: 1985
2025-07-17 17:36:22,355 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:22,355 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:27,557 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:27,557 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:27,559 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1985.tif
2025-07-17 17:36:27,559 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1985.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 5900.111


2025-07-17 17:36:28,503 INFO: ✅ Trimmed AEI raster for 1985 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1985.tif
2025-07-17 17:36:28,505 INFO: 🔁 Processing year: 1990
2025-07-17 17:36:28,505 INFO: 🔁 Processing year: 1990
2025-07-17 17:36:28,796 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:28,796 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:33,989 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:33,991 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1990.tif
2025-07-17 17:36:33,989 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:33,991 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1990.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 6073.035


2025-07-17 17:36:35,250 INFO: ✅ Trimmed AEI raster for 1990 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1990.tif
2025-07-17 17:36:35,252 INFO: 🔁 Processing year: 1995
2025-07-17 17:36:35,252 INFO: 🔁 Processing year: 1995
2025-07-17 17:36:35,560 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:35,560 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:40,860 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:40,860 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:40,861 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1995.tif
2025-07-17 17:36:40,861 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1995.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 7504.133


2025-07-17 17:36:41,846 INFO: ✅ Trimmed AEI raster for 1995 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_1995.tif
2025-07-17 17:36:41,847 INFO: 🔁 Processing year: 2000
2025-07-17 17:36:41,847 INFO: 🔁 Processing year: 2000
2025-07-17 17:36:42,170 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:42,170 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:47,415 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:47,415 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:47,417 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2000.tif
2025-07-17 17:36:47,417 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2000.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 7504.133


2025-07-17 17:36:48,246 INFO: ✅ Trimmed AEI raster for 2000 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2000.tif
2025-07-17 17:36:48,248 INFO: 🔁 Processing year: 2005
2025-07-17 17:36:48,248 INFO: 🔁 Processing year: 2005
2025-07-17 17:36:48,547 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:48,547 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:53,753 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:53,753 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:36:53,754 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2005.tif
2025-07-17 17:36:53,754 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2005.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 7504.133


2025-07-17 17:36:54,606 INFO: ✅ Trimmed AEI raster for 2005 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2005.tif
2025-07-17 17:36:54,608 INFO: 🔁 Processing year: 2010
2025-07-17 17:36:54,608 INFO: 🔁 Processing year: 2010
2025-07-17 17:36:54,900 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:36:54,900 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:37:00,138 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:37:00,138 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:37:00,141 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2010.tif
2025-07-17 17:37:00,141 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2010.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 7504.133


2025-07-17 17:37:01,038 INFO: ✅ Trimmed AEI raster for 2010 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2010.tif
2025-07-17 17:37:01,040 INFO: 🔁 Processing year: 2015
2025-07-17 17:37:01,040 INFO: 🔁 Processing year: 2015
2025-07-17 17:37:01,345 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:37:01,345 INFO: ⚠️ Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.
2025-07-17 17:37:06,682 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:37:06,682 INFO: 📐 Masked array shape: (746, 828)
2025-07-17 17:37:06,684 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2015.tif
2025-07-17 17:37:06,684 INFO: 💾 Writing to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2015.tif


Raster bounds: BoundingBox(left=-180.0, bottom=-90.0, right=180.0, top=90.0)
Study area bounds: (-17.541666673165007, -34.83333327073998, 51.4156951904301, 27.2980709075929)
Min: -9.0
Max: 7504.133


2025-07-17 17:37:07,080 INFO: ✅ Trimmed AEI raster for 2015 (all_touched) saved to: /home/waves/data/Africa_Irrigation/Data/Processed/Irrigation_Arid_SSA_2015.tif


### Merge overlapping command areas and save them as a separate shapefile

In [12]:
# Load command area shapefile (reprojected to EPSG:3857)
ca = load_and_reproject(resolve_path(config['No_Crop_Vectorized_Command_Area_shp_path']), target_crs="EPSG:3857")

# Build an undirected graph where nodes are positional polygon indices and an
# edge means the two polygons intersect. The spatial index returns only the
# polygons that actually intersect each geometry, which avoids testing every
# pair explicitly.
G = nx.Graph()
G.add_nodes_from(range(len(ca)))
ca_sindex = ca.sindex
for i, geom1 in enumerate(ca.geometry):
    for j in ca_sindex.query(geom1, predicate="intersects"):
        # Each unordered pair is added once; j == i is the polygon itself.
        if j > i:
            G.add_edge(i, int(j))

# Find connected components (groups of overlapping polygons)
groups = list(nx.connected_components(G))

# Merge polygons in each group
merged_geoms = []
n_merged = []
merged_gdw_ids = []
for group in groups:
    # Sort the member indices so the recorded GDW_ID order is deterministic
    # (iteration order of a set is not guaranteed).
    group_indices = sorted(group)
    group_df = ca.iloc[group_indices]
    merged_geom = unary_union(group_df.geometry)
    merged_geoms.append(merged_geom)
    n_merged.append(len(group_df))
    merged_gdw_ids.append(list(group_df['GDW_ID']))

CA_No_Overlap = gpd.GeoDataFrame({
    'geometry': merged_geoms,
    'n_merged': n_merged,
    'merged_GDW_IDs': [",".join(map(str, ids)) for ids in merged_gdw_ids]
}, crs=ca.crs)

# Sanity check: no two merged geometries should still intersect.
# Stops at the first offending geometry and reports its partners.
geoms = list(CA_No_Overlap.geometry)
merged_sindex = CA_No_Overlap.sindex
overlap_found = False
for i, geom in enumerate(geoms):
    matches = sorted(
        int(j) for j in merged_sindex.query(geom, predicate="intersects") if j != i
    )
    if matches:
        overlap_found = True
        print(f"Overlap found for geometry {i} (overlaps with: {matches})")
        break
if not overlap_found:
    output_path = resolve_path(config['No_Crop_Vectorized_CA_UniLayer_shp_path'])
    # NOTE(review): the Shapefile format limits field names to 10 characters
    # and text values to 254 characters, so 'merged_GDW_IDs' and long ID lists
    # are presumably truncated on write — confirm, or switch to GeoPackage.
    CA_No_Overlap.to_file(output_path, driver='ESRI Shapefile')
    print(f"Saved non-overlapping command areas to: {output_path}")
else:
    print('Overlaps detected in CA_No_Overlap!')

  CA_No_Overlap.to_file(output_path, driver='ESRI Shapefile')


Saved non-overlapping command areas to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp
