In [None]:
import logging
import networkx as nx
import numpy as np
import os
import rasterio
from rasterio.crs import CRS
from rasterio.io import MemoryFile
from rasterio.mask import mask
import sys
import time
from shapely.ops import unary_union

# Put the project root (two directories above the current working directory)
# at the front of sys.path so `Code.utils` can be imported from this notebook
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

from Code.utils.spatial_utility import load_and_reproject
from Code.utils.utility import load_config, resolve_path

# Load configuration; the cells below look up input and output paths in it by
# key and pass them through resolve_path()
config = load_config()

# Reset the root logger before configuring it, so re-running this cell does not
# stack duplicate handlers. Each removed handler is also closed: removeHandler()
# alone would leave the previous run's FileHandler holding trim_debug.log open.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
    handler.close()

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s: %(message)s',
    handlers=[
        logging.FileHandler("trim_debug.log", mode='w'),  # overwrite each time
        logging.StreamHandler()  # optional: show in notebook output
    ]
)

# Module-level logger used by the functions defined in later cells
logger = logging.getLogger(__name__)

### Cropping Irrigation Rasters to Study Area
This section crops the irrigation (AEI) rasters to the arid regions of Sub-Saharan Africa (SSA). It reprojects the study-area shapefile to EPSG:4326, masks each AEI raster (1980–2015, in five-year steps) to the study area, and saves the trimmed rasters as GeoTIFFs for analysis.

In [None]:
def save_masked_raster(array, transform, meta, out_path):
    """Write ``array`` as band 1 of a new GeoTIFF at ``out_path``.

    The output profile is a copy of ``meta`` with the driver, dtype, height,
    width, transform and band count overridden; the CRS is taken from
    ``meta['crs']``. The ``meta`` mapping passed in is left unchanged.

    Args:
        array: 2D array to write; its dtype and shape define the output.
        transform: Affine transform describing the array's georeferencing.
        meta: Raster metadata mapping used as the base profile.
        out_path: Destination path of the GeoTIFF.
    """
    profile = meta.copy()
    profile['driver'] = 'GTiff'
    profile['dtype'] = array.dtype
    profile['height'] = array.shape[0]
    profile['width'] = array.shape[1]
    profile['transform'] = transform
    profile['crs'] = meta['crs']
    profile['count'] = 1

    logger.info(f"Writing to: {out_path}")
    with rasterio.open(out_path, 'w', **profile) as dataset:
        dataset.write(array, 1)

def _needs_crs_override(crs, bounds):
    """Return True when a raster's CRS is missing or looks like a mislabelled EPSG:3857.

    A raster tagged EPSG:3857 whose left/bottom bounds fit inside the
    longitude/latitude range is treated as degree data carrying the wrong CRS.
    """
    if crs is None:
        return True
    return (
        crs.to_epsg() == 3857
        and -180 <= bounds.left <= 180
        and -90 <= bounds.bottom <= 90
    )


def _mask_to_geometry(dataset, geometry):
    """Crop and mask an open dataset to ``geometry``.

    Returns ``(masked, out_transform, bounds)``; ``bounds`` is read here, while
    the dataset is still open, so callers never touch a closed dataset.
    """
    masked, out_transform = mask(
        dataset,
        [geometry],
        crop=True,
        all_touched=True,
        nodata=dataset.nodata
    )
    return masked, out_transform, dataset.bounds


def trim_aei_to_study_area(years=None):
    """Crop the AEI rasters to the arid-SSA study area and save them as GeoTIFFs.

    For every year, the input raster path is read from the config key
    ``Africa_AEI_{year}_asc_path`` and the output path from
    ``Irrigation_Arid_SSA_{year}_tif_path``. The raster is masked to the union
    of the study-area geometries (cropped, ``all_touched=True``) and band 1 of
    the result is written with ``save_masked_raster``.

    Rasters with no CRS, or tagged EPSG:3857 while their bounds look like
    degrees, are re-tagged as EPSG:4326 through an in-memory copy first.

    Args:
        years: Optional iterable of years to process. Defaults to the
            five-year steps 1980-2015.

    Raises:
        KeyError: If a required config key is missing for a requested year.
        AssertionError: If the masked band is not a 2D array.
    """
    logger.info(f"Function entry time: {time.time()}")

    # Reproject study area to match raster CRS (EPSG:4326)
    ssa_arid_shp_fp = resolve_path(config['SSA_Arid_by_Country_shp_path'])
    study_area = load_and_reproject(ssa_arid_shp_fp, target_crs="EPSG:4326")
    logger.info(f"Study area CRS: {study_area.crs}")
    # NOTE(review): GeoSeries.unary_union is deprecated in geopandas >= 1.0 in
    # favour of union_all(); kept as-is so older geopandas versions still work.
    study_area_union = study_area.geometry.unary_union

    if years is None:
        aei_years = [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015]
    else:
        aei_years = list(years)

    for year in aei_years:
        logger.info(f"Processing year: {year}")
        raster_key = f"Africa_AEI_{year}_asc_path"
        irrig_raster_path = resolve_path(config[raster_key])
        output_key = f"Irrigation_Arid_SSA_{year}_tif_path"
        output_path = resolve_path(config[output_key])

        # Load AEI raster and assign CRS if missing
        with rasterio.open(irrig_raster_path) as src_file:
            meta = src_file.meta.copy()

            if _needs_crs_override(src_file.crs, src_file.bounds):
                logger.info(f"Forcing CRS to EPSG:4326 due to known metadata error or missing CRS.")
                meta['crs'] = CRS.from_epsg(4326)

                # Re-open the raster in memory with corrected CRS. Both the
                # MemoryFile and the dataset opened from it are context-managed
                # so nothing is left open after each year.
                with MemoryFile() as mem:
                    with mem.open(**meta) as patched_src:
                        patched_src.write(src_file.read(1), 1)
                    with mem.open() as src:
                        # Mask to study area (EPSG:4326)
                        masked, out_transform, raster_bounds = _mask_to_geometry(
                            src, study_area_union
                        )
            else:
                # Mask to study area (EPSG:4326)
                masked, out_transform, raster_bounds = _mask_to_geometry(
                    src_file, study_area_union
                )

        print("Raster bounds:", raster_bounds)
        print("Study area bounds:", study_area_union.bounds)

        array = masked[0]
        assert array.ndim == 2, f"Expected 2D array, got shape {array.shape}"
        logger.info(f"Masked array shape: {array.shape}")

        print("Min:", np.min(array))
        print("Max:", np.max(array))

        # save_masked_raster() sets transform/height/width on its own copy of
        # the metadata, so `meta` does not need updating here.
        save_masked_raster(array, out_transform, meta, output_path)
        logger.info(f"Trimmed AEI raster for {year} (all_touched) saved to: {output_path}")
        
# Run the trimming step: writes one trimmed raster per AEI year to the output
# paths taken from `config`
trim_aei_to_study_area()

2025-07-17 18:08:40,482 INFO: ⏱️ Function entry time: 1752800920.4826636


2025-07-17 18:08:41,548 INFO: Study area CRS: EPSG:4326


KeyboardInterrupt: 

### Merge overlapping command areas and save one shapefile per year

In [None]:
import geopandas as gpd

# Load command area shapefile
ca = load_and_reproject(resolve_path(config['No_Crop_Vectorized_Command_Area_shp_path']), target_crs="EPSG:3857")

# Load dam shapefile and map years to command areas
dam_gdf = load_and_reproject(resolve_path(config['GDW_Arid_SSA_Final_Irr_shp_path']), target_crs="EPSG:3857")
dam_years = dam_gdf.set_index('GDW_ID')['YEAR_DAM'].to_dict()
ca['YEAR_DAM'] = ca['GDW_ID'].map(dam_years)

# Rows whose YEAR_DAM is NaN (no matching dam, or a dam without a year) never
# satisfy the `<= year` filter below, so report them instead of dropping them
# silently.
# NOTE(review): if YEAR_DAM uses a negative sentinel for "unknown", those rows
# pass the filter for every year - confirm against the dam dataset.
n_unmatched = int(ca['YEAR_DAM'].isna().sum())
if n_unmatched:
    print(f"Warning: {n_unmatched} command areas have no YEAR_DAM and are excluded from every year.")

# Define years for analysis
years = [1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015]

# Directory that receives one merged shapefile per year; create it up front so
# to_file() does not fail on a fresh checkout
output_path = resolve_path(config['No_Crop_Vectorized_CA_UniLayer_shp_path'])
os.makedirs(output_path, exist_ok=True)

# Text fields in ESRI Shapefile (DBF) attribute tables are limited to 254
# characters; longer values are cut off when the file is written
SHP_TEXT_FIELD_LIMIT = 254

for year in years:
    # Command areas whose dam existed by `year`, re-indexed 0..n-1 so that
    # positions can double as graph node ids
    ca_year = ca[ca['YEAR_DAM'] <= year].copy().reset_index(drop=True)
    if ca_year.empty:
        print(f"No command areas found for year {year}. Skipping...")
        continue

    # Build an undirected graph where nodes are polygon indices, edges mean overlap.
    # Candidate pairs come from the spatial index instead of testing every pair;
    # hits are sorted and restricted to j > i so edges are added in the same
    # order as a plain nested loop would add them.
    G = nx.Graph()
    G.add_nodes_from(range(len(ca_year)))
    year_geoms = list(ca_year.geometry)
    year_sindex = ca_year.sindex
    for i, geom1 in enumerate(year_geoms):
        hits = year_sindex.query(geom1, predicate="intersects")
        for j in sorted(int(k) for k in hits if k > i):
            G.add_edge(i, j)

    # Find connected components (groups of overlapping polygons)
    groups = list(nx.connected_components(G))
    merged_geoms = []
    n_merged = []
    merged_gdw_ids = []

    # Dissolve every group into one geometry and remember which dams it covers
    for group in groups:
        group_indices = list(group)
        group_df = ca_year.iloc[group_indices]
        merged_geom = unary_union(group_df.geometry)
        merged_geoms.append(merged_geom)
        n_merged.append(len(group_df))
        merged_gdw_ids.append(list(group_df['GDW_ID']))

    CA_No_Overlap = gpd.GeoDataFrame({
            'geometry': merged_geoms,
            'n_merged': n_merged,
            'merged_GDW': [",".join(map(str, ids)) for ids in merged_gdw_ids]
        }, crs=ca.crs)

    # Check for overlaps in CA_No_Overlap. This deliberately uses a plain
    # pairwise scan rather than the spatial index, so the check stays
    # independent of how the groups were built. `intersects` is symmetric and
    # the scan stops at the first hit, so testing only j > i reports exactly
    # the same geometry and matches as testing all j != i.
    geoms = list(CA_No_Overlap.geometry)
    overlap_found = False
    for i, geom in enumerate(geoms):
        matches = [j for j in range(i + 1, len(geoms)) if geom.intersects(geoms[j])]
        if matches:
            print(f"Overlap found in year {year} for geometry {i} (overlaps with: {matches})")
            overlap_found = True
            break

    if not overlap_found:
        # Surface attribute truncation before writing rather than losing dam ids quietly
        n_truncated = int((CA_No_Overlap['merged_GDW'].str.len() > SHP_TEXT_FIELD_LIMIT).sum())
        if n_truncated:
            print(
                f"[{year}] Warning: {n_truncated} merged_GDW values exceed "
                f"{SHP_TEXT_FIELD_LIMIT} characters and will be truncated in the shapefile."
            )

        out_path = os.path.join(output_path, f"merged_CA_{year}.shp")
        CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')
        print(f"[{year}] Saved to: {out_path}")
    else:
        print(f"[{year}] Overlaps detected — file not saved.")

  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[1980] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_1980.shp


  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[1985] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_1985.shp


  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[1990] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_1990.shp


  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[1995] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_1995.shp


  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[2000] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_2000.shp


  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[2005] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_2005.shp


  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[2010] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_2010.shp


  CA_No_Overlap.to_file(out_path, driver='ESRI Shapefile')


[2015] ✅ Saved to: /home/waves/data/Africa_Irrigation/Data/Processed/No_Crop_Vectorized_UniLayer_CA-shp/merged_CA_2015.shp
