In [3]:
# Data Verification and Spatial Processing for Irrigation and Dam Command Areas in Sub-Saharan Africa
# This notebook prepares and verifies geospatial data to compare irrigation distributions inside and outside dam command areas, following advisor feedback. Each step is clearly documented and outputs are printed for verification.

# **Outline:**
# 1. Import Required Libraries
# 2. Load Geospatial Datasets
# 3. Inspect and Harmonize Coordinate Reference Systems (CRS)
# 4. Check and Process Command Area Overlaps
# 5. Define Study Area and Generate Inside/Outside Polygons
# 6. Extract Irrigation Pixel Values for Inside and Outside Regions
# 7. Save Extracted Pixel Values to CSV
# 8. Visualize Command Areas and Study Domain
# 9. Visualize Sampled Irrigation Data Extraction

import os
import sys
import warnings

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
import rasterio.mask
import rasterio.transform
import rasterio.warp
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from shapely.geometry import box, mapping
from shapely.ops import unary_union
from tqdm import tqdm
warnings.filterwarnings('ignore')

# Add the project root to sys.path so we can import from Code.utils everywhere.
def find_project_root(start_dir):
    """Return the directory that should be put on sys.path for `Code.utils` imports.

    The notebook is expected to sit two levels below the project root, so that
    location is tried first. If it does not contain a `Code/utils` folder (for
    example because the kernel was started from another working directory),
    `start_dir` and each of its ancestors are tried in turn.

    Parameters
    ----------
    start_dir : str
        Directory to start from (normally the current working directory).

    Returns
    -------
    str
        Absolute path of the first candidate containing `Code/utils`, or the
        two-levels-up directory when no candidate matches.
    """
    start_dir = os.path.abspath(start_dir)
    default_root = os.path.abspath(os.path.join(start_dir, '..', '..'))

    # Candidate order: the conventional location first, then start_dir upwards.
    candidates = [default_root]
    current = start_dir
    while True:
        candidates.append(current)
        parent = os.path.dirname(current)
        if parent == current:  # reached the filesystem root
            break
        current = parent

    for candidate in candidates:
        if os.path.isdir(os.path.join(candidate, 'Code', 'utils')):
            return candidate
    return default_root


project_root = find_project_root(os.getcwd())
if project_root not in sys.path:
    sys.path.insert(0, project_root)

from Code.utils.utility import load_config, resolve_path, ssa_iso, africa_iso_countries, africa_iso_countries_filtered
from Code.utils.spatial_utility import load_and_reproject, optimized_clip

# Read the project configuration; its entries are looked up by key below and
# handed to resolve_path() to obtain dataset locations.
# NOTE(review): a recorded run of this cell ended with
# "FileNotFoundError: ... '../../config.yaml'" - presumably raised by
# load_config() resolving the file relative to the working directory; confirm.
config = load_config()


def _load_vector(config_key):
    """Load the vector dataset registered under `config_key`, requesting EPSG:3857."""
    return load_and_reproject(resolve_path(config[config_key]), target_crs="EPSG:3857")


# --- Load Geospatial Datasets using utility functions ---
cpis = _load_vector('SSA_Combined_CPIS_All_shp_path')
dams = _load_vector('AridAfrica_Barriers_shp_path')
ca = _load_vector('No_Crop_Vectorized_Command_Area_shp_path')

# Paths only; the rasters and the study-area shapefile are opened further down.
irrig_raster_2000_path = resolve_path(config['Africa_AEI_2000_asc_path'])
irrig_raster_2015_path = resolve_path(config['Africa_AEI_2015_asc_path'])
ssa_arid_shp_fp = resolve_path(config['SSA_Arid_by_Country_shp_path'])

# Print the CRS and the first rows of every vector layer for verification.
for _label, _layer in (('CPIS', cpis), ('Dams', dams), ('Command Areas', ca)):
    print(f'{_label} CRS:', _layer.crs)
    print(_layer.head())

# Load irrigation rasters: only the metadata is kept here.
_raster_meta_by_year = {}
for _year, _raster_path in (('2000', irrig_raster_2000_path), ('2015', irrig_raster_2015_path)):
    with rasterio.open(_raster_path) as src:
        _raster_meta_by_year[_year] = src.meta
        print(f'Irrigation {_year} raster meta:', _raster_meta_by_year[_year])
irrigation_2000_meta = _raster_meta_by_year['2000']
irrigation_2015_meta = _raster_meta_by_year['2015']

# --- Inspect and Harmonize Coordinate Reference Systems (CRS) ---
# The vector layers were requested in EPSG:3857; verify that instead of
# assuming it, so the printed statement is an actual check.
vector_layers = {'CPIS': cpis, 'Dams': dams, 'Command Areas': ca}
vectors_not_3857 = [name for name, layer in vector_layers.items() if layer.crs != "EPSG:3857"]
if vectors_not_3857:
    print('WARNING: vector datasets not in EPSG:3857: ' + ', '.join(vectors_not_3857))
else:
    print('All vector datasets loaded in EPSG:3857.')

# Check raster CRS
print('Irrigation 2000 raster CRS:', irrigation_2000_meta['crs'])
print('Irrigation 2015 raster CRS:', irrigation_2015_meta['crs'])
# One message per raster, naming the raster so the two cases can be told apart.
# A raster without any CRS (None) is also reported as a mismatch.
for raster_year, raster_meta in (('2000', irrigation_2000_meta), ('2015', irrigation_2015_meta)):
    if raster_meta['crs'] != ca.crs:
        print(f'Irrigation {raster_year} raster CRS does not match vector CRS. '
              'Recommend using rasterio.warp.reproject to reproject raster to EPSG:3857.')

# --- Check and Process Command Area Overlaps ---
# Flag the dataset if any command-area polygon intersects another one.
# The GeoDataFrame's spatial index is queried with the 'intersects' predicate;
# it reports matches as integer row positions, so a polygon's own hit is
# excluded by comparing each position with the loop index.
# Note: 'intersects' is also true for polygons that only share a boundary.
print('Checking for overlaps in command areas...')
geoms = list(ca.geometry)
ca_sindex = ca.sindex
overlap_found = False
for i, geom in enumerate(geoms):
    if geom is None or geom.is_empty:
        continue  # missing/empty geometries cannot overlap anything
    hit_positions = ca_sindex.query(geom, predicate='intersects')
    if any(int(j) != i for j in hit_positions):
        overlap_found = True
        break  # one overlapping pair is enough to answer the question
if overlap_found:
    print('Overlaps detected in command areas!')
else:
    print('No overlaps detected in command areas.')

# Dissolve command areas into a single geometry
n_initial = len(ca)
total_command_area = ca.dissolve()
n_after = len(total_command_area)
# dissolve() without a grouping key yields a single row, so also count the
# individual parts of the merged geometry (multi-part geometries expose
# `.geoms`; a single-part geometry counts as one).
n_parts = sum(
    len(getattr(merged_geom, 'geoms', [merged_geom]))
    for merged_geom in total_command_area.geometry
    if merged_geom is not None
)
print(f'Number of initial command areas: {n_initial}')
print(f'Number of polygons after dissolve: {n_after}')
print(f'Number of separate parts in the dissolved geometry: {n_parts}')

# --- Define Study Area and Generate Inside/Outside Polygons ---
# Use SSA arid shapefile as study area boundary (or raster bounds if not available)
if os.path.exists(ssa_arid_shp_fp):
    study_area = load_and_reproject(ssa_arid_shp_fp, target_crs="EPSG:3857")
else:
    # Use raster bounds as fallback. The dataset is opened in a `with` block
    # so the file handle is closed again.
    with rasterio.open(irrig_raster_2000_path) as src:
        bounds = src.bounds
        raster_crs = src.crs
    bounds_box = box(*bounds)
    if raster_crs is not None:
        # The bounds are expressed in the raster's CRS: label them with it and
        # convert to the command-area CRS before combining with the vectors.
        study_area = gpd.GeoDataFrame({'geometry': [bounds_box]}, crs=raster_crs.to_wkt())
        if study_area.crs != ca.crs:
            study_area = study_area.to_crs(ca.crs)
    else:
        # NOTE(review): the raster carries no CRS, so the bounds are labelled
        # with the command-area CRS without conversion - confirm the raster's
        # coordinates really are in that CRS.
        study_area = gpd.GeoDataFrame({'geometry': [bounds_box]}, crs=ca.crs)
study_area_union = unary_union(study_area.geometry)

# Create outside area by subtracting total_command_area from study_area
# (shapely's unary_union is used instead of the GeoSeries.unary_union property).
inside_geom = unary_union(list(total_command_area.geometry))
outside_geom = study_area_union.difference(inside_geom)
total_outside_area = gpd.GeoDataFrame({'geometry': [outside_geom]}, crs=ca.crs)

print('Study area and inside/outside polygons defined.')

# --- Extract Irrigation Pixel Values for Inside and Outside Regions ---
def extract_pixels(raster_fp, mask_geom, geom_crs=None):
    """Return the valid band-1 pixel values of a raster that fall inside a geometry.

    Parameters
    ----------
    raster_fp : str
        Path of the raster, opened with rasterio.
    mask_geom : shapely geometry
        Area whose pixels are wanted.
    geom_crs : optional
        CRS of `mask_geom`. When given, and the raster has a CRS that differs
        from it, the geometry is reprojected into the raster CRS before
        masking. When omitted, the geometry is used as-is.

    Returns
    -------
    numpy.ndarray
        1-D array of the unmasked values, i.e. pixels inside the geometry
        that are not flagged as nodata by the dataset.
    """
    with rasterio.open(raster_fp) as src:
        shape = mapping(mask_geom)
        if geom_crs is not None and src.crs is not None:
            # Pass WKT when available so rasterio does not depend on the CRS class.
            geom_crs_input = geom_crs.to_wkt() if hasattr(geom_crs, 'to_wkt') else geom_crs
            if src.crs != geom_crs_input:
                shape = rasterio.warp.transform_geom(geom_crs_input, src.crs, shape)
        # filled=False keeps the result as a masked array whose mask combines
        # the geometry mask with the dataset's own nodata mask. This avoids
        # comparing against src.nodata, which selects every pixel when nodata
        # is None or NaN.
        out_image, _ = rasterio.mask.mask(src, [shape], crop=False, filled=False)
        return out_image[0].compressed()


# inside_geom / outside_geom are built from layers in the command-area CRS,
# so that CRS is passed along for reprojection into the raster CRS.
# Extract for 2000
inside_pixels_2000 = extract_pixels(irrig_raster_2000_path, inside_geom, geom_crs=ca.crs)
outside_pixels_2000 = extract_pixels(irrig_raster_2000_path, outside_geom, geom_crs=ca.crs)
# Extract for 2015
inside_pixels_2015 = extract_pixels(irrig_raster_2015_path, inside_geom, geom_crs=ca.crs)
outside_pixels_2015 = extract_pixels(irrig_raster_2015_path, outside_geom, geom_crs=ca.crs)

# Warnings are silenced at the top of the notebook, so report empty results explicitly.
for _region_label, _region_values in (
    ('inside 2000', inside_pixels_2000),
    ('outside 2000', outside_pixels_2000),
    ('inside 2015', inside_pixels_2015),
    ('outside 2015', outside_pixels_2015),
):
    if _region_values.size == 0:
        print(f'WARNING: no valid pixels extracted for {_region_label}; '
              'check that the mask geometry overlaps the raster.')

print('Extracted pixel values for inside/outside, 2000/2015.')

# --- Save Extracted Pixel Values to CSV ---
# One single-column CSV ('irrigation_value') per region/year, written to the
# current working directory.
_csv_exports = (
    ('processed_irrigation_inside_2000.csv', inside_pixels_2000),
    ('processed_irrigation_outside_2000.csv', outside_pixels_2000),
    ('processed_irrigation_inside_2015.csv', inside_pixels_2015),
    ('processed_irrigation_outside_2015.csv', outside_pixels_2015),
)
for _csv_name, _pixel_values in _csv_exports:
    _frame = pd.DataFrame({'irrigation_value': _pixel_values})
    _frame.to_csv(_csv_name, index=False)
print('Saved extracted pixel values to CSV files.')

# --- Visualize Command Areas and Study Domain ---
# Polygon layers drawn by GeoDataFrame.plot() do not produce legend entries
# from `label=`, so the legend is built from explicit proxy handles that
# mirror each layer's styling.
fig, ax = plt.subplots(figsize=(12, 10))
legend_handles = []
if os.path.exists(ssa_arid_shp_fp):
    study_area.plot(ax=ax, color='lightgray', edgecolor='black', alpha=0.5)
    legend_handles.append(
        Patch(facecolor='lightgray', edgecolor='black', alpha=0.5, label='Study Area')
    )
total_outside_area.plot(ax=ax, color='whitesmoke', edgecolor='gray', alpha=0.7)
legend_handles.append(
    Patch(facecolor='whitesmoke', edgecolor='gray', alpha=0.7, label='Outside Area')
)
total_command_area.boundary.plot(ax=ax, color='blue', linewidth=2)
legend_handles.append(Line2D([0], [0], color='blue', linewidth=2, label='Command Areas'))
ax.set_title('Command Areas and Study Domain')
ax.legend(handles=legend_handles)
fig.tight_layout()
fig.savefig('command_areas_study_domain.png', dpi=300)
plt.show()
print('Saved map: command_areas_study_domain.png')

# --- Visualize Sampled Irrigation Data Extraction (2015) ---
# Sample up to 5000 values for each region for visualization.
sample_n = 5000
# Seeded generator so that re-running the notebook draws the same samples.
value_sample_rng = np.random.default_rng(42)
inside_sample = (
    inside_pixels_2015
    if len(inside_pixels_2015) <= sample_n
    else value_sample_rng.choice(inside_pixels_2015, sample_n, replace=False)
)
outside_sample = (
    outside_pixels_2015
    if len(outside_pixels_2015) <= sample_n
    else value_sample_rng.choice(outside_pixels_2015, sample_n, replace=False)
)

# For visualization, get the centre coordinates of valid (non-nodata) pixels
# that fall inside a geometry.
def get_pixel_coords(raster_fp, mask_geom, sample_n, geom_crs=None, rng=None):
    """Return x/y coordinates of up to `sample_n` valid pixels inside a geometry.

    Parameters
    ----------
    raster_fp : str
        Path of the raster, opened with rasterio.
    mask_geom : shapely geometry
        Area whose pixels are wanted.
    sample_n : int
        Maximum number of pixels to return; a random subset (without
        replacement) is drawn when more valid pixels exist.
    geom_crs : optional
        CRS of `mask_geom`. When given, and the raster has a CRS that differs
        from it, the geometry is reprojected into the raster CRS for masking
        and the returned coordinates are converted back into `geom_crs`.
        When omitted, coordinates are returned in the raster's own system.
    rng : numpy.random.Generator, optional
        Random generator used for sampling; a fresh unseeded one is created
        when omitted.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        x and y coordinates of the selected pixel centres.
    """
    if rng is None:
        rng = np.random.default_rng()
    with rasterio.open(raster_fp) as src:
        shape = mapping(mask_geom)
        geom_crs_input = None
        needs_reprojection = False
        if geom_crs is not None and src.crs is not None:
            # Pass WKT when available so rasterio does not depend on the CRS class.
            geom_crs_input = geom_crs.to_wkt() if hasattr(geom_crs, 'to_wkt') else geom_crs
            needs_reprojection = src.crs != geom_crs_input
        if needs_reprojection:
            shape = rasterio.warp.transform_geom(geom_crs_input, src.crs, shape)

        # filled=False returns a masked array; its mask combines the geometry
        # mask with the dataset's nodata mask, so no comparison against
        # src.nodata (which may be None or NaN) is needed.
        out_image, out_transform = rasterio.mask.mask(src, [shape], crop=False, filled=False)
        valid = ~np.ma.getmaskarray(out_image[0])
        rows, cols = np.nonzero(valid)

        # Sample row/column indices first so that coordinates are only
        # computed for the pixels that are actually returned.
        if rows.size > sample_n:
            keep = rng.choice(rows.size, sample_n, replace=False)
            rows, cols = rows[keep], cols[keep]
        if rows.size == 0:
            return np.array([], dtype=float), np.array([], dtype=float)

        # Convert array indices to spatial coordinates (pixel centres).
        xs, ys = rasterio.transform.xy(out_transform, rows, cols)
        xs = np.asarray(xs, dtype=float)
        ys = np.asarray(ys, dtype=float)
        if needs_reprojection:
            xs, ys = rasterio.warp.transform(src.crs, geom_crs_input, xs.tolist(), ys.tolist())
            xs = np.asarray(xs, dtype=float)
            ys = np.asarray(ys, dtype=float)
        return xs, ys


# Seeded generator so the plotted sample is reproducible; the command-area CRS
# is passed so the points come back in the same CRS as the plotted vectors.
coord_sample_rng = np.random.default_rng(42)
inside_xs, inside_ys = get_pixel_coords(
    irrig_raster_2015_path, inside_geom, sample_n, geom_crs=ca.crs, rng=coord_sample_rng
)
outside_xs, outside_ys = get_pixel_coords(
    irrig_raster_2015_path, outside_geom, sample_n, geom_crs=ca.crs, rng=coord_sample_rng
)

# Map of the sampled 2015 pixel locations on top of the study area and the
# command-area outlines, drawn through the explicit figure/axes objects.
fig, ax = plt.subplots(figsize=(12, 10))

# Backdrop: the study area, only when its shapefile is available on disk.
study_area_available = os.path.exists(ssa_arid_shp_fp)
if study_area_available:
    study_area.plot(ax=ax, color='lightgray', edgecolor='black', alpha=0.5)

total_command_area.boundary.plot(ax=ax, color='blue', linewidth=2, label='Command Areas')

# Outside points are drawn first, then the inside points on top of them.
ax.scatter(outside_xs, outside_ys, s=1, color='orange', alpha=0.3, label='Outside Pixels (sample)')
ax.scatter(inside_xs, inside_ys, s=1, color='green', alpha=0.3, label='Inside Pixels (sample)')

ax.set_title('Sampled Irrigation Pixels (2015) Inside/Outside Command Areas')
ax.legend()
fig.tight_layout()
fig.savefig('sampled_irrigation_pixels_2015.png', dpi=300)
plt.show()
print('Saved map: sampled_irrigation_pixels_2015.png')


FileNotFoundError: [Errno 2] No such file or directory: '../../config.yaml'