# Groundwater Productivity Targeting Analysis
This notebook analyzes the relationship between groundwater productivity and the spatial distribution of Center Pivot Irrigation Systems (CPIS) and irrigated land across Africa. It calculates targeting ratios for CPIS relative to irrigated area by groundwater productivity category, using bootstrapping to estimate confidence intervals.

## Import Required Libraries and Utilities
Import all necessary libraries and utility functions, including configuration and region ISO dictionaries from the project's utility files.

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point
from scipy.spatial import cKDTree
from tqdm import tqdm
import numpy as np
import sys
import os

# Make the repository root importable so that `Code.utils` resolves from this notebook.
# The root is taken to be two directory levels above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

from Code.utils.utility import load_config, resolve_path, regions_dict

# Load the project configuration. Later cells look up dataset locations in it
# (e.g. config['Groundwater_Productivity_path']) and pass them to resolve_path.
config = load_config()

# Helper for SSA filtering
def filter_ssa(df, iso_col='ISO'):
    """Return the rows of ``df`` whose country code is not Northern African.

    Parameters
    ----------
    df : pandas.DataFrame or geopandas.GeoDataFrame
        Table that contains a country-code column.
    iso_col : str, default 'ISO'
        Name of the column compared against ``regions_dict['Northern Africa']``.

    Returns
    -------
    Same type as ``df``
        The subset of ``df`` outside Northern Africa; original index labels
        are kept as they are.
    """
    excluded_codes = regions_dict['Northern Africa']
    keep_row = ~df[iso_col].isin(excluded_codes)
    return df.loc[keep_row]

## Prepare Groundwater Productivity Data
Load the groundwater productivity point dataset, build a GeoDataFrame reprojected to EPSG:3857, and attach each point's productivity category together with its numeric value.

In [None]:
# Groundwater productivity classes and the numeric value used for each class.
# (The values are stored in the 'GW_prod_cat_L_s' column; presumably litres per
# second — confirm against the data source.)
value_map_cat = {'VH': 20.0, 'H': 12.5, 'M': 3.0, 'LM': 0.75, 'L': 0.3, 'VL': 0.1}
# Inverse lookup: numeric value -> class label.
reverse_map_cat = {v: k for k, v in value_map_cat.items()}

# Load the groundwater productivity data (whitespace-delimited text with X, Y
# and GWPROD_V2 columns).
GW_path = resolve_path(config['Groundwater_Productivity_path'])
df_GW = pd.read_csv(GW_path, sep=r'\s+')

# Build the point geometries in one vectorized call instead of creating one
# shapely Point per row in Python.
geometry = gpd.points_from_xy(df_GW['X'], df_GW['Y'])
gdf_GW = gpd.GeoDataFrame(df_GW, geometry=geometry, crs='EPSG:4326').to_crs(epsg=3857)

# Numeric value of each point's class; labels missing from value_map_cat become NaN.
gdf_GW['GW_prod_cat_L_s'] = gdf_GW['GWPROD_V2'].map(value_map_cat)
# Class label derived from the numeric column created just above. The previous
# code read a 'Liters_Second' column that is never created in this notebook.
gdf_GW['GW_prod_cat'] = gdf_GW['GW_prod_cat_L_s'].map(reverse_map_cat)
# Keep a single label column: leaving the raw 'GWPROD_V2' in place would produce
# two columns named 'GW_prod_cat' if it is renamed to that name later on.
gdf_GW = gdf_GW.drop(columns=['GWPROD_V2'])

## Assign Groundwater Value to Each Irrigated Pixel
Spatially join the AEI (area equipped for irrigation) layer with the groundwater productivity points, then compute each country's total AEI and the share of that total falling in each productivity category.

In [None]:
# Load the AEI data by country and bring it into the CRS of the groundwater points
AEI_path = resolve_path(config['AEI_2015_All_shp_path'])
AEI_by_country = gpd.read_file(AEI_path).to_crs(gdf_GW.crs)

# Spatial join: attach the attributes of the AEI feature that contains each
# groundwater point. With how="left", points outside every AEI feature are kept
# with NaN attributes; the groupby calls below drop them because their ISO is NaN.
gdf_combined = gpd.sjoin(gdf_GW, AEI_by_country, how="left", predicate="within")
# gdf_GW already carries a 'GW_prod_cat' column. Renaming 'GWPROD_V2' onto the
# same name would leave two identically named columns and make the groupby on
# 'GW_prod_cat' fail, so only rename when the label column is missing.
if 'GW_prod_cat' not in gdf_combined.columns:
    gdf_combined = gdf_combined.rename(columns={'GWPROD_V2': 'GW_prod_cat'})
gdf_combined = gdf_combined.drop(columns=['index_right']).reset_index(drop=True)

# Total AEI per (country, groundwater productivity category).
# NOTE(review): 'raster_val' is taken to be the AEI amount of the joined feature — confirm.
AEI_by_country_and_prod = gdf_combined.groupby(['ISO', 'GW_prod_cat'])['raster_val'].sum().reset_index()
# Total AEI per country
total_AEI_per_country = (
    gdf_combined.groupby('ISO')['raster_val']
    .sum()
    .reset_index()
    .rename(columns={'raster_val': 'Total_AEI'})
)
# Share of each country's AEI that falls in each category (NaN when the country total is 0)
merged_with_AEI = AEI_by_country_and_prod.merge(total_AEI_per_country, on='ISO')
merged_with_AEI['Percent_AEI'] = merged_with_AEI['raster_val'] / merged_with_AEI['Total_AEI']
# Restrict to SSA
merged = filter_ssa(merged_with_AEI, iso_col='ISO')

## Assign Groundwater Values to Each Center Pivot
Assign the nearest groundwater productivity value to each CPIS polygon and summarize by productivity level and country.

In [None]:
# Read the CPIS polygons, reproject them to the CRS of the groundwater points,
# and expose the country code under the common 'ISO' column name.
gdf_CPIS_cc = gpd.read_file(resolve_path(config['Combined_CPIS_All_shp_path'])).to_crs(gdf_GW.crs)
gdf_CPIS = gdf_CPIS_cc.rename(columns={'Country Co': 'ISO'})

# (x, y) of every CPIS polygon centroid, one row per polygon
cp_centroids_coords = np.array([(pt.x, pt.y) for pt in gdf_CPIS.geometry.centroid])
# (x, y) of every groundwater point, in gdf_GW row order
gw_coords = np.array([(pt.x, pt.y) for pt in gdf_GW.geometry])

# Nearest-neighbour search: idx[i] is the row position in gdf_GW of the
# groundwater point closest to CPIS i (distances are not needed).
tree = cKDTree(gw_coords)
_, idx = tree.query(cp_centroids_coords, k=1)

# Copy the category of the nearest groundwater point onto each CPIS
gdf_CPIS['GW_prod_cat'] = gdf_GW['GW_prod_cat'].values[idx]

# Restrict to SSA
gdf_CPIS_SSA = filter_ssa(gdf_CPIS, iso_col='ISO')

# CPIS counts per (country, category) and per category overall
CPIS_SSA_per_country_level = (
    gdf_CPIS_SSA.groupby(['ISO', 'GW_prod_cat'])
    .size()
    .reset_index(name='CPIS_per_country_level')
)
CPIS_SSA_per_level = (
    gdf_CPIS_SSA.groupby('GW_prod_cat')
    .size()
    .reset_index(name='CPIS_count')
)

## Merge AEI and CPIS Data for Targeting Ratio Calculation
Merge the AEI and CPIS summary tables to prepare for targeting ratio analysis.

In [None]:
# Number of CPIS per country, restricted to SSA. The per-category CPIS counts
# used as the numerator of the targeting ratio are SSA-only, so the per-country
# totals that feed the denominator must cover the same set of countries;
# counting Northern African CPIS here would inflate every denominator.
points_per_country = gdf_CPIS_SSA.groupby('ISO').size().reset_index(name='Point_Count')
# Attach the CPIS counts to the SSA-restricted AEI shares, matching on country and category
merged_with_cpis = merged.merge(CPIS_SSA_per_country_level, on=['ISO', 'GW_prod_cat'], how='left')
# Country/category pairs without any CPIS come out of the left merge as NaN -> 0
merged_with_cpis['CPIS_per_country_level'] = merged_with_cpis['CPIS_per_country_level'].fillna(0).astype(int)
# Numeric value associated with each category label
merged_with_cpis['GW_prod_cat_L_s'] = merged_with_cpis['GW_prod_cat'].map(value_map_cat)

## Calculate Targeting Ratios with Bootstrapping
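Compute the targeting ratio for each groundwater productivity level, with bootstrapped 95% confidence intervals. For category $g$ the ratio is $T_g = N_g / \sum_c n_c \, s_{c,g}$, where $N_g$ is the number of CPIS located in category $g$, $n_c$ is the number of CPIS in country $c$, and $s_{c,g}$ is the share of country $c$'s AEI that lies in category $g$. A ratio above 1 means CPIS are over-represented in that category relative to irrigated area.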

In [None]:
def bootstrap_targeting_ratio(numerator, denominator, num_bootstrap=1000, seed=None):
    """Parametric-bootstrap 95% interval for ``numerator / denominator``.

    Both quantities are resampled independently from Poisson distributions
    whose means are the supplied values; the interval is the 2.5th and 97.5th
    percentile of the resampled ratios. Draws whose denominator is zero are
    left out of the percentiles.

    Parameters
    ----------
    numerator : float
        Poisson mean used for the resampled numerators (must be >= 0).
    denominator : float
        Poisson mean used for the resampled denominators (must be >= 0).
    num_bootstrap : int, default 1000
        Number of bootstrap draws.
    seed : int, numpy.random.Generator or None, default None
        When given, draws come from ``np.random.default_rng(seed)`` so the
        result is reproducible. When None, NumPy's global generator is used,
        which means a prior ``np.random.seed(...)`` call is still honoured.

    Returns
    -------
    tuple of float
        ``(lower, upper)`` bounds; ``(nan, nan)`` when every resampled
        denominator is zero.
    """
    rng = np.random if seed is None else np.random.default_rng(seed)
    # All draws are taken in two vectorized calls rather than a Python loop.
    boot_numerator = rng.poisson(numerator, size=num_bootstrap)
    boot_denominator = rng.poisson(denominator, size=num_bootstrap)

    usable = boot_denominator != 0
    if not usable.any():
        # Nothing to take percentiles of; avoids an all-NaN percentile warning.
        return np.nan, np.nan

    ratios = boot_numerator[usable] / boot_denominator[usable]
    lower, upper = np.percentile(ratios, [2.5, 97.5])
    return lower, upper

# Seed NumPy's global generator so the bootstrapped confidence intervals are
# the same on every Restart & Run All.
BOOTSTRAP_SEED = 42
np.random.seed(BOOTSTRAP_SEED)

# Plain-dict lookups replace repeated boolean-mask selections inside the loops.
# A missing (country, category) pair is then an explicit miss rather than an
# IndexError from `.values[0]` on an empty selection, which the previous
# `except KeyError` did not catch.
cpis_count_by_country = dict(zip(points_per_country['ISO'], points_per_country['Point_Count']))
aei_share_by_country_level = {
    (iso, cat): share
    for iso, cat, share in zip(
        merged_with_cpis['ISO'],
        merged_with_cpis['GW_prod_cat'],
        merged_with_cpis['Percent_AEI'],
    )
}

targeting_ratios = []
for prod_level, CPIS_count in zip(CPIS_SSA_per_level['GW_prod_cat'], CPIS_SSA_per_level['CPIS_count']):
    # Expected CPIS count in this category if, within every country, CPIS were
    # spread across categories in proportion to that country's AEI.
    denominator = 0
    for country, country_cpis_count in cpis_count_by_country.items():
        percent_aei = aei_share_by_country_level.get((country, prod_level))
        # Skip countries with no AEI recorded in this category or with undefined values
        if percent_aei is None or pd.isna(country_cpis_count) or pd.isna(percent_aei):
            continue
        denominator += country_cpis_count * percent_aei

    if denominator == 0:
        targeting_ratio = np.nan
        ci_lower, ci_upper = np.nan, np.nan
    else:
        targeting_ratio = CPIS_count / denominator
        ci_lower, ci_upper = bootstrap_targeting_ratio(CPIS_count, denominator)

    targeting_ratios.append({
        # The key says "L_s" but the value is the category label (e.g. 'VL');
        # the name is kept because the plotting cell reads this column.
        'GW_prod_cat_L_s': prod_level,
        'Numerator': CPIS_count,
        'Denominator': denominator,
        'Targeting_Ratio': targeting_ratio,
        'CI_Lower': ci_lower,
        'CI_Upper': ci_upper
    })
df_targeting_ratios = pd.DataFrame(targeting_ratios)

## Visualize Targeting Ratios and Components
Plot the numerators, denominators, and targeting ratios with confidence intervals for each groundwater productivity category.

In [None]:
# Order the categories from least to most productive so every plot uses the same x-axis order
desired_order = ['VL', 'L', 'LM', 'M', 'H', 'VH']
df_targeting_ratios['GW_prod_cat_L_s'] = pd.Categorical(
    df_targeting_ratios['GW_prod_cat_L_s'], categories=desired_order, ordered=True
)
df_targeting_ratios = df_targeting_ratios.sort_values('GW_prod_cat_L_s')

# Figure 1: numerator and denominator of the targeting ratio, side by side
fig_components, (ax_numerator, ax_denominator) = plt.subplots(1, 2, figsize=(14, 7))

ax_numerator.bar(df_targeting_ratios['GW_prod_cat_L_s'], df_targeting_ratios['Numerator'], color='seagreen')
ax_numerator.set_title('Numerators by Groundwater Productivity Category')
ax_numerator.set_xlabel('Groundwater Productivity Category')
ax_numerator.set_ylabel('Numerator Value')
ax_numerator.grid(axis='y', linestyle='--', linewidth=0.5, alpha=0.7)

ax_denominator.bar(df_targeting_ratios['GW_prod_cat_L_s'], df_targeting_ratios['Denominator'], color='darkolivegreen')
ax_denominator.set_title('Denominators by Groundwater Productivity Category')
ax_denominator.set_xlabel('Groundwater Productivity Category')
ax_denominator.set_ylabel('Denominator Value')
ax_denominator.grid(axis='y', linestyle='--', linewidth=0.5, alpha=0.7)

fig_components.tight_layout()
plt.show()

# Figure 2: targeting ratio per category with asymmetric 95% error bars
fig_ratio, ax_ratio = plt.subplots(figsize=(12, 6))
ratios = df_targeting_ratios['Targeting_Ratio']
# errorbar expects distances from the point estimate, not the interval bounds themselves
lower_errors = ratios - df_targeting_ratios['CI_Lower']
upper_errors = df_targeting_ratios['CI_Upper'] - ratios
ax_ratio.errorbar(
    df_targeting_ratios['GW_prod_cat_L_s'],
    ratios,
    yerr=[lower_errors, upper_errors],
    fmt='o',
    ecolor='darkgreen',
    capsize=5,
    elinewidth=2,
    markeredgewidth=2,
    color='seagreen',
)
# A ratio of 1 is the reference level drawn for comparison
ax_ratio.axhline(y=1, color='olivedrab', linestyle='--', linewidth=1, label='Reference Line at 1')
ax_ratio.set_xlabel('Groundwater Productivity Category')
ax_ratio.set_ylabel('Targeting Ratio')
ax_ratio.set_title('Targeting Ratios with 95% Confidence Intervals')
plt.setp(ax_ratio.get_xticklabels(), rotation=45)
ax_ratio.grid(axis='y', linestyle='--', linewidth=0.5, alpha=0.7)
fig_ratio.tight_layout()
ax_ratio.legend()
plt.show()