In [None]:
# importing necessary libraries
import geopandas as gpd
from tqdm import tqdm
import matplotlib.pyplot as plt
import sys
import os

# Make the repository root importable so that `Code.utils` resolves from this notebook.
# The root is taken to be two directory levels above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

from Code.utils.utility import load_config, resolve_path, ssa_iso
from Code.utils.spatial_utility import optimized_clip

# Load the project configuration through the `load_config` helper imported from Code.utils.utility.
# Later cells index `config` with string keys (e.g. 'Combined_CPIS_shp_path') and pass the
# values to `resolve_path` to obtain the file paths they read from and write to.
config = load_config()

Trim CPIS into Aridity Layers and Reproject

In [4]:
# Define a function to process CPIS data for a specific aridity layer
def process_cpis_layer(layer_name, config, gdf_cpis):
    """Clip the CPIS dataset to one aridity layer and save the result as a shapefile.

    Parameters
    ----------
    layer_name : str
        Layer suffix used to build the two config keys that are looked up:
        ``Africa_{layer_name}_shp_path`` (aridity shapefile to read) and
        ``Combined_CPIS_{layer_name}_shp_path`` (shapefile to write).
    config : mapping
        Configuration object indexed by the keys above; each value is passed
        through ``resolve_path`` before use.
    gdf_cpis : geopandas.GeoDataFrame
        CPIS features to clip. The aridity layer is reprojected to this
        frame's CRS before clipping.

    Returns
    -------
    The object returned by ``optimized_clip(gdf_cpis, aridity_gdf)``, i.e. the
    same data that was written to disk.

    Raises
    ------
    KeyError
        If either config key for ``layer_name`` is missing.
    """
    # Resolve paths
    aridity_shp_path = resolve_path(config[f'Africa_{layer_name}_shp_path'])
    output_path = resolve_path(config[f'Combined_CPIS_{layer_name}_shp_path'])

    # Load the aridity shapefile
    aridity_gdf = gpd.read_file(aridity_shp_path)

    # Ensure CRS matches
    aridity_gdf = aridity_gdf.to_crs(gdf_cpis.crs)

    # Filter CPIS data using optimized clipping
    filtered_cpis = optimized_clip(gdf_cpis, aridity_gdf)

    # Create the destination folder up front so the write below does not fail
    # on a fresh checkout where the output directory has not been made yet.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the filtered data
    filtered_cpis.to_file(output_path, driver='ESRI Shapefile')
    print(f"Processed CPIS data for {layer_name} layer and saved to {output_path}")

    # Hand the clipped frame back so callers can inspect or validate it
    # without re-reading the shapefile; existing callers may ignore it.
    return filtered_cpis

# Read the combined CPIS shapefile and reproject it to EPSG:3857 in one step
cpis_source_path = resolve_path(config['Combined_CPIS_shp_path'])
gdf_cpis = gpd.read_file(cpis_source_path).to_crs("EPSG:3857")

# Write the reprojected copy to disk (optional)
cpis_reproj_path = resolve_path(config['Combined_CPIS_Reproj_shp_path'])
gdf_cpis.to_file(cpis_reproj_path)

# Run the clipping step once for every aridity layer, with a progress bar
aridity_layers = ['Semi_Arid', 'Arid', 'Hyper_Arid', 'All']
layer_progress = tqdm(aridity_layers, desc="Processing CPIS Layers")
for layer in layer_progress:
    process_cpis_layer(layer, config, gdf_cpis)

Processing CPIS Layers:  25%|██▌       | 1/4 [00:20<01:00, 20.23s/it]

Processed CPIS data for Semi_Arid layer and saved to /home/waves/data/Africa_Irrigation/Data/Processed/Combined_CPIS_Aridity_Layers-shp/Combined_CPIS_Semi_Arid.shp


Processing CPIS Layers:  50%|█████     | 2/4 [00:32<00:31, 15.56s/it]

Processed CPIS data for Arid layer and saved to /home/waves/data/Africa_Irrigation/Data/Processed/Combined_CPIS_Aridity_Layers-shp/Combined_CPIS_Arid.shp


Processing CPIS Layers:  75%|███████▌  | 3/4 [00:38<00:11, 11.04s/it]

Processed CPIS data for Hyper_Arid layer and saved to /home/waves/data/Africa_Irrigation/Data/Processed/Combined_CPIS_Aridity_Layers-shp/Combined_CPIS_Hyper_Arid.shp


Processing CPIS Layers: 100%|██████████| 4/4 [01:00<00:00, 15.20s/it]

Processed CPIS data for All layer and saved to /home/waves/data/Africa_Irrigation/Data/Processed/Combined_CPIS_Aridity_Layers-shp/Combined_CPIS_All.shp





Filter the 'All' layer to Sub-Saharan Africa (SSA) and add a 'Year' column

In [None]:
# Read back the clipped "All" layer
all_layer_path = resolve_path(config['Combined_CPIS_All_shp_path'])
gdf_cpis_all = gpd.read_file(all_layer_path)

# Keep only the rows whose country code appears in `ssa_iso` (Sub-Saharan Africa)
in_ssa = gdf_cpis_all['Country Co'].isin(ssa_iso)
gdf_cpis_ssa = gdf_cpis_all.loc[in_ssa].copy()


def _year_from_flags(row):
    """Return 2000 or 2021 according to the row's year flags, or None if neither equals 1.

    The 2000 flag is checked first, so it takes precedence when both flags equal 1.
    """
    if row['year_2000'] == 1:
        return 2000
    if row['year_2021'] == 1:
        return 2021
    return None


# Collapse the two flag columns into a single "Year" column
gdf_cpis_ssa['Year'] = gdf_cpis_ssa.apply(_year_from_flags, axis=1)

# The flag columns are redundant once "Year" exists
gdf_cpis_ssa = gdf_cpis_ssa.drop(columns=['year_2000', 'year_2021'])

# Write the SSA subset to its own shapefile
ssa_output_path = resolve_path(config['SSA_Combined_CPIS_All_shp_path'])
gdf_cpis_ssa.to_file(ssa_output_path, driver='ESRI Shapefile')
print(f"Filtered SSA CPIS dataset saved to {ssa_output_path}")

Index(['ID', 'year_2000', 'year_2021', 'Country', 'Country Co', 'geometry'], dtype='object')
Filtered SSA CPIS dataset saved to /home/waves/data/Africa_Irrigation/Data/Processed/Combined_CPIS_Aridity_Layers-shp/SSA_Combined_CPIS_All.shp
