In [3]:
import sys
import os

# Make the `Code.utils` package importable: the project root is taken to be two
# directory levels above the kernel's current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    # Prepend so this checkout takes precedence over any other entry on the path.
    sys.path.insert(0, project_root)

from Code.utils.utility import load_config, resolve_path, ssa_iso
from Code.utils.spatial_utility import load_and_reproject, optimized_clip

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

# Load the project configuration; the cells below index it by key and pass the
# values through resolve_path() to locate input and output datasets.
config = load_config()

##### Filter the dam data

In [3]:
# Read the global dam inventory from the CSV named in the project configuration.
global_dams_path = resolve_path(config['Global_Dam_Data_csv_path'])
global_dams = pd.read_csv(global_dams_path)

# ISO codes may carry stray padding, so normalise them before matching them
# against the SSA country list (ssa_iso).
global_dams['ISO'] = global_dams['ISO'].str.strip()
in_ssa = global_dams['ISO'].isin(ssa_iso)
ssa_dams = global_dams.loc[in_ssa].copy()

# Keep dams whose 'Purpose' text mentions irrigation. The match ignores case,
# and rows with a missing purpose count as non-matching (na=False).
mentions_irrigation = ssa_dams['Purpose'].str.contains('Irrigation', case=False, na=False)
irrigation_dams = ssa_dams.loc[mentions_irrigation].copy()
print(irrigation_dams.count())

# Persist the irrigation subset (without the index) for downstream steps.
irrigation_dams_path = resolve_path(config['Africa_Dam_Irrigation_csv_path'])
irrigation_dams.to_csv(irrigation_dams_path, index=False)
print(f"Filtered irrigation dams saved to {irrigation_dams_path}")

Country                 335
ISO                     335
Sec_cntry                 4
Name                    335
AltDamName               60
Purpose                 335
Admin_unit              326
Near_city               329
River                   284
Main_basin              330
Sub_basin               305
Status                  335
PVOUT                   335
PotentialPVSurface      335
PV_InstalledCapacity    335
PV_SpaCapacity          335
Dam_hgt                 335
Res_capacityM3          335
Res_area_km2            335
HPP_Install_Cap         335
Transm_exist            335
Transm_length           335
Transm_plan             335
Proj_type                 0
Lifecycle                 0
Storage_ty                0
Linked_prj                0
Proj_cost               335
Proj_statu                0
Study_stat                0
YEAR                    335
Yr_recents              335
Comission_              335
Ann_firm_g              335
Ann_tot_ge              335
Prox_irrig          

Convert the filtered irrigation dams to a GeoDataFrame and clip them to each aridity region

In [5]:
# ESRI Shapefile attribute (DBF field) names are limited to 10 characters.
SHP_FIELD_MAX_LEN = 10


def shapefile_field_names(columns, max_len=SHP_FIELD_MAX_LEN):
    """Build a rename mapping that makes column names fit Shapefile field names.

    Parameters
    ----------
    columns : iterable
        Column labels of the frame about to be written.
    max_len : int, default SHP_FIELD_MAX_LEN
        Maximum allowed length of a field name.

    Returns
    -------
    dict
        ``{original_label: shortened_name}`` for every label longer than
        ``max_len``. Labels that already fit are not included and are reserved,
        so a shortened name never collides with them. A shortened name is the
        first ``max_len`` characters of the label; if that is already taken, a
        numeric ``_<n>`` suffix is used instead.
    """
    columns = list(columns)
    # Compare case-insensitively to stay on the safe side of the file format.
    taken = {str(col).lower() for col in columns if len(str(col)) <= max_len}
    mapping = {}
    for col in columns:
        name = str(col)
        if len(name) <= max_len:
            continue
        candidate = name[:max_len]
        counter = 1
        while candidate.lower() in taken:
            suffix = f"_{counter}"
            candidate = name[:max_len - len(suffix)] + suffix
            counter += 1
        taken.add(candidate.lower())
        mapping[col] = candidate
    return mapping


# CRS used for the spatial operations; the aridity layers are loaded in it too.
target_crs = "EPSG:3857"

# Rows lacking either coordinate cannot be placed on the map. Leave them out
# explicitly (and say so) rather than turning them into unusable point geometries.
# dropna() also returns a new frame, so `irrigation_dams` itself is not touched.
coord_cols = ['Long__res_', 'Lat__res_']
located_dams = irrigation_dams.dropna(subset=coord_cols)
n_unlocated = len(irrigation_dams) - len(located_dams)
if n_unlocated:
    print(f"Skipping {n_unlocated} dams with missing coordinates.")

# Convert irrigation dams to a GeoDataFrame. points_from_xy builds all points in
# one vectorised call; the coordinates are declared as EPSG:4326 (lon/lat).
gdf_dams = gpd.GeoDataFrame(
    located_dams,
    geometry=gpd.points_from_xy(located_dams['Long__res_'], located_dams['Lat__res_']),
    crs="EPSG:4326",
)
print(f"Number of dams after converting to GeoDataFrame: {gdf_dams.shape[0]}")

# Reproject to a projected CRS for spatial operations
gdf_dams = gdf_dams.to_crs(target_crs)
print("Reprojected GeoDataFrame to EPSG:3857 for spatial operations.")

# Aridity layers to process; each name selects a pair of config keys
# (input 'Africa_<layer>_shp_path', output 'Africa_Dam_<layer>_shp_path').
aridity_layers = ['Semi_Arid', 'Arid', 'Hyper_Arid', 'All']

for layer in aridity_layers:
    # Load the aridity polygons in the same CRS as the dams.
    aridity_shp_path = resolve_path(config[f'Africa_{layer}_shp_path'])
    aridity_gdf = load_and_reproject(aridity_shp_path, target_crs=target_crs)

    # load_and_reproject signals a failed load by returning None.
    if aridity_gdf is None:
        print(f"Failed to load {layer} aridity shapefile. Skipping...")
        continue

    # Keep only the dams that fall inside the current aridity layer.
    filtered_dams = optimized_clip(gdf_dams, aridity_gdf)
    print(f"Number of dams in {layer} region: {filtered_dams.shape[0]}")

    # Give the coordinate columns readable names (same output names as before).
    filtered_dams = filtered_dams.rename(columns={"Long__res_": "Longitude", "Lat__res_": "Latitude"})
    # Shorten every over-long column name ourselves so the field names in the
    # Shapefile are deterministic and the writer has nothing left to truncate.
    filtered_dams = filtered_dams.rename(columns=shapefile_field_names(filtered_dams.columns))

    # Save the filtered dams to a shapefile
    output_path = resolve_path(config[f'Africa_Dam_{layer}_shp_path'])
    if filtered_dams.empty:
        print(f"No dams found in {layer} region. Skipping save.")
        continue

    # Make sure the destination folder exists so a fresh checkout can run end to end.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    filtered_dams.to_file(output_path, driver='ESRI Shapefile')
    print(f"Filtered dams for {layer} region saved to {output_path}")

Number of dams after converting to GeoDataFrame: 335
Reprojected GeoDataFrame to EPSG:3857 for spatial operations.
Number of dams in Semi_Arid region: 235


  filtered_dams.to_file(output_path, driver='ESRI Shapefile')


Filtered dams for Semi_Arid region saved to /home/waves/data/Africa_Irrigation/Data/Processed/Africa_Dam_Aridity_Layers-shp/Africa_Dam_Semi_Arid.shp
Number of dams in Arid region: 43


  filtered_dams.to_file(output_path, driver='ESRI Shapefile')


Filtered dams for Arid region saved to /home/waves/data/Africa_Irrigation/Data/Processed/Africa_Dam_Aridity_Layers-shp/Africa_Dam_Arid.shp
Number of dams in Hyper_Arid region: 0
No dams found in Hyper_Arid region. Skipping save.
Number of dams in All region: 278


  filtered_dams.to_file(output_path, driver='ESRI Shapefile')


Filtered dams for All region saved to /home/waves/data/Africa_Irrigation/Data/Processed/Africa_Dam_Aridity_Layers-shp/Africa_Dam_All_Arid.shp
