# Filter AQUASTAT Data

This notebook filters AQUASTAT data to African and Sub-Saharan African countries. It:
1. Loads the global AQUASTAT dataset
2. Adds ISO codes based on country names
3. Filters to African and Sub-Saharan African countries
4. Saves the filtered datasets for all years, plus separate extracts for 2000 and 2021

In [1]:
import sys
import os

# Make the project root importable so `Code.utils` can be resolved from here.
# The root is taken to be two directory levels above the current working directory.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    # Prepend so the project's packages take precedence over same-named ones.
    sys.path.insert(0, project_root)

from Code.utils import utility

import pandas as pd
import geopandas as gpd
import unicodedata


## Load and Filter AQUASTAT Data

In [2]:
# Load the global AQUASTAT data from the CSV referenced by the
# 'AQUA_World_path' entry of the project config.
try:
    AQUA_data = pd.read_csv(utility.resolve_path(utility.load_config()['AQUA_World_path']))
except Exception as e:
    print(f"Error loading AQUASTAT data: {e}")
    # Every later cell reads AQUA_data. Re-raise so the run stops here with the
    # real cause, instead of continuing and failing downstream with a NameError
    # (or silently reusing a stale AQUA_data left over in the kernel).
    raise
else:
    print(f"Loaded AQUASTAT data with {len(AQUA_data)} rows")

Loaded AQUASTAT data with 5841 rows


### Filter to All African Countries

In [3]:
# Flatten utility.africa_iso_countries (ISO code -> country names) into a
# lookup keyed by country name, covering all African countries.
ISO_mapping = {
    name: code
    for code, names in utility.africa_iso_countries.items()
    for name in names
}

# Tag every row with its ISO code. Rows whose 'Area' is not in the lookup get
# a missing value and are excluded from the African subset.
AQUA_data['ISO'] = AQUA_data['Area'].map(ISO_mapping)
AQUA_data_africa = AQUA_data[AQUA_data['ISO'].notna()]

print(f"Filtered to {len(AQUA_data_africa)} rows for African countries")

# Write the African subset to the location given by the
# 'AQUA_AfricaIrrigation' config entry; failures are reported, not raised.
try:
    output_dir_africa = utility.resolve_path(utility.load_config()['AQUA_AfricaIrrigation'])
    AQUA_data_africa.to_csv(path_or_buf=output_dir_africa, index=False)
    print(f"Saved African data to {output_dir_africa}")
except Exception as e:
    print(f"Error saving African data: {e}")

Filtered to 1642 rows for African countries
Saved African data to /home/waves/data/Africa_Irrigation/Data/Processed/AQUA_AfricaIrrigation.csv


### Filter to Sub-Saharan African Countries

In [4]:
# Flatten utility.africa_iso_countries_filtered (ISO code -> country names)
# into a lookup keyed by country name, for the Sub-Saharan African subset.
ISO_mapping_SSA = {
    name: code
    for code, names in utility.africa_iso_countries_filtered.items()
    for name in names
}

# Overwrite the 'ISO' column using the SSA lookup. Rows whose 'Area' is not
# in the lookup get a missing value and are excluded from the SSA subset.
AQUA_data['ISO'] = AQUA_data['Area'].map(ISO_mapping_SSA)
AQUA_data_SSA = AQUA_data[AQUA_data['ISO'].notna()]

print(f"Filtered to {len(AQUA_data_SSA)} rows for Sub-Saharan African countries")

# Write the SSA subset to the location given by the 'AQUA_SSAIrrigation'
# config entry; failures are reported, not raised.
try:
    output_dir_SSA = utility.resolve_path(utility.load_config()['AQUA_SSAIrrigation'])
    AQUA_data_SSA.to_csv(path_or_buf=output_dir_SSA, index=False)
    print(f"Saved SSA data to {output_dir_SSA}")
except Exception as e:
    print(f"Error saving SSA data: {e}")

Filtered to 1397 rows for Sub-Saharan African countries
Saved SSA data to /home/waves/data/Africa_Irrigation/Data/Processed/AQUA_SSAIrrigation.csv


## Extract Data for Specific Years (2000 and 2021)

In [5]:
# Pull out the rows for the two years of interest (2000 and 2021) from each
# regional subset. `.copy()` gives independent frames rather than slices.
AQUA_Africa2000 = AQUA_data_africa.loc[AQUA_data_africa['Year'].eq(2000)].copy()
AQUA_Africa2021 = AQUA_data_africa.loc[AQUA_data_africa['Year'].eq(2021)].copy()
AQUA_SSA2000 = AQUA_data_SSA.loc[AQUA_data_SSA['Year'].eq(2000)].copy()
AQUA_SSA2021 = AQUA_data_SSA.loc[AQUA_data_SSA['Year'].eq(2021)].copy()

# Report the size of each extract.
print(f"Africa 2000: {len(AQUA_Africa2000)} rows")
print(f"Africa 2021: {len(AQUA_Africa2021)} rows")
print(f"SSA 2000: {len(AQUA_SSA2000)} rows")
print(f"SSA 2021: {len(AQUA_SSA2021)} rows")

Africa 2000: 49 rows
Africa 2021: 53 rows
SSA 2000: 42 rows
SSA 2021: 46 rows


In [6]:
# Resolve the destination of each year-specific extract from the project config.
config = utility.load_config()
output_paths = dict(
    Africa2000=utility.resolve_path(config['AQUA_AfricaIrrigation_2000']),
    Africa2021=utility.resolve_path(config['AQUA_AfricaIrrigation_2021']),
    SSA2000=utility.resolve_path(config['AQUA_SSAIrrigation_2000']),
    SSA2021=utility.resolve_path(config['AQUA_SSAIrrigation_2021']),
)

# Write the four extracts in order. The first failure stops the remaining
# writes and is reported by the handler below.
try:
    AQUA_Africa2000.to_csv(path_or_buf=output_paths['Africa2000'], index=False)
    AQUA_Africa2021.to_csv(path_or_buf=output_paths['Africa2021'], index=False)
    AQUA_SSA2000.to_csv(path_or_buf=output_paths['SSA2000'], index=False)
    AQUA_SSA2021.to_csv(path_or_buf=output_paths['SSA2021'], index=False)
    print("Successfully saved all year-specific datasets")
except Exception as e:
    print(f"Error saving year-specific datasets: {e}")

Successfully saved all year-specific datasets
