# 🇺🇬 Uganda Flood Timeseries Generator (2020–2025)

This notebook computes a **flood level metric** (fraction of each district flooded) for Uganda from **2020–2025**, using:

- **Sentinel-1 SAR** flood detection
- **JRC Global Surface Water** (permanent water mask)
- **GPM IMERG** precipitation (optional)
- **Uganda district boundaries**

Outputs: **CSV dataset** with flood_fraction per district per month.

**Before running:** place your district shapefile at `../data/raw/geographies/uganda_herbert/uganda_districts.shp` (or update `DISTRICT_SHP_PATH` in the Parameters section).

## Environment Setup

In [1]:
from pathlib import Path
import ee, geemap
import numpy as np
import geopandas as gpd
import pandas as pd
import datetime
from shapely.geometry import mapping
from time import sleep

## Folder Structure

In [2]:
# Input and output locations, relative to the notebook's working directory.
raw_data_dir = Path("../data/raw")
proc_data_dir = Path("../data/processed/climate")

# Create the output directory (and any missing parents) up front;
# this is a no-op when the directory already exists.
proc_data_dir.mkdir(exist_ok=True, parents=True)

## Parameters

In [4]:
# Path to shapefile
DISTRICT_SHP_PATH = raw_data_dir / "geographies/uganda_herbert/uganda_districts.shp"

# Output CSV written by the final cell (one row per district per month)
OUTPUT_CSV = proc_data_dir / "uganda_flood_fraction_monthly.csv"

# Baseline period and study period
START_DATE = '2020-01-01'
END_DATE   = '2025-08-31'
BASELINE_START = '2015-01-01'
BASELINE_END   = '2019-12-31'

# Flood detection parameters
ANOMALY_THRESHOLD = -3.0   # dB
PERM_WATER_OCCURRENCE_PCT = 50


S1_COLLECTION_ID = 'COPERNICUS/S1_GRD'
# NOTE(review): machine-specific absolute path (embeds a user name and account);
# update it for your environment before running.
DRIVE_FOLDER = Path('/Users/paoich/Library/CloudStorage/GoogleDrive-andrichpaolo@gmail.com/My Drive/data/flood_files')
CHUNK_SIZE = 50  # how many images to submit per run (tune to your EE quotas)
SCALE = 10  # meters for pixelArea reductions
BATCH_SIZE = 20           # how many image metadata items to fetch per getInfo() call (50 is safe)
SLEEP_BETWEEN_BATCHES = 1 # seconds between batches to be polite to EE servers
SLEEP_BETWEEN_EXPS = 0.5  # seconds between export submissions

PROCESSED_LOG = str((DRIVE_FOLDER / 'processed_images.jsonl').resolve())  # local log file (newline-delimited JSON)
RESUBMIT_FAILED = False

## Setup Google Earth Engine

In [2]:
# Authenticate this session with Google Earth Engine (the recorded run returned True).
ee.Authenticate()

True

In [3]:
# Initialise the Earth Engine client against the project given below.
# NOTE(review): the project ID is hard-coded; change it to a project you can access.
ee.Initialize(project = "divine-catalyst-330916")

In [5]:
# Equal-area CRS used only for measuring district areas in square metres
# (EPSG:6933 = WGS 84 / NSIDC EASE-Grid 2.0 Global, cylindrical equal-area).
AREA_CRS_EPSG = 6933

districts_gdf = (
    gpd
    .read_file(DISTRICT_SHP_PATH)
    .rename(columns = str.lower)
    .assign(
        # Normalise district names to title case; "SSEMBABULE" is mapped to "Sembabule".
        district = lambda x: np.where(
            x["district"] == "SSEMBABULE",
            "Sembabule",
            x["district"].str.title()
        ),
        # Measure areas in an equal-area projection so the result is in m²
        # whatever CRS the shapefile ships in (areas taken in a geographic CRS
        # would be in square degrees).
        area_m2 = lambda x: x["geometry"].to_crs(epsg=AREA_CRS_EPSG).area
    )
    # Earth Engine geometries are built from lon/lat GeoJSON below.
    .to_crs(epsg=4326)
    [[
        "district",
        "area_m2",
        "geometry"
    ]]
)

# Plain-dict view of the districts for the per-district reduction loop.
# "area_m2" reuses the projected area computed above: at this point the
# geometries are in EPSG:4326, so geometry.area would be in degrees².
districts = [
    {
        "id": idx,
        "name": row["district"],
        "geometry": mapping(row.geometry),
        "area_m2": float(row["area_m2"]),
    }
    for idx, row in districts_gdf.iterrows()
]

print(f"Loaded {len(districts)} districts.")

Loaded 146 districts.


In [7]:
# Sentinel-1 GRD collection; the ID is the S1_COLLECTION_ID constant from the
# Parameters cell so it is configured in one place.
S1 = ee.ImageCollection(S1_COLLECTION_ID)
# JRC Global Surface Water image, used for the permanent-water mask.
GSW = ee.Image('JRC/GSW1_4/GlobalSurfaceWater')
print("Datasets loaded.")

Datasets loaded.


In [8]:
# Merge all district polygons into a single outline, then convert it to an
# Earth Engine object for use as the country-wide region.
uganda_outline_gdf = districts_gdf.dissolve()
uganda_ee = geemap.geopandas_to_ee(uganda_outline_gdf)

def s1_vv(col, region):
    """Narrow a Sentinel-1 collection to the VV band of IW-mode scenes over a region.

    Parameters
    ----------
    col : ee.ImageCollection
        Sentinel-1 collection, optionally already filtered (e.g. by date).
    region : Earth Engine geometry or feature collection
        Passed to ``filterBounds`` to keep only intersecting scenes.

    Returns
    -------
    ee.ImageCollection
        The filtered collection with only the 'VV' band selected.
    """
    is_iw_mode = ee.Filter.eq('instrumentMode', 'IW')
    has_vv = ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')
    vv_scenes = col.filter(is_iw_mode).filter(has_vv).filterBounds(region)
    return vv_scenes.select('VV')

In [12]:
# Sentinel-1 scenes from the baseline years; shared by every district and month.
baseline_scenes = S1.filterDate(BASELINE_START, BASELINE_END)


def _monthly_baseline(district_name):
    """Map each calendar month (1-12) to the district's median baseline VV image."""
    district_rows = districts_gdf[districts_gdf["district"] == district_name]
    district_geom = geemap.geopandas_to_ee(district_rows).geometry()
    return {
        month: s1_vv(
            baseline_scenes.filter(ee.Filter.calendarRange(month, month, "month")),
            district_geom,
        ).median()
        for month in range(1, 13)
    }


# {district name: {month number: median VV image over the baseline period}}
baseline_by_district_month = {
    district_name: _monthly_baseline(district_name)
    for district_name in districts_gdf["district"]
}

In [None]:
def detect_flood(start, end, region=None):
    """Build a binary flood mask for one period from the Sentinel-1 VV anomaly.

    The period's median VV backscatter is compared with the median of the same
    calendar month over the baseline years (BASELINE_START to BASELINE_END).
    A pixel is flagged when the anomaly is at or below ANOMALY_THRESHOLD (dB)
    and the pixel is not permanent water.

    Parameters
    ----------
    start, end : str
        First and last day of the period ('YYYY-MM-DD'); both are inclusive.
    region : Earth Engine geometry or feature collection, optional
        Area used to select Sentinel-1 scenes. Defaults to the dissolved
        country outline ``uganda_ee``.

    Returns
    -------
    ee.Image
        Single band named 'flood' (1 = flooded, 0 = not flooded).
    """
    if region is None:
        region = uganda_ee

    month = int(pd.to_datetime(start).month)

    # filterDate treats its end as exclusive, so step one day past the
    # inclusive end dates; otherwise the last day of the range is dropped.
    period_end = ee.Date(end).advance(1, 'day')
    baseline_end = ee.Date(BASELINE_END).advance(1, 'day')

    period = s1_vv(S1.filterDate(start, period_end), region).median()

    # Baseline: same calendar month across the baseline years. The median at a
    # pixel only depends on scenes covering that pixel, so one baseline over
    # the whole region serves every district inside it.
    baseline = s1_vv(
        S1.filterDate(BASELINE_START, baseline_end)
          .filter(ee.Filter.calendarRange(month, month, 'month')),
        region
    ).median()

    anomaly = period.subtract(baseline)

    # NOTE: 'occurrence' is masked where GSW has no water observation; unmask(0)
    # treats those pixels as "not permanent water" instead of letting the mask
    # propagate into the flood image and hide flooding on normally dry land.
    perm_water = GSW.select('occurrence').unmask(0).gte(PERM_WATER_OCCURRENCE_PCT)
    flood = anomaly.lte(ANOMALY_THRESHOLD).And(perm_water.Not())

    return flood.rename('flood')

In [None]:
def monthly_periods(start, end):
    """Split a date span into calendar months.

    Returns a list of (first_day, last_day) string tuples formatted as
    'YYYY-MM-DD', one for every month whose first day lies within
    [start, end].
    """
    date_format = '%Y-%m-%d'
    month_starts = pd.date_range(start, end, freq='MS')
    return [
        (
            first_day.strftime(date_format),
            (first_day + pd.offsets.MonthEnd(1)).strftime(date_format),
        )
        for first_day in month_starts
    ]

# Study period split into months; the bounds are the START_DATE / END_DATE
# constants from the Parameters cell.
periods = monthly_periods(START_DATE, END_DATE)
periods[:3], periods[-3:]

In [None]:
rows = []

for start, end in periods:
    print("Processing:", start, end)
    # detect_flood takes the region as its third argument; use the country outline.
    flood_img = detect_flood(start, end, uganda_ee)
    # Pixel areas (m²) kept only where the flood mask is non-zero.
    area_img = ee.Image.pixelArea().updateMask(flood_img)

    for d in districts:
        geom = ee.Geometry(d['geometry'])
        stats = area_img.reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=geom,
            scale=SCALE,
            maxPixels=1e13
        )
        try:
            # Fetch the result once: every getInfo() is a server round trip.
            stats_dict = stats.getInfo() or {}
            # A missing or null 'area' means no flooded pixels were summed.
            flooded = stats_dict.get('area') or 0
        except Exception as err:
            # Record the failure as missing data rather than as "no flooding",
            # so failed requests cannot be mistaken for dry districts.
            print(f"  reduceRegion failed for {d['name']} ({start}): {err}")
            flooded = None

        if flooded is not None and d['area_m2'] > 0:
            flood_fraction = flooded / d['area_m2']
        else:
            flood_fraction = None

        rows.append({
            'period_start': start,
            'period_end': end,
            'district_id': d['id'],
            'district_name': d['name'],
            'flooded_m2': flooded,
            'district_area_m2': d['area_m2'],
            'flood_fraction': flood_fraction
        })

In [None]:
# Collect the per-district, per-month records into one table and save it.
# (pandas is already imported as pd in the imports cell at the top.)
df = pd.DataFrame(rows)
df.to_csv(OUTPUT_CSV, index=False)
df.head()