# Generate Uganda Flood Timeseries (2020–2025)

This notebook computes a **flood_fraction** (flooded area / district area) for **each Sentinel-1 acquisition** over Uganda for the period 2020-01-01 to 2025-08-31.

Key points:
- Uses **Sentinel-1 GRD (VV)** per-acquisition backscatter anomalies vs a monthly baseline (2017-2019).
- Masks permanent water with **JRC Global Surface Water**.
- Produces a CSV where each row is one (image acquisition × district) with:
  - `acq_datetime`, `image_id`, `district_name`, `flooded_m2`, `district_area_m2`, `flood_fraction`, `coverage_flag`.
- This avoids a rigid daily grid and only reports when Sentinel-1 actually observed a district.


## Environment Setup

In [47]:
import os
from pathlib import Path
import ee, geemap
import numpy as np
import geopandas as gpd
import pandas as pd
import datetime
from shapely.geometry import mapping

## Setup Google Earth Engine

In [37]:
ee.Authenticate()

Enter verification code:  4/1Ab32j92_hrFPQMbQK_56yOH8ebGxEsKNhsy7hleCKtClrE4gpOnXwNpvRXM



Successfully saved authorization token.


In [8]:
ee.Initialize(project = "divine-catalyst-330916")

## Folder Structure

In [25]:
raw_data_dir = Path("../data/raw")
proc_data_dir = Path("../data/processed/climate/flood_files")
proc_data_dir.mkdir(parents = True, exist_ok=True)

## Parameters

In [48]:
START_DATE = "2020-01-01"
END_DATE   = "2025-08-31"
BASELINE_START = "2010-01-01"
BASELINE_END   = "2019-12-31"
ANOMALY_THRESHOLD = -3.0   # dB
PERM_WATER_OCCURRENCE_PCT = 50  # mask permanent water where occurrence >= this
DRIVE_META_FOLDER = Path('/Users/paoich/Library/CloudStorage/GoogleDrive-andrichpaolo@gmail.com/My Drive/data/flood_files/meta')
DRIVE_META_FOLDER.mkdir(parents = True, exist_ok = True)
LOCAL_META_FOLDER = proc_data_dir / "metadata"
LOCAL_META_FOLDER.mkdir(parents = True, exist_ok = True)
DRIVE_FOLDER = Path('/Users/paoich/Library/CloudStorage/GoogleDrive-andrichpaolo@gmail.com/My Drive/data/flood_files')
DRIVE_FOLDER.mkdir(parents = True, exist_ok = True)
OUTPUT_FILE = proc_data_dir / "uganda_flood_by_district.csv"

LOGFILE = proc_data_dir/ 'meta_batch_log.jsonl'
BATCH_SIZE = 50   # reduce to 5-20 if you still hit problems
SLEEP_BETWEEN_BATCHES = 1.0

## Import Files

### Shapes

In [35]:
districts_gdf = (
    gpd.read_file(
        raw_data_dir / 
        "geographies" /
        "uganda_herbert" /
        "uganda_districts.shp"
    )
    .rename(columns = str.lower)
    .assign(
        district = lambda x: np.where(
            x["district"] == "SSEMBABULE",
            "Sembabule",
            x["district"].str.title()
        ),
        area_m2 = lambda x: x["geometry"].area
    )
    .to_crs(4326)
    [[
        "district",
        "area_m2",
        "geometry"
    ]]      
)

In [36]:
uganda_geom = geemap.geopandas_to_ee(districts_gdf.dissolve())
districts_fc = geemap.geopandas_to_ee(districts_gdf)
def ensure_props(f):
    return f.set({'district': f.get('district'), 'area_m2': f.get('area_m2')})
districts_fc = districts_fc.map(ensure_props)

In [50]:
def make_feature_from_img(img):
    img = ee.Image(img)
    return ee.Feature(None, {
        'image_id': ee.String(img.get('system:index')),
        'time_start': ee.Number(img.get('system:time_start')),
        'orbit_pass': ee.String(img.get('orbitProperties_pass'))
    })

def append_log(rec):
    with open(LOGFILE, 'a') as fh:
        fh.write(json.dumps(rec) + '\n')
        fh.flush(); os.fsync(fh.fileno())

months = pd.date_range(START_DATE, END_DATE, freq='MS')
for m in months:
    m_start = m.strftime('%Y-%m-%d')
    m_end = (m + pd.offsets.MonthEnd(1)).strftime('%Y-%m-%d')
    print(f"Month {m_start} -> {m_end}")

    coll = (ee.ImageCollection("COPERNICUS/S1_GRD")
            .filterDate(m_start, m_end)
            .filter(ee.Filter.eq('instrumentMode','IW'))
            .filter(ee.Filter.listContains('transmitterReceiverPolarisation','VV'))
            .filterBounds(uganda_geom.geometry()))

    offset = 0
    batch_idx = 0
    while True:
        # server-side small list of images
        batch_list = coll.toList(BATCH_SIZE, offset)  # ee.List (server-side)

        # build an ee.FeatureCollection from that ee.List by mapping a server-side function
        batch_features_list = batch_list.map(lambda img: make_feature_from_img(img))
        batch_fc = ee.FeatureCollection(batch_features_list)

        # get batch size (small scalar getInfo is usually safe)
        try:
            batch_count = int(batch_fc.size().getInfo())
        except Exception:
            # If even this tiny meta call fails, reduce BATCH_SIZE and try again.
            print("Warning: batch size check via getInfo() failed. If errors continue, reduce BATCH_SIZE.")
            batch_count = None

        if batch_count == 0:
            print(f"  No more images at offset {offset}.")
            break

        file_prefix = f"s1_meta_{m_start}_batch{batch_idx:03d}"
        # Try server-side export first
        try:
            task = ee.batch.Export.table.toDrive({
                'collection': batch_fc,
                'description': f"export_{file_prefix}",
                'folder': DRIVE_META_FOLDER,
                'fileNamePrefix': file_prefix,
                'fileFormat': 'CSV'
            })
            task.start()
            print(f"  Submitted Drive export for {file_prefix} (features: {batch_count})")
            logrec = {'file_prefix': file_prefix, 'month': m_start, 'batch_idx': batch_idx,
                      'submitted_at_utc': pd.Timestamp.utcnow().isoformat(), 'method': 'drive_export'}
            append_log(logrec)

        except TypeError as te:
            # The client JSON serializer failed. Fallback: fetch small batch client-side and save CSV locally.
            print(f"  TypeError on task.start() for {file_prefix}: {te!r}")
            print("  Falling back to batch_fc.getInfo() -> local CSV (this is safe for small batches).")

            try:
                info = batch_fc.getInfo()  # small; OK when BATCH_SIZE is small
                # info is a dict with 'features' list
                feats = info.get('features', [])
                rows = []
                for f in feats:
                    props = f.get('properties', {})
                    rows.append({
                        'image_id': props.get('image_id'),
                        'time_start': props.get('time_start'),
                        'orbit_pass': props.get('orbit_pass')
                    })
                df = pd.DataFrame(rows)
                outpath = LOCAL_METADATA_DIR / f"{file_prefix}.csv"
                df.to_csv(outpath, index=False)
                print(f"  Wrote local CSV: {outpath} (rows: {len(df)})")
                logrec = {'file_prefix': file_prefix, 'month': m_start, 'batch_idx': batch_idx,
                          'submitted_at_utc': pd.Timestamp.utcnow().isoformat(), 'method': 'local_csv'}
                append_log(logrec)
            except Exception as e:
                # Log the failure and continue
                print(f"  Fallback getInfo() failed for batch {file_prefix}: {e!r}")
                logrec = {'file_prefix': file_prefix, 'month': m_start, 'batch_idx': batch_idx,
                          'submitted_at_utc': pd.Timestamp.utcnow().isoformat(), 'method': 'fallback_failed',
                          'error': str(e)}
                append_log(logrec)

        except Exception as e:
            # Other unexpected errors: log and continue
            print(f"  Unexpected error submitting export for {file_prefix}: {e!r}")
            append_log({'file_prefix': file_prefix, 'month': m_start, 'batch_idx': batch_idx,
                        'submitted_at_utc': pd.Timestamp.utcnow().isoformat(), 'method': 'error', 'error': str(e)})

        # advance to next batch
        batch_idx += 1
        offset += BATCH_SIZE
        time.sleep(SLEEP_BETWEEN_BATCHES)

    print(f"Month {m_start} scheduled.")
print("Done scheduling metadata.")

Month 2020-01-01 -> 2020-01-31
  TypeError on task.start() for s1_meta_2020-01-01_batch000: TypeError('Object of type FeatureCollection is not JSON serializable')
  Falling back to batch_fc.getInfo() -> local CSV (this is safe for small batches).
  Fallback getInfo() failed for batch s1_meta_2020-01-01_batch000: NameError("name 'LOCAL_METADATA_DIR' is not defined")


NameError: name 'json' is not defined

### Satellite collections

In [9]:
S1 = ee.ImageCollection("COPERNICUS/S1_GRD")
GSW = ee.Image("JRC/GSW1_4/GlobalSurfaceWater")  # "occurrence" band (0-100)

In [17]:
ee_features = []
prop_cols = ["district", "area"]
for _, row in sf_uga.iterrows():
    geom_json = mapping(row["geometry"])
    props = {col: row[col] for col in prop_cols}
    feat = ee.Feature(geom_json, props)      # create ee.Feature
    ee_features.append(feat)

fc = ee.FeatureCollection(ee_features)
uganda_ee = geemap.geopandas_to_ee(districts_gdf.dissolve())

In [12]:
# Build monthly baseline median VV (2017-2019) clipped to Uganda
def s1_vv_for_region(col, region):
    return (
        col
        .filter(ee.Filter.eq("instrumentMode", "IW"))
        .filter(ee.Filter.listContains("transmitterReceiverPolarisation", "VV"))
        .filterBounds(region)
        .select("VV")
    )

In [13]:
baseline_by_district_month = {}
for d in sf_uga["district"]:
    baseline_district = {}
    district_ee = geemap.geopandas_to_ee(sf_uga.loc[lambda x: x["district"] == d]).geometry()
    for m in range(1,13):
        monthly_col = s1_vv_for_region(
            S1.filterDate(BASELINE_START, BASELINE_END).filter(ee.Filter.calendarRange(m,m,"month")), 
            district_ee
        )
        median_img = monthly_col.median()
        baseline_district[m] = median_img
    baseline_by_district_month[d] = baseline_district

In [None]:
# List Sentinel-1 images that intersect Uganda in the study period and have VV polarization.
s1_all = s1_vv_for_region(S1.filterDate(START, END), uganda_ee)
s1_info = s1_all.reduceColumns(ee.Reducer.toList(2), ["system:index","system:time_start"]).getInfo()

In [24]:
ids = [info[0] for info in s1_info["list"]]
times = [info[1] for info in s1_info["list"]]
print("Found", len(ids), "sentinel-1 images over Uganda in the period.")

Found 4591 sentinel-1 images over Uganda in the period.


In [26]:
acq_time = datetime.datetime.fromtimestamp(int(times[0])/1000, datetime.UTC)
acq_dt_str = acq_time.strftime("%Y-%m-%d")

In [34]:
# Iterate over images and compute flooded area per district.
rows = []
for img_id, t in zip(ids, times):
    acq_time = datetime.datetime.fromtimestamp(int(t)/1000, datetime.UTC)
    acq_dt_str = acq_time.strftime("%Y-%m-%d")
    img = ee.Image("COPERNICUS/S1_GRD/" + img_id).select("VV")
    month = acq_time.month
    print("Processing 
    for i, row in sf_uga.iterrows():
        geom = (
            fc.
            filter(
                ee.Filter.eq("district", row["district"])
            )
            .first()
            .geometry()
        )
        baseline = baseline_by_district_month[row["district"]][month]
        anomaly = img.subtract(baseline)
        perm_water = GSW.select("occurrence").gte(PERM_WATER_OCCURRENCE_PCT)
        flood_mask = anomaly.lte(ANOMALY_THRESHOLD).And(perm_water.Not())
        flood_area_img = ee.Image.pixelArea().updateMask(flood_mask)
        try:
            stats = flood_area_img.reduceRegion(ee.Reducer.sum(), geom, scale=10, maxPixels=1e13)
            flooded = stats.getInfo().get("area", 0) if stats.getInfo() else 0
            # Also compute coverage fraction: whether the image covers >X% of district
            # We"ll estimate coverage by counting number of valid S1 pixels (non-masked) in the VV image
            valid_pixels = img.mask().reduceRegion(ee.Reducer.sum(), geom, scale=10, maxPixels=1e13).getInfo()
            valid_count = None
            if valid_pixels:
                # valid_pixels will have "VV" count times pixel area? Use existence as proxy; this is approximate.
                valid_count = 1
            coverage_flag = True if valid_count else False
        except Exception as e:
            flooded = None
            coverage_flag = False
        frac = float(flooded) / float(row["area"]) if (flooded is not None and row["area"]>0) else None
        rows.append({
            "acq_datetime": acq_dt_str,
            "district": row["district"],
            "flooded_km2": flooded,
            "area": row["area"],
            "flood_fraction": frac,
            "coverage": coverage_flag
        })
    (
        pd
            .DataFrame(rows)
            .to_csv(
                proc_data_dir / f"flood_extent_{"".join(acq_dt_str.split("-"))}"
            )
    )


KeyboardInterrupt



In [None]:
# Convert to DataFrame and save
df = pd.DataFrame(rows)
df.to_csv(OUTPUT_CSV, index=False)
print("Saved", OUTPUT_CSV)
df.head()

**Notes & next steps**

- The notebook lists all Sentinel-1 acquisitions over Uganda and computes a per-image flood fraction for each district.
- For districts not covered by a particular scene, `coverage` will be False and `flood_fraction` is None.
- You can aggregate the resulting table by date (e.g., group by day) or apply gap-filling/rolling averages.
- If the number of images is very large, consider processing by time chunks and exporting intermediate CSVs to Drive.