# Uganda Flood Timeseries — Colab-ready notebook

This Colab notebook computes flood metrics for every **Sentinel-1 acquisition × district** pair and exports the results as CSV files to your Google Drive. For each district it builds a same-calendar-month baseline on the fly, and it uses `Export.table.toDrive` so that the exports run server-side.

**Before you run:** upload your Uganda Admin-2 shapefile, including all of its companion files (`.shp`, `.shx`, `.dbf`, `.prj`), to a Drive folder and set `DRIVE_SHAPE_FOLDER` to that folder's path.


In [None]:
!pip install -q earthengine-api geemap geopandas pyogrio rasterio pandas shapely

In [None]:

# Mount Google Drive, then authenticate and initialise Earth Engine.
import ee
import geemap
from google.colab import drive

# Drive is mounted so that later cells can read the shapefile folder under /content/drive.
drive.mount('/content/drive')

# Interactive sign-in: follow the link, paste token.
ee.Authenticate()
ee.Initialize()

print('EE initialized.')


In [None]:

# ---------- User parameters (edit) ----------
# Path in Drive where your shapefile (uganda_admin2.*) lives, e.g. '/content/drive/MyDrive/uganda_shp'
# Joined with SHP_FILENAME to build the path that geopandas reads in the shapefile cell.
DRIVE_SHAPE_FOLDER = '/content/drive/MyDrive/uganda_shp'

# Filenames inside that folder
SHP_FILENAME = 'uganda_admin2.shp'  # make sure companion .dbf .shx are present

# Baseline period and study period (date strings passed to filterDate in later cells).
# The baseline range feeds the per-district reference median; the study range selects
# the Sentinel-1 acquisitions that get exported.
# NOTE(review): filterDate presumably treats the end date as exclusive — confirm whether
# the last day of each period is meant to be included.
BASELINE_START = '2017-01-01'
BASELINE_END   = '2019-12-31'
STUDY_START = '2020-01-01'
STUDY_END   = '2025-08-31'

# Flood detection params
# A pixel is flagged when (VV of the acquisition - baseline VV) <= ANOMALY_THRESHOLD.
ANOMALY_THRESHOLD = -3.0   # dB
# Only pixels whose JRC 'occurrence' value is below this are eligible to count as flood.
PERM_WATER_PCT = 50       # JRC occurrence threshold to treat as permanent water

# Export settings: Drive folder to save CSVs
EXPORT_FOLDER = 'EE_Flood_Exports'  # created inside your Drive MyDrive root
# Slice size used by the scheduling loop when walking the list of images.
IMAGES_PER_BATCH = 25  # how many S1 images to create export tasks for in one run (tune lower if tasks fail)


In [None]:

# Load the district shapefile from Drive with geopandas and convert it to an
# Earth Engine FeatureCollection whose features carry an 'area_m2' property.
import os
import geopandas as gpd
from shapely.geometry import mapping
import ee
import geemap

shp_path = os.path.join(DRIVE_SHAPE_FOLDER, SHP_FILENAME)
gdf = gpd.read_file(shp_path)
gdf = gdf.to_crs(epsg=4326)

# Client-side area in m2. EPSG:6933 is an equal-area projection; Web Mercator
# (EPSG:3857) is not, so areas measured in it are inflated away from the equator.
gdf['area_m2'] = gdf.geometry.to_crs(epsg=6933).area

# Choose a name field: the first known candidate present in the attribute
# table, otherwise fall back to the first column of the table.
candidate_fields = ['NAME_2', 'district', 'ADM2_NAME', 'name', 'DN']
name_field = next((c for c in candidate_fields if c in gdf.columns), None)
if name_field is None:
    name_field = gdf.columns[0]

# Convert to an EE FeatureCollection (planar edges, as in the source shapefile).
fc = geemap.geopandas_to_ee(gdf, geodesic=False)


def set_area(feature):
    """Return `feature` with 'area_m2' set to its server-side geometry area.

    This overwrites the value uploaded from geopandas, so the denominator used
    for the flood fraction is computed by Earth Engine itself. An explicit
    1 m error margin is passed because the geometries are planar.
    """
    return feature.set({'area_m2': ee.Number(feature.geometry().area(maxError=1))})


fc = fc.map(set_area)

print('Loaded shapefile with', len(gdf), 'features. Name field:', name_field)


In [None]:

# Dataset IDs plus the study-period Sentinel-1 collection and the JRC water layer.
S1_ID = 'COPERNICUS/S1_GRD'
GSW_ID = 'JRC/GSW1_3/GlobalSurfaceWater'

# Study-period acquisitions: IW mode, carrying a VV band, intersecting the districts.
is_iw_mode = ee.Filter.eq('instrumentMode', 'IW')
has_vv_band = ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')
s1_col = (
    ee.ImageCollection(S1_ID)
    .filterDate(STUDY_START, STUDY_END)
    .filter(is_iw_mode)
    .filter(has_vv_band)
    .filterBounds(fc.geometry())
)

# Only the 'occurrence' band of the surface-water image is used downstream.
gsw = ee.Image(GSW_ID).select('occurrence')

print(
    'Sentinel-1 images over study period (server-side collection size):',
    s1_col.size().getInfo(),
)


In [None]:

# Obtain the image IDs and acquisition times as a client-side list.
# For large numbers this may still be OK; if it is too large, consider chunking
# by year and repeating this listing per year.
#
# ee.Reducer.toList(2) groups its two inputs into tuples, so 'list' is ONE list
# of [system:index, system:time_start] pairs (one pair per image), not two
# parallel lists. The pairs are unpacked here into the ids/times columns.
info = s1_col.reduceColumns(ee.Reducer.toList(2), ['system:index', 'system:time_start']).getInfo()
pairs = info['list']
ids = [pair[0] for pair in pairs]
times = [pair[1] for pair in pairs]
print('Total S1 images found:', len(ids))
# Build list of dicts for images
images = [{'id': i, 'time_ms': t} for i, t in zip(ids, times)]
# Optionally limit for a demo run:
# images = images[:10]


In [None]:

# Function to process one S1 image and return a FeatureCollection of per-district results
def process_image_to_fc(image_id):
    """Compute per-district flood metrics for one Sentinel-1 acquisition.

    For every feature in ``fc`` the VV band of the image is compared with the
    median VV of baseline-period images from the same calendar month. Pixels
    whose difference is <= ANOMALY_THRESHOLD and whose JRC occurrence is below
    PERM_WATER_PCT are counted as flooded.

    Args:
        image_id: The image's ``system:index`` within the S1_ID collection.

    Returns:
        An ee.FeatureCollection: the features of ``fc`` with the added
        properties image_id, acq_time, baseline_exists, flooded_m2,
        district_area_m2, flood_fraction and coverage_pct.
    """
    img = ee.Image(S1_ID + '/' + image_id).select('VV')
    img_date = ee.Date(img.get('system:time_start'))
    month = img_date.get('month')  # 1..12

    # The occurrence band is masked where water was never observed. Without
    # unmask(0) the comparison below would be masked there too, and the And()
    # would drop exactly the normally-dry land where floods are looked for.
    not_permanent_water = gsw.unmask(0).lt(PERM_WATER_PCT)

    def _area_m2(mask, geom):
        """Sum pixel area (m2) inside geom where mask is non-zero; 0 if none."""
        total = ee.Image.pixelArea().updateMask(mask).reduceRegion(
            ee.Reducer.sum(), geom, scale=10, maxPixels=1e13
        ).get('area')
        # ee.Number has no unmask(); If() treats null and 0 as false, so a
        # missing result falls back to 0.
        return ee.Number(ee.Algorithms.If(total, total, 0))

    def per_feature_fn(feature):
        geom = feature.geometry()
        # baseline collection for this district/month
        baseline_col = (
            ee.ImageCollection(S1_ID)
            .filterDate(BASELINE_START, BASELINE_END)
            .filter(ee.Filter.eq('instrumentMode', 'IW'))
            .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
            .filterBounds(geom)
            .filter(ee.Filter.calendarRange(month, month, 'month'))
            .select('VV')
        )
        baseline_exists = baseline_col.size().gt(0)
        # The median of an empty collection has no bands and cannot be
        # subtracted from; substitute a fully masked VV band so the district
        # reports zero flooded area together with baseline_exists == 0.
        baseline = ee.Image(ee.Algorithms.If(
            baseline_exists,
            baseline_col.median(),
            ee.Image.constant(0).selfMask().rename('VV'),
        )).clip(geom)
        # clipped image
        img_clip = img.clip(geom)
        # anomaly and flood mask
        anomaly = img_clip.subtract(baseline)
        flood_mask = anomaly.lte(ANOMALY_THRESHOLD).And(not_permanent_water)
        # flooded area
        flooded_area = _area_m2(flood_mask, geom)
        # district area from the property set when the shapefile was loaded
        district_area = ee.Number(feature.get('area_m2'))
        flood_fraction = ee.Algorithms.If(
            district_area.gt(0), flooded_area.divide(district_area), None
        )
        # coverage: percentage of the district with unmasked pixels in the image
        valid_mask = img_clip.mask().gt(0)
        valid_area = _area_m2(valid_mask, geom)
        coverage_pct = ee.Number(100).multiply(valid_area.divide(district_area))
        # assemble feature properties
        return feature.set({
            'image_id': image_id,
            # Literal T and Z are quoted inside the pattern; the pattern itself
            # is double-quoted so Python parses it as one string.
            'acq_time': img_date.format("YYYY-MM-dd'T'HH:mm:ss'Z'"),
            'baseline_exists': baseline_exists,
            'flooded_m2': flooded_area,
            'district_area_m2': district_area,
            'flood_fraction': flood_fraction,
            'coverage_pct': coverage_pct,
        })

    return fc.map(per_feature_fn)

# Export a single image's per-district results to Drive as CSV
def export_image_to_drive(image_id, file_prefix):
    """Start a Drive CSV export of the per-district results for one image.

    Args:
        image_id: Sentinel-1 image id, forwarded to process_image_to_fc.
        file_prefix: Prefix of the output file name; the image id is appended.

    Returns:
        The started Earth Engine export task.
    """
    results_fc = process_image_to_fc(image_id)
    # The Python API takes keyword arguments; a JS-style config dict passed
    # positionally would be interpreted as the `collection` argument.
    task = ee.batch.Export.table.toDrive(
        collection=results_fc,
        # NOTE(review): task descriptions are believed to be limited to 100
        # characters; the slice is a no-op for shorter ids — confirm the limit.
        description=f'export_{image_id}'[:100],
        folder=EXPORT_FOLDER,
        fileNamePrefix=f'{file_prefix}_{image_id}',
        fileFormat='CSV',
    )
    task.start()
    return task

# Submit one Drive export task per image, walking the image list in slices of
# IMAGES_PER_BATCH. The slices only group the progress output: nothing waits
# for a batch to finish before the next one is submitted.
from time import sleep

n_images = len(images)
print('Scheduling exports for', n_images, 'images (in batches of', IMAGES_PER_BATCH, ')')

for i in range(0, n_images, IMAGES_PER_BATCH):
    batch = images[i:i + IMAGES_PER_BATCH]
    print(f'Batch {i//IMAGES_PER_BATCH + 1}: scheduling {len(batch)} exports...')
    for imgid in (entry['id'] for entry in batch):
        task = export_image_to_drive(imgid, 's1_districts')
        print('Started task for image', imgid, 'task id:', task.id)
        # One-second pause between submissions to pace the requests.
        sleep(1)

print('All export tasks submitted. Check the Earth Engine Tasks tab or drive folder to monitor exports.')



## Notes after running

- After the tasks complete, you will find one CSV file per Sentinel-1 image in the Drive folder named by `EXPORT_FOLDER`. You can download and concatenate them locally to create the master CSV.

- To concatenate in Colab:
```python
import pandas as pd
from glob import glob

# Sorted so the row order of the master CSV is the same on every run.
files = sorted(glob('/content/drive/MyDrive/EE_Flood_Exports/*.csv'))
if not files:
    # pd.concat raises on an empty list; report the situation instead.
    print('No exported CSVs found yet in /content/drive/MyDrive/EE_Flood_Exports')
else:
    # Named master_df rather than `all`, which would shadow the Python builtin.
    master_df = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)
    master_df.to_csv('/content/drive/MyDrive/uganda_flood_master.csv', index=False)
```

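- For a quick preview run, uncomment `images = images[:5]` in the image-listing cell (adjusting the slice as needed) and set `IMAGES_PER_BATCH = 2` in the parameters cell before scheduling the exports.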
