# Pull Sentinel-2 time series (per break) with Google Earth Engine

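This notebook exports, for every water main break, a monthly time series of Sentinel-2 spectral indices (NDVI, NDWI, NDRE, RENDVI) averaged over a small buffer around the break location.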

Inputs:
- `data/labels/kitchener_water_main_breaks.geojson`
- `data/geometry/corridors.geojson` (optional, for ROI filtering; not read by the cells below)

Outputs:
- CSV exports to Google Drive (download into `data/imagery/sentinel2_time_series/`)

Time strategy (per break):
- 6 months before break
- 2 months after break
- baseline same-month snapshots from 2–4 years prior


In [None]:
import json
import ee

# First run only
# ee.Authenticate()

# Start the Earth Engine client session; every ee.* call in the cells below
# goes through it. NOTE(review): presumably this needs credentials created by
# the ee.Authenticate() step above — uncomment that line on a fresh machine.
ee.Initialize()


In [None]:
# Load break points
# GeoJSON is UTF-8 by specification (RFC 7946); pass the encoding explicitly so
# the read does not depend on the platform's default locale encoding.
with open('data/labels/kitchener_water_main_breaks.geojson', 'r', encoding='utf-8') as f:
    breaks_geojson = json.load(f)

# Turn the parsed GeoJSON into an Earth Engine FeatureCollection so the breaks
# can be mapped over with ee functions.
breaks_fc = ee.FeatureCollection(breaks_geojson)

# Optional: limit for quick testing
# breaks_fc = breaks_fc.limit(100)


In [None]:
# Sentinel-2 Surface Reflectance (harmonized)
# Shared, unfiltered source collection; composite_for_month() narrows it by
# date range and geometry before compositing.
s2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')

def mask_s2_sr(image):
    """Mask flagged pixels using the QA60 band and rescale the image.

    A pixel is kept only when both bit 10 and bit 11 of ``QA60`` are zero
    (per the Sentinel-2 QA60 description these flag opaque clouds and cirrus).
    The masked image is then divided by 10000; the division is applied to
    every band of the image, not just the reflectance bands.

    Args:
        image: An ``ee.Image`` that contains a ``QA60`` band.

    Returns:
        The masked image with all band values divided by 10000.
    """
    bit_10 = 1 << 10
    bit_11 = 1 << 11

    qa_band = image.select('QA60')
    bit_10_clear = qa_band.bitwiseAnd(bit_10).eq(0)
    bit_11_clear = qa_band.bitwiseAnd(bit_11).eq(0)
    keep = bit_10_clear.And(bit_11_clear)

    masked = image.updateMask(keep)
    return masked.divide(10000)

def add_indices(image):
    """Append four normalized-difference index bands to ``image``.

    Each index is ``(a - b) / (a + b)`` of the listed band pair, as computed
    by ``normalizedDifference``:

    - ``NDVI``:   B8  vs B4
    - ``NDWI``:   B3  vs B8
    - ``NDRE``:   B8A vs B5
    - ``RENDVI``: B7  vs B5

    Args:
        image: An ``ee.Image`` containing bands B3, B4, B5, B7, B8 and B8A.

    Returns:
        ``image`` with the four index bands added, in the order listed above.
    """
    band_pairs = (
        ('NDVI', ['B8', 'B4']),
        ('NDWI', ['B3', 'B8']),
        ('NDRE', ['B8A', 'B5']),
        ('RENDVI', ['B7', 'B5']),
    )
    index_bands = [
        image.normalizedDifference(pair).rename(index_name)
        for index_name, pair in band_pairs
    ]
    return image.addBands(index_bands)

def composite_for_month(month_start, geom):
    """Return a median composite of masked Sentinel-2 scenes for one month.

    The window is ``[month_start, month_start + 1 month)``. Scenes from the
    module-level ``s2`` collection that intersect ``geom`` are passed through
    ``mask_s2_sr`` and ``add_indices`` and then reduced with a per-pixel median.

    Args:
        month_start: Start of the window; anything ``ee.Date`` accepts.
        geom: ``ee.Geometry`` used to filter scenes spatially.

    Returns:
        An ``ee.Image``. When at least one scene matches, it is the median
        composite with all source bands plus NDVI/NDWI/NDRE/RENDVI. When no
        scene matches, it is a fully masked placeholder that carries only the
        four index bands, so band selection downstream does not fail.
    """
    index_bands = ['NDVI', 'NDWI', 'NDRE', 'RENDVI']

    month_start = ee.Date(month_start)
    month_end = month_start.advance(1, 'month')
    col = (s2
        .filterDate(month_start, month_end)
        .filterBounds(geom)
        .map(mask_s2_sr)
        .map(add_indices))

    # median() of an empty collection yields an image with no bands, which
    # makes a later select() of the index bands raise. Months with no imagery
    # (e.g. dates before the archive starts) are expected, so substitute an
    # all-masked image that still exposes the index band names.
    placeholder = (ee.Image.constant([0] * len(index_bands))
        .rename(index_bands)
        .updateMask(ee.Image.constant(0)))

    return ee.Image(ee.Algorithms.If(col.size().gt(0), col.median(), placeholder))


In [None]:
# Build per-break time series
# Distance passed to geometry().buffer() around each break point.
# NOTE(review): presumably metres, as the `_m` suffix suggests — confirm.
buffer_m = 30
# Inclusive range of "years before the break" for which a baseline composite
# is built (fed to ee.List.sequence in per_break_timeseries).
baseline_years_start = 2
baseline_years_end = 4

def per_break_timeseries(feature):
    """Build the index time series for one break feature.

    Two groups of monthly composites are produced around the buffered break
    location (``buffer_m`` around the feature geometry):

    - ``window``: consecutive one-month windows starting 6 months before the
      break date and ending 2 months after it. Windows start on the break's
      day of month, not on the first of the calendar month.
    - ``baseline``: the calendar month of the break, taken
      ``baseline_years_start``..``baseline_years_end`` years earlier.

    For every composite the mean NDVI/NDWI/NDRE/RENDVI over the buffer is
    computed at 20 m scale and stored on a geometry-less feature.

    Args:
        feature: ``ee.Feature`` with a ``break_date`` property formatted as
            ``YYYY-MM-dd`` and an ``id`` property.

    Returns:
        ``ee.FeatureCollection`` with one feature per composite. Properties:
        the four index means, ``break_id``, ``break_date``, ``month``
        (``YYYY-MM`` of the window start), ``kind`` (``'window'`` or
        ``'baseline'``) and ``baseline_years_ago`` (``0`` for window rows).
    """
    index_bands = ['NDVI', 'NDWI', 'NDRE', 'RENDVI']

    break_date = ee.Date.parse('YYYY-MM-dd', feature.get('break_date'))
    geom = feature.geometry().buffer(buffer_m)

    start = break_date.advance(-6, 'month')
    end = break_date.advance(2, 'month')
    # Date.difference() is fractional and based on the average month length,
    # so the raw value can land slightly below the whole month count; without
    # rounding, List.sequence would then drop the last window.
    n_months = end.difference(start, 'month').round()

    months = ee.List.sequence(0, n_months.subtract(1))

    def window_img(n):
        # n is the zero-based month offset from the start of the window.
        n = ee.Number(n)
        month_start = start.advance(n, 'month')
        img = composite_for_month(month_start, geom)
        return img.set({
            'month': month_start.format('YYYY-MM'),
            'kind': 'window',
            # Earth Engine properties cannot be set to None/null (the set call
            # is rejected), so window rows use 0 as "not a baseline".
            'baseline_years_ago': 0
        })

    years = ee.List.sequence(baseline_years_start, baseline_years_end)

    def baseline_img(y):
        # y is how many years before the break this baseline is taken.
        y = ee.Number(y)
        base_date = break_date.advance(y.multiply(-1), 'year')
        # Baselines use the full calendar month of the break, y years earlier.
        month_start = ee.Date.fromYMD(base_date.get('year'), break_date.get('month'), 1)
        img = composite_for_month(month_start, geom)
        return img.set({
            'month': month_start.format('YYYY-MM'),
            'kind': 'baseline',
            'baseline_years_ago': y
        })

    imgs = ee.ImageCollection.fromImages(months.map(window_img).cat(years.map(baseline_img)))

    def img_to_feature(img):
        # Mean of each index over the buffered break location.
        stats = img.select(index_bands).reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=geom,
            scale=20,
            maxPixels=1e9
        )
        # Geometry is dropped on purpose: the output is a flat table.
        return ee.Feature(None, stats).set({
            'break_id': feature.get('id'),
            'break_date': feature.get('break_date'),
            'month': img.get('month'),
            'kind': img.get('kind'),
            'baseline_years_ago': img.get('baseline_years_ago')
        })

    return ee.FeatureCollection(imgs.map(img_to_feature))

# Map every break to its own time-series collection, then merge the nested
# collections into a single flat table of rows.
per_break_collections = breaks_fc.map(per_break_timeseries)
series_fc = ee.FeatureCollection(per_break_collections).flatten()


In [None]:
# Export to Google Drive as CSV
folder = 'water_leakage_s2_time_series'

# Collect the export settings first so they are easy to review and tweak.
export_options = {
    'collection': series_fc,
    'description': 's2_time_series',
    'folder': folder,
    'fileNamePrefix': 's2_time_series',
    'fileFormat': 'CSV',
}

# Create the batch task and submit it; start() only queues the export, so the
# message below is printed as soon as the task has been submitted.
task = ee.batch.Export.table.toDrive(**export_options)
task.start()
print('Export started')
