In [1]:
import ee
ee.Authenticate(auth_mode='notebook')
ee.Initialize(project='global-booster-421311')


In [2]:
catchments = ee.FeatureCollection('projects/global-booster-421311/assets/LakeCatchments_NA')

variables_mean = [
    'temperature_2m',
    'lake_mix_layer_temperature'
]
variables_sum = [
    'total_precipitation_sum',
    'surface_runoff_sum',
    'runoff_sum'
]
export_fields = [
    'Lake_ID', 'year', 'week', 'week_start', 'week_end',
    'temperature_2m',
    'lake_mix_layer_temperature',
    'total_precipitation_sum',
    'surface_runoff_sum',
    'runoff_sum'
]

def features_for_week(era, year, week):
    week = ee.Number(week)
    start_day = week.subtract(1).multiply(7)
    end_day = week.multiply(7).subtract(1)
    start_date = ee.Date(f'{year}-01-01').advance(start_day, 'day')
    end_date = ee.Date(f'{year}-01-01').advance(end_day, 'day')
    week_imgs = era.filterDate(start_date, end_date.advance(1, 'day'))
    mean_img = week_imgs.mean().select(variables_mean)
    sum_img = week_imgs.sum().select(variables_sum)
    stack = mean_img.addBands(sum_img)
    stats = stack.reduceRegions(
        collection=catchments,
        reducer=ee.Reducer.mean().forEachBand(mean_img)
                .combine(ee.Reducer.sum().forEachBand(sum_img), '', False),
        scale=10000
    ).map(lambda f: f.set({
        'year': year,
        'week': week,
        'week_start': start_date.format('YYYY/MM/dd'),
        'week_end': end_date.format('YYYY/MM/dd')
    }))
    return stats

def drop_geometry(feature):
    return feature.setGeometry(None)

years = list(range(2002, 2023))  # 2002 to 2022

for year in years:
    print(f"Processing year {year}...")
    era = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR').filterDate(f'{year}-01-01', f'{year+1}-01-01')
    weeks = list(range(1, 54))
    weekly_collections = ee.List(weeks).map(lambda w: features_for_week(era, year, ee.Number(w)))
    all_features = ee.FeatureCollection(weekly_collections).flatten()
    all_features_no_geom = all_features.map(drop_geometry)
    task = ee.batch.Export.table.toDrive(
        collection=all_features_no_geom.select(export_fields),
        description=f'ERA5Land_Weekly_Catchments_{year}_Climate',
        fileFormat='CSV'
    )
    task.start()
    print(f"Export started for {year}! Monitor in GEE Tasks.")

print("All yearly exports started! 🚀")


Processing year 2002...
Export started for 2002! Monitor in GEE Tasks.
Processing year 2003...
Export started for 2003! Monitor in GEE Tasks.
Processing year 2004...
Export started for 2004! Monitor in GEE Tasks.
Processing year 2005...
Export started for 2005! Monitor in GEE Tasks.
Processing year 2006...
Export started for 2006! Monitor in GEE Tasks.
Processing year 2007...
Export started for 2007! Monitor in GEE Tasks.
Processing year 2008...
Export started for 2008! Monitor in GEE Tasks.
Processing year 2009...
Export started for 2009! Monitor in GEE Tasks.
Processing year 2010...
Export started for 2010! Monitor in GEE Tasks.
Processing year 2011...
Export started for 2011! Monitor in GEE Tasks.
Processing year 2012...
Export started for 2012! Monitor in GEE Tasks.
Processing year 2013...
Export started for 2013! Monitor in GEE Tasks.
Processing year 2014...
Export started for 2014! Monitor in GEE Tasks.
Processing year 2015...
Export started for 2015! Monitor in GEE Tasks.
Proces

In [3]:
import pandas as pd
import os

folder = "Datasets\ECMWF_raw"  # adjust if needed

for fname in os.listdir(folder):
    if fname.startswith('ERA5Land_Weekly_Catchments_') and fname.endswith('.csv'):
        fpath = os.path.join(folder, fname)
        df = pd.read_csv(fpath)

        # Drop unwanted columns if present
        to_drop = ['.geo', 'system:index']
        df = df.drop(columns=[col for col in to_drop if col in df.columns])

        # Desired column order
        main_cols = ['Lake_ID', 'year', 'week', 'week_start', 'week_end']
        other_cols = [col for col in df.columns if col not in main_cols]
        df = df[main_cols + other_cols]

        # Sort by Lake_ID and date (year, week)
        df = df.sort_values(by=['Lake_ID', 'year', 'week'])

        # Save cleaned table (overwrite or add suffix if you prefer)
        df.to_csv(fpath, index=False)
        print(f"✅ Cleaned and exported: {fname}")

print("🎉 All ERA5 weekly climate CSVs cleaned and sorted!")


  folder = "Datasets\ECMWF_raw"  # adjust if needed


✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2002_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2003_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2004_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2005_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2006_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2007_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2008_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2009_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2010_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2011_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2012_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2013_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2014_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchments_2015_Climate.csv
✅ Cleaned and exported: ERA5Land_Weekly_Catchmen