In [1]:
import ee
import pandas as pd

# Initialize Earth Engine
ee.Initialize()

# Level-2 basin ids to skip in the export loop (nothing is skipped while empty)
COMPLETED_L2s = []

# Name of the Google Drive folder handed to the table export
gdrive_folder = 'ERA5_temp_Level2'

# CSV holding the river ids; it is read below without a header row
grades_id_file = '/work/pi_kandread_umass_edu/Cloud_Freq/data/grades_rivids_stratified_discharge_level2_sample.csv'

# Date range used to filter the ERA5-Land daily collection
startDate = ee.Date('2000-02-24')
endDate = ee.Date('2023-08-31')
era5 = ee.ImageCollection("ECMWF/ERA5_LAND/DAILY_AGGR").filterDate(startDate, endDate)

# Column 0 becomes 'rivid'; 'l2' is rivid with its last six digits dropped.
# The divisor 1E6 is a float, so the resulting 'l2' values are floats.
grades_ids = (
    pd.read_csv(grades_id_file, header=None)
    .rename(columns={0: 'rivid'})
    .assign(l2=lambda frame: frame['rivid'] // 1E6)
)

def map_site_data(site):
    """Build the per-image 'temperature_2m' samples for one site.

    Every image of the module-level ``era5`` collection is sampled at the
    site's geometry, and each resulting feature is tagged with the site's
    'COMID'.

    Args:
        site: Earth Engine feature exposing ``geometry()`` and a 'COMID'
            property (it is supplied by ``sites.map(map_site_data)``).

    Returns:
        An ``ee.FeatureCollection`` holding one feature per image that
        produced a sample, each with a 'COMID' property set.

    NOTE(review): no explicit date property is attached to the features
    here -- confirm that downstream code recovers the image date some
    other way (e.g. from the exported feature index).
    """
    site_geometry = site.geometry()
    comid = site.get('COMID')
    # Use the site's COMID when it is set, otherwise fall back to 0.
    comid_or_zero = ee.Algorithms.If(comid, comid, 0)

    def sample_temperature(image):
        # Second positional argument of sample() is the scale (1 here).
        samples = image.select('temperature_2m').sample(site_geometry, 1)
        return ee.Feature(samples.first())

    def tag_with_comid(feature):
        return feature.set('COMID', comid_or_zero)

    # The second map() argument (True) is the drop-nulls flag, so images
    # that yield no sample do not appear in the result.
    per_image_samples = ee.FeatureCollection(era5.map(sample_temperature, True))
    return per_image_samples.map(tag_with_comid)

In [2]:
# Site lists are split into (n // MAX_SITES_PER_CHUNK + 1) balanced chunks, so
# every chunk stays below this size no matter how many sites a basin has.
MAX_SITES_PER_CHUNK = 500

# Submit one Drive export task per chunk of sites in each level-2 basin.
for l2, g in grades_ids.groupby('l2'):
    # Basins listed in COMPLETED_L2s are skipped
    if l2 in COMPLETED_L2s:
        continue

    # Level-1 basin id: the level-2 id without its last digit
    l1_basin = l2//10
    # Load MERIT Hydro Basins
    merit = ee.FeatureCollection(f"projects/ee-tedlanghorst/assets/riv_pfaf_{l1_basin:1.0f}_MERIT_Hydro_v07_Basins_v01_bugfix1")

    # Get the site list and split it into balanced chunks.
    # Fewer than 500 sites -> one chunk; 500-999 -> two halves (split at
    # len // 2); larger lists get proportionally more chunks instead of two
    # oversized halves.
    all_site_list = g.rivid.to_list()
    n_sites = len(all_site_list)
    n_chunks = n_sites // MAX_SITES_PER_CHUNK + 1
    site_chunks = [
        all_site_list[i * n_sites // n_chunks:(i + 1) * n_sites // n_chunks]
        for i in range(n_chunks)
    ]

    for chunk_count, site_list in enumerate(site_chunks):
        # Filter the merit sites to the current chunk
        sites = merit.filter(ee.Filter.inList('COMID', ee.List(site_list)))

        # #TESTING
        # era5 = era5.limit(10)
        # sites = sites.limit(2)

        # Simplify reach geometry to a point
        sites = sites.map(lambda feat: feat.setGeometry(feat.geometry().centroid()))
        sites = sites.filter(ee.Filter.notNull(['COMID']))

        # Extract the temperature data: one feature collection per site,
        # flattened into a single table
        allData = sites.map(map_site_data).flatten()

        # Export to Google Drive as CSV; the description carries the level-2
        # basin id and the 1-based chunk number
        task = ee.batch.Export.table.toDrive(collection=allData,
                                             description=f'ERA5_Basin{l2:2.0f}_{chunk_count+1}',
                                             folder=gdrive_folder,
                                             fileFormat='CSV')
        task.start()

In [None]:
# CANCEL all ERA5 tasks that are still queued or running
from tqdm import tqdm

# Task.list() returns the whole task history, not only active tasks.
tasks = ee.batch.Task.list()
for task in tqdm(tasks):
    # Tolerate tasks whose config or description is missing
    description = (task.config or {}).get('description') or ''
    if not description.startswith('ERA5'):
        continue
    # Only queued/running tasks can be cancelled; skipping finished ones
    # avoids a needless cancel request for every historical ERA5 task.
    if task.state in ('READY', 'RUNNING'):
        task.cancel()
print("Done!")