In [1]:
import ee
import pandas as pd

# Authenticate/initialise the Earth Engine client; every ee.* object below
# needs this to have run first.
ee.Initialize()

# Daily surface-reflectance collections from the two MODIS platforms
Terra = ee.ImageCollection("MODIS/061/MOD09GA")
Aqua = ee.ImageCollection("MODIS/061/MYD09GA")

# Study period, expressed as one epoch-millisecond timestamp per day
startDate = ee.Date('2000-02-24')
endDate = ee.Date('2023-08-31')
dayList = ee.List.sequence(
    start=startDate.millis(),
    end=endDate.millis(),
    step=24 * 60 * 60 * 1000,  # one day in milliseconds
)

# Table of river reach ids (single unnamed column in the CSV). The 'l2'
# column is the id floor-divided by 1E6 and is used as the grouping key
# (and, floor-divided by 10, to choose the basin asset) further down.
grades_id_file = '/work/pi_kandread_umass_edu/Cloud_Freq/data/grades_rivids_stratified_discharge_level2_sample.csv'
grades_ids = (
    pd.read_csv(grades_id_file, header=None)
      .rename(columns={0: 'rivid'})
      .assign(l2=lambda table: table['rivid'] // 1E6)
)

# Google Drive folder that receives the exported CSV files
gdrive_folder = 'MERIT_CloudMask_Level2'

# 'l2' groups that the export loop skips (presumably already exported in an
# earlier run -- confirm before editing this list)
COMPLETED_L2s = [
    11, 12, 13, 14, 15, 16, 17, 18,
    21, 22, 23, 24, 25, 26, 27, 28, 29,
    31, 32, 33, 34, 36,
    41,
]

In [2]:
# Function to compute cloud mask
def getCloudMask(image):
    """Append a binary 'cloudMask' band derived from the 'state_1km' QA band.

    The two lowest bits of 'state_1km' are isolated with a bitwise AND
    against 3; pixels where those bits equal 1 get the value 1 in the new
    band, all others 0. (Per the MOD09GA product documentation these bits
    are the cloud-state field and 1 means "cloudy"; the "mixed" state, 2,
    is therefore not counted as cloud here.)

    Args:
        image: an image exposing a 'state_1km' band.

    Returns:
        The input image with the extra 'cloudMask' band added.
    """
    state_band = image.select('state_1km')
    is_cloudy = state_band.bitwiseAnd(3).eq(1)
    return image.addBands(is_cloudy.rename('cloudMask'))

# Function to compute daily Image Collections
def getDailyICs(dateMillis):
    """Return the images of the global `modis` collection for one day.

    Args:
        dateMillis: start of the day, in a form accepted by ee.Date
            (the notebook passes epoch milliseconds from `dayList`).

    Returns:
        The `modis` collection filtered to [day, day + 1 day), tagged with
        three properties: 'count' (number of images found), 'dateMillis'
        (the input value) and 'date' (the ee.Date built from it).
    """
    dayStart = ee.Date(dateMillis)
    dayEnd = dayStart.advance(1, 'day')
    imagesForDay = modis.filterDate(dayStart, dayEnd)
    return imagesForDay.set(
        'count', imagesForDay.size(),
        'dateMillis', dateMillis,
        'date', dayStart,
    )

# Function to compute daily mosaics from daily ICs
def getDailyMosaics(ic):
    """Collapse one day's image collection into a single 'cloudMask' image.

    Every band of the collection is reduced with a per-pixel mean; only the
    resulting 'cloudMask_mean' band is kept and renamed back to 'cloudMask'.
    The output therefore holds, per pixel, the mean of the 0/1 cloud flag
    over the images in `ic`.

    Args:
        ic: a daily collection (cast to ee.ImageCollection here, since
            elements mapped out of an ee.List arrive untyped).

    Returns:
        ee.Image with one band, 'cloudMask', whose 'system:index' is copied
        from the first image of `ic`.
    """
    ic = ee.ImageCollection(ic)
    mosaic = (ic.reduce(ee.Reducer.mean())
                .select('cloudMask_mean')
                .rename('cloudMask')
                # reuse the first image's index so each mosaic keeps a
                # recognisable id after the reduction
                .set('system:index', ic.first().get('system:index')))
    return ee.Image(mosaic)

# Function to extract data for each site
def extractData(site):
    """Build one feature per image of the global `dailyMosaics` for a site.

    For every image, the 'cloudMask' band is averaged over the site's
    geometry at a 1000 scale, and the result is stored on a geometry-less
    feature together with the site's 'COMID'.

    Args:
        site: a feature providing a geometry and a 'COMID' property.

    Returns:
        ee.FeatureCollection with one feature per image in `dailyMosaics`.
    """
    siteGeometry = site.geometry()
    siteId = site.get('COMID')

    def extractFeature(img):
        # Mean of the cloud band inside the site footprint
        regionStats = img.select(["cloudMask"]).reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=siteGeometry,
            scale=1000,
        )
        return ee.Feature(None, regionStats).set('COMID', siteId)

    return ee.FeatureCollection(dailyMosaics.map(extractFeature))
                             

In [3]:
# Merge Terra and Aqua, map cloud mask function
modis = Terra.merge(Aqua).map(getCloudMask)

# Map daily mosaics function over dayList; days with no imagery are dropped
# before mosaicking
dailyFCs = dayList.map(getDailyICs).filter(ee.Filter.gt('count',0))
dailyMosaics = ee.ImageCollection(dailyFCs.map(getDailyMosaics))

# Handles of every export started below, kept so their status can still be
# inspected after the loop (`task` alone only ever refers to the last one).
export_tasks = []

for i,g in grades_ids.groupby('l2'):
    # Skip groups listed as already done
    if i in COMPLETED_L2s:
        continue

    # The basin asset name is keyed by the 'l2' value floor-divided by 10
    l1_basin = i//10

    # Load MERIT Hydro Basins
    merit = ee.FeatureCollection(f"projects/ee-tedlanghorst/assets/riv_pfaf_{l1_basin:1.0f}_MERIT_Hydro_v07_Basins_v01_bugfix1")

    # Groups with 500 or more sites are exported as two halves (presumably
    # to keep each export task small enough -- confirm); smaller groups go
    # out as a single chunk.
    all_site_list = g.rivid.to_list()
    if len(all_site_list) >= 500:
        mid_idx = len(all_site_list)//2
        site_chunks = [all_site_list[:mid_idx], all_site_list[mid_idx:]]
    else:
        site_chunks = [all_site_list]

    for chunk_count, site_list in enumerate(site_chunks):
        # Keep only the requested sites and replace each geometry by a
        # buffer of 10000 around its centroid
        sites = merit.filter(ee.Filter.inList('COMID', ee.List(site_list)))
        sites = sites.map(lambda feat: feat.setGeometry(feat.geometry().centroid().buffer(10000)))

        # Map extractData function over all sites; rows whose 'cloudMask'
        # is null are dropped
        allData = sites.map(extractData).flatten().filter(ee.Filter.notNull(['cloudMask']))

        # Export to Google Drive as CSV (chunk number in the name is 1-based)
        task = ee.batch.Export.table.toDrive(collection=allData,
                                             description=f'MERIT_MODIS_Basin{i:2.0f}_{chunk_count+1}_stratified_discharge',
                                             folder=gdrive_folder,
                                             fileFormat='CSV')
        task.start()
        export_tasks.append(task)

In [3]:
# TEST
# Small end-to-end check: same pipeline as the export loop, but limited to
# the first 10 daily mosaics and a single site, pulled back with getInfo()
# instead of being exported.
# NOTE: this cell rebinds the globals `modis` and `dailyMosaics` that
# getDailyICs / extractData read.

# Merge Terra and Aqua, map cloud mask function
modis = Terra.merge(Aqua).map(getCloudMask)

# Map daily mosaics function over dayList
dailyFCs = dayList.map(getDailyICs).filter(ee.Filter.gt('count',0))
dailyMosaics = ee.ImageCollection(dailyFCs.map(getDailyMosaics)).limit(10)

# Get the list of rivids from grades_ids (same file as the configuration cell)
grades_id_file = '/work/pi_kandread_umass_edu/Cloud_Freq/data/grades_rivids_stratified_discharge_level2_sample.csv'
grades_ids = pd.read_csv(grades_id_file,header=None).rename(columns={0:'rivid'})
grades_ids['l2'] = grades_ids['rivid']//1E6

# Take the first 'l2' group explicitly. With an empty table this fails
# loudly instead of silently reusing `g` / `l1_basin` left over from a
# previously executed cell.
first_group = next(iter(grades_ids.groupby('l2')), None)
if first_group is None:
    raise ValueError(f'No rivids found in {grades_id_file}')
i, g = first_group
l1_basin = i//10

# Load MERIT Hydro Basins
merit = ee.FeatureCollection(f"projects/ee-tedlanghorst/assets/riv_pfaf_{l1_basin:1.0f}_MERIT_Hydro_v07_Basins_v01_bugfix1")

# Filter and buffer sites (only one site is kept for the test)
site_list = ee.List(g.rivid.to_list())
sites = merit.filter(ee.Filter.inList('COMID', site_list)).limit(1)
sites = sites.map(lambda feat: feat.setGeometry(feat.geometry().centroid().buffer(10000)))

# Map extractData function over all sites
allData = sites.map(extractData).flatten().filter(ee.Filter.notNull(['cloudMask']))

allData.getInfo()

{'type': 'FeatureCollection',
 'columns': {},
 'features': [{'type': 'Feature',
   'geometry': None,
   'id': '0014000000000000111a_1_2000_02_26',
   'properties': {'COMID': 11004678, 'cloudMask': 0}},
  {'type': 'Feature',
   'geometry': None,
   'id': '0014000000000000111a_1_2000_02_27',
   'properties': {'COMID': 11004678, 'cloudMask': 0}},
  {'type': 'Feature',
   'geometry': None,
   'id': '0014000000000000111a_1_2000_02_28',
   'properties': {'COMID': 11004678, 'cloudMask': 0.0031300633377522474}},
  {'type': 'Feature',
   'geometry': None,
   'id': '0014000000000000111a_1_2000_03_01',
   'properties': {'COMID': 11004678, 'cloudMask': 0.040690823390779216}},
  {'type': 'Feature',
   'geometry': None,
   'id': '0014000000000000111a_1_2000_03_02',
   'properties': {'COMID': 11004678, 'cloudMask': 0.6793833161486724}},
  {'type': 'Feature',
   'geometry': None,
   'id': '0014000000000000111a_1_2000_03_03',
   'properties': {'COMID': 11004678, 'cloudMask': 0.3977880885746552}},
  {'t