## Make TFRecords on GCS

In this notebook, we combine forest mortality observations from aerial detection surveys with MODIS LST and ERA5 climate re-analysis data.

In [1]:
import ee
import geemap
import os

# Start the Earth Engine client session; this has to run before any of the
# ee.* objects created below are evaluated.
ee.Initialize()

# If the current working directory path contains "notebooks" (substring match
# anywhere in the path), move up one level and report the new directory.
# Presumably this lands in the project root -- verify if the layout changes.
if "notebooks" in os.getcwd():
    os.chdir("..")
    print("Changed working dir to", os.getcwd())

Changed working dir to G:\Other computers\My Laptop\UW\Classes\ESS521\project


In [2]:
# Pull together all the assets
# Image collections / images / tables referenced by the functions below.
modis_daily = ee.ImageCollection("MODIS/061/MYD11A1")   # source of QC_Day (see make_is_cloudy)
modis_8day  = ee.ImageCollection("MODIS/061/MYD11A2")   # source of LST_Day_1km (see calculate_dT)
modis_lc    = ee.ImageCollection("MODIS/061/MCD12Q1")   # source of LC_Type1 (see make_forest_mask)
era5        = ee.ImageCollection("ECMWF/ERA5/DAILY")    # precipitation and max 2m air temperature
srtm        = ee.Image("CGIAR/SRTM90_V4")
states      = ee.FeatureCollection("TIGER/2018/States")
# First feature whose NAME is "California"; used for map centering and as the export region.
ca          = states.filter(ee.Filter.eq("NAME", "California")).first()

# Survey images
ads_damage  = ee.ImageCollection("projects/forest-lst/assets/damage_img")
ads_survey  = ee.ImageCollection("projects/forest-lst/assets/survey_img")

Earth Engine image operations sometimes drop the `system:time_start` property, so we have to filter the source image data to the export year *before* doing any joins, array transformations, etc. A good way to organize this is to write functions that take a filter for the export year and return the processed image (or image collection).

In [3]:
# Simple time series aggregators
def make_is_cloudy(filter):
    """Build a cloud-flag image collection from the daily MODIS collection.

    Args:
        filter: ee.Filter applied to ``modis_daily`` to choose the images.

    Returns:
        ee.ImageCollection with one single-band image (``is_cloudy``) per
        selected input image. A pixel is 1 where ``QC_Day & 2`` is non-zero
        and 0 otherwise.
    """
    def _flag_cloudy(img):
        # NOTE(review): the bit with value 2 in QC_Day is treated as the cloud
        # indicator here -- confirm against the product's QC bit layout.
        qc_day = img.select("QC_Day")
        return qc_day.bitwiseAnd(2).gt(0).rename("is_cloudy")

    selected = modis_daily.filter(filter)
    return selected.map(_flag_cloudy)

def make_prcp(filter):
    """Return the ERA5 precipitation series with its band renamed to ``prcp``.

    Args:
        filter: ee.Filter applied to the collection after band selection.

    Returns:
        ee.ImageCollection holding only the ``total_precipitation`` band,
        renamed to ``prcp``, for the images that pass ``filter``.
    """
    precipitation = era5.select(["total_precipitation"], ["prcp"])
    return precipitation.filter(filter)

# Join MODIS and ERA5 to make delta T
def calculate_dT(feat):
    """Compute LST minus mean max air temperature for one joined MODIS image.

    Args:
        feat: an element from the saveAll join in ``make_8day_dT``: the MODIS
            image itself, with the matched ERA5 images stored in its
            ``secondary`` property.

    Returns:
        A single-band element named ``dT``, with masked pixels filled with 0
        and properties copied from the scaled LST image.
    """
    # Scale the raw LST band by 0.02 (presumably the product's scale factor to
    # Kelvin -- confirm against the dataset documentation).
    modis_lst = ee.Image(feat).select("LST_Day_1km").multiply(0.02)

    # Average all matched ERA5 images; the mean reducer appends "_mean" to the
    # band name, hence the selector below.
    matched_era5 = ee.ImageCollection.fromImages(feat.get("secondary"))
    era5_mean = matched_era5.reduce(ee.Reducer.mean())
    air_temp = era5_mean.select("maximum_2m_air_temperature_mean").resample("bilinear")

    delta = modis_lst.subtract(air_temp).rename("dT")
    return delta.unmask(0).copyProperties(modis_lst)

def make_8day_dT(filter):
    """Build the 8-day dT collection (MODIS LST minus mean ERA5 max air temperature).

    Each 8-day MODIS composite is joined to every ERA5 daily image whose
    ``system:time_start`` falls inside the composite's
    [``system:time_start``, ``system:time_end``) window. ``calculate_dT`` then
    averages the matched ERA5 images and subtracts them from the LST.

    Args:
        filter: ee.Filter applied to both ``modis_8day`` and ``era5`` before
            the join (e.g. a calendar-year filter).

    Returns:
        ee.ImageCollection of single-band ``dT`` images, one per joined MODIS
        composite.
    """
    era5_filter = era5.filter(filter)
    mod_filter  = modis_8day.filter(filter)

    # In a join filter, leftField refers to the primary collection (MODIS) and
    # rightField to the secondary (ERA5). Comparing time_start to time_start
    # in both directions would collapse to equality and match only the single
    # ERA5 day on which the composite starts, so the upper bound uses the
    # composite's time_end instead.
    # NOTE(review): assumes MYD11A2 images carry system:time_end as the
    # exclusive end of the compositing window -- confirm.
    date_range_match = ee.Filter.And(
        ee.Filter.lessThanOrEquals(leftField="system:time_start", rightField="system:time_start"),
        ee.Filter.greaterThan(leftField="system:time_end", rightField="system:time_start")
    )

    # Store every matching ERA5 image, ordered by time, under "secondary".
    join = ee.Join.saveAll(matchesKey="secondary", ordering="system:time_start", ascending=True)

    dT = join.apply(mod_filter, era5_filter, date_range_match).map(calculate_dT)
    # join.apply/map yields a generic collection; cast back to an image collection.
    dT = ee.ImageCollection(dT)

    return dT

In [4]:
# Verify that it worked
# Filter for calendar year 2018; filter_18 is reused by later check cells.
filter_18 = ee.Filter.calendarRange(2018, 2018, "year")
my_dT = make_8day_dT(filter_18)

# Show the first dT image on an interactive map, stretched from -5 to 5 on a
# blue-white-red palette. The bare `Map` on the last line renders the widget.
Map = geemap.Map()
Map.add_basemap("HYBRID")
Map.addLayer(my_dT.first(), dict(min=-5, max=5, palette=["blue", "white", "red"]))
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [5]:
# Make severity images. This is the same join as MODIS/ERA except 1:1 so we
# do an inner join instead of a saveAll.
def calculate_severity(feat):
    """Combine one joined damage/survey pair into a single severity image.

    Args:
        feat: an element from the inner join in ``make_severity``, holding the
            two matched images under the ``damage`` and ``survey`` properties.

    Returns:
        The survey image with the damage image blended on top of it.
    """
    damage_img = ee.Image(feat.get("damage"))
    survey_img = ee.Image(feat.get("survey"))

    # blend() overlays its argument on the image it is called on.
    return survey_img.blend(damage_img)

def make_severity(filter):
    """Build the single severity image for the period selected by ``filter``.

    Damage and survey images are paired 1:1 on identical
    ``system:time_start`` values and each pair is merged by
    ``calculate_severity``.

    Args:
        filter: ee.Filter applied to both ``ads_damage`` and ``ads_survey``.

    Returns:
        The first (and only) image of the joined severity collection.

    Raises:
        AssertionError: if the join does not yield exactly one image.
    """
    damage_imgs = ads_damage.filter(filter)
    survey_imgs = ads_survey.filter(filter)

    # <= and >= on the same field pair: both hold only for equal timestamps.
    not_after = ee.Filter.lessThanOrEquals(
        leftField="system:time_start", rightField="system:time_start"
    )
    not_before = ee.Filter.greaterThanOrEquals(
        leftField="system:time_start", rightField="system:time_start"
    )
    same_timestamp = ee.Filter.And(not_after, not_before)

    # The inner join stores the matched images under "damage" and "survey".
    pairs = ee.Join.inner("damage", "survey").apply(damage_imgs, survey_imgs, same_timestamp)
    severity = ee.ImageCollection(pairs.map(calculate_severity))

    # Client-side round trip: make sure exactly one pair matched.
    n_images = severity.size().getInfo()
    assert n_images == 1

    return severity.first()

In [6]:
# Verify that it worked (again lol)

# Severity image for 2018, reusing filter_18 from the dT check cell.
my_severity = make_severity(filter_18)

# Display it stretched from 0 to 5 (white to red), centered on the California
# feature. Note that `Map` is rebound to a fresh widget in this cell.
Map = geemap.Map()
Map.add_basemap("HYBRID")
Map.addLayer(my_severity, dict(min=0, max=5, palette=["white", "red"]))
Map.centerObject(ca, zoom=6)
Map

Map(center=[37.15243410467141, -119.5265515515086], controls=(WidgetControl(options=['position', 'transparent_…

In [7]:
# Some of the severity maps are all -1, we shouldn't worry about exporting those for now
import datetime

# Fetch every damage image's system:time_start client-side. The values are
# divided by 1000 below, i.e. treated as milliseconds since the epoch.
timestamps = ads_damage.aggregate_array("system:time_start").getInfo()

# Build a real list (a bare map() iterator would be empty after the print loop
# below) and convert with the timezone-aware API instead of the deprecated
# datetime.utcfromtimestamp. tzinfo is dropped afterwards so the printed text
# keeps the naive "YYYY-MM-DD HH:MM:SS" form.
dates = [
    datetime.datetime.fromtimestamp(ms / 1000, tz=datetime.timezone.utc).replace(tzinfo=None)
    for ms in timestamps
]

print("All dates:")
for d in dates:
    print(str(d))

All dates:
2013-01-01 00:00:00
2014-01-01 00:00:00
2015-01-01 00:00:00
2016-01-01 00:00:00
2017-01-01 00:00:00
2018-01-01 00:00:00
2019-01-01 00:00:00
2020-01-01 00:00:00
2021-01-01 00:00:00
2022-01-01 00:00:00
2023-01-01 00:00:00


In [8]:
# Function to make a forest mask from the annual MODIS LC
def make_forest_mask(filter):
    """Build a forest mask from the MODIS land-cover collection.

    Args:
        filter: ee.Filter applied to ``modis_lc``; only the first image that
            passes is used.

    Returns:
        ee.Image that is 1 where ``LC_Type1`` is below 6 and 0 elsewhere.
    """
    # NOTE(review): classes below 6 are taken to be the forest classes of the
    # LC_Type1 legend -- confirm against the product documentation.
    land_cover = modis_lc.filter(filter).first()
    lc_type1 = land_cover.select("LC_Type1")
    return lc_type1.lt(6)

# Check that it is working
# Forest mask for 2018, reusing filter_18 from the dT check cell.
my_mask = make_forest_mask(filter_18)

# Keep severity only where the mask is non-zero.
my_severity_masked = my_severity.updateMask(my_mask)

# Add both versions as separate named layers so they can be toggled and
# compared; same 0-5 white-to-red stretch for each.
Map = geemap.Map()
Map.add_basemap("HYBRID")
Map.addLayer(my_severity, dict(min=0, max=5, palette=["white", "red"]), "unmasked")
Map.addLayer(my_severity_masked, dict(min=0, max=5, palette=["white", "red"]), "masked")
Map.centerObject(ca, zoom=6)
Map

Map(center=[37.15243410467141, -119.5265515515086], controls=(WidgetControl(options=['position', 'transparent_…

In [34]:
def make_annual_export_image(year, return_task=False):
    """Assemble the predictor/label image for one year, optionally as an export task.

    Time-series predictors (dT, prcp, is_cloudy) and the forest mask come from
    ``year``; the severity label comes from ``year + 1``. The concatenated
    image is masked to forest pixels.

    Args:
        year: calendar year of the predictors.
        return_task: when true, return a TFRecord export task to Cloud Storage
            (not started here) instead of the image.

    Returns:
        The concatenated, masked ee.Image, or the export task when
        ``return_task`` is true.

    Raises:
        AssertionError: if either daily collection has fewer than 365 images,
            the dT collection does not have exactly 46 images, or
            ``make_severity`` does not find exactly one label image.
    """
    print(year)

    same_year = ee.Filter.calendarRange(year, year, "year")
    next_year = ee.Filter.calendarRange(year + 1, year + 1, "year")

    # Time series predictors for this year
    cloudy_series = make_is_cloudy(same_year)
    prcp_series = make_prcp(same_year)
    dT_series = make_8day_dT(same_year)

    # Collection sizes are fetched client-side; they double as the tensor
    # depths of the array bands in the export.
    n_cloudy = cloudy_series.size().getInfo()
    n_prcp = prcp_series.size().getInfo()
    n_dT = dT_series.size().getInfo()

    # Daily series use >= so that leap years pass.
    assert n_cloudy >= 365
    assert n_prcp >= 365
    assert n_dT == 46

    # Severity label comes from *next year*; the forest mask from this year.
    severity_label = make_severity(next_year)
    forest_mask = make_forest_mask(same_year)

    bands = [
        # time series predictors, each collapsed to one array-valued band
        dT_series.toArrayPerBand(),
        prcp_series.toArrayPerBand(),
        cloudy_series.toArrayPerBand(),
        # scalar predictors
        ee.Image.pixelLonLat(),
        srtm,
        # severity label
        severity_label,
    ]

    # Concatenate everything, then mask out non-forest pixels.
    export_img = ee.Image.cat(bands).updateMask(forest_mask)

    if not return_task:
        return export_img

    format_options = {
        "patchDimensions": [1, 1],
        "tensorDepths": {
            "dT": n_dT,
            "is_cloudy": n_cloudy,
            "prcp": n_prcp,
        },
    }

    # Export! (shardSize is left at its default; 64 was tried previously.)
    return ee.batch.Export.image.toCloudStorage(
        description=f"annual-export-{year}",
        image=export_img,
        bucket="forest-lst-test-export",
        fileNamePrefix=f"ca_all_features_label/{year}/",
        region=ca.geometry(),
        scale=1000,
        crs=dT_series.first().projection(),
        fileFormat="TFRecord",
        formatOptions=format_options,
    )

In [28]:
# Build the 2018 export image only (return_task defaults to False, so no
# export task is created here).
testimg = make_annual_export_image(2018)

In [29]:
from IPython.display import display, JSON

# Fetch the image's description client-side and render it as a JSON tree to
# inspect the bands of the export image.
display(JSON(testimg.getInfo()))

<IPython.core.display.JSON object>

In [39]:
# Set up exports for 2016 through 2019 (the range end, 2020, is exclusive).
# Each call also prints its year and runs the size assertions inside
# make_annual_export_image before the task object is created.
tasks = [
    make_annual_export_image(year, return_task=True) for year in range(2016, 2020)
]

2016
2017
2018
2019


In [None]:
# Start every prepared export task; nothing is submitted until this cell runs.
for t in tasks: t.start()