## Make TFRecords on GCS

In this notebook, we combine forest mortality observations from aerial detection surveys with predictor features.

In [1]:
import ee
import geemap
import os

# Initialize the Earth Engine client before any ee.* objects are built.
ee.Initialize()

# If the current working directory path contains "notebooks", move up one
# level and report the new location.
if "notebooks" in os.getcwd():
    os.chdir("..")
    print("Changed working dir to", os.getcwd())

Changed working dir to G:\Other computers\My Laptop\UW\Classes\ESS521\project


Features to calculate:
 - Annual median, 5th-percentile, 95th-percentile EVI
 - Annual median, 5th-percentile, 95th-percentile LST - Air T
 - Annual median, 5th-percentile, 95th-percentile SPEI
 - Lat/Lon
 - Elevation
 - Minimum winter air temperature (e.g. for 2019: minimum from Dec 2018 - Feb 2019)
 - Water-year precipitation (e.g. for 2019: precip from Oct 2018 - Sep 2019)

Another option to consider is SAR as a proxy for canopy moisture, as in [this paper](https://www.sciencedirect.com/science/article/pii/S003442572030167X). However, Sentinel-1 data are only available from 2014 onward, so using it would mean losing the long-term record.

In [24]:
# Gather assets
# Earth Engine handles used by the feature functions in this notebook:
#   modis_lst - land surface temperature (LST_Day_1km and QC_Day bands are read)
#   modis_evi - vegetation index (EVI and DetailedQA bands are read)
#   daymet    - weather collection (prcp, tmin and tmax bands are read)
#   drought   - drought indices (spei30d band is read)
#   srtm      - elevation image, flagged for bilinear resampling
#   damage    - project asset holding the mortality response rasters
modis_lst  = ee.ImageCollection("MODIS/061/MYD11A1")
modis_evi  = ee.ImageCollection("MODIS/061/MYD13A2")
daymet     = ee.ImageCollection("NASA/ORNL/DAYMET_V4")
drought    = ee.ImageCollection("GRIDMET/DROUGHT")
srtm       = ee.Image("CGIAR/SRTM90_V4").resample("bilinear")
damage     = ee.ImageCollection("projects/forest-lst/assets/damage_img")

In [27]:
def drought_percentile(year, percentiles=[5, 50, 95]):
    """Summarize one calendar year of 30-day SPEI as per-pixel percentiles.

    Args:
        year: Calendar year used to filter the drought collection.
        percentiles: Percentiles to compute; each one yields an output band.

    Returns:
        ee.Image with one band per percentile, named ``spei30d_p<percentile>``.
    """
    band_suffixes = ["p" + str(pct) for pct in percentiles]
    percentile_reducer = ee.Reducer.percentile(percentiles, band_suffixes)

    in_year = ee.Filter.calendarRange(year, year, "year")

    # The choice of index and aggregation period here is arbitrary.
    spei = drought.filter(in_year).select("spei30d")
    return spei.reduce(percentile_reducer)

In [28]:
# Sanity check: compute the 2018 SPEI percentile image and add it to a map.
d = drought_percentile(2018)

Map = geemap.Map()
Map.addLayer(d)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [3]:
def water_year_precipitation(year):
    """Sum Daymet precipitation over the water year ending in ``year``.

    The water year for year X runs from Oct (X-1) through Sep (X); for
    example, the 2019 water year covers Oct 2018 - Sep 2019.

    Args:
        year: Water year to summarize.

    Returns:
        Single-band ee.Image named ``prcp``.
    """
    jan_first = ee.Date.fromYMD(year, 1, 1)

    # Window: three months before Jan 1 up to nine months after it.
    window_start = jan_first.advance(-3, "month")
    window_end = jan_first.advance(9, "month")

    # Sum every band over the window, then keep only the precipitation total.
    totals = daymet.filterDate(window_start, window_end).reduce(ee.Reducer.sum())
    return totals.select("prcp_sum").rename("prcp")

In [4]:
# Sanity check: map the 2020 water-year precipitation image.
p = water_year_precipitation(2020)

Map = geemap.Map()
Map.addLayer(p)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [5]:
def minimum_winter_air_temperature(year):
    """Find the lowest Daymet minimum temperature over the winter of ``year``.

    Winter for year X is defined as Dec (X-1) through Feb (X).

    Args:
        year: Year whose preceding winter is summarized.

    Returns:
        Single-band ee.Image named ``winter_tmin``.
    """
    jan_first = ee.Date.fromYMD(year, 1, 1)

    # Window: one month before Jan 1 up to two months after it.
    winter_start = jan_first.advance(-1, "month")
    winter_end = jan_first.advance(2, "month")

    # Take the per-pixel minimum of every band over the winter window, then
    # keep only the minimum of the daily minimum temperature.
    minima = daymet.filterDate(winter_start, winter_end).reduce(ee.Reducer.min())
    return minima.select("tmin_min").rename("winter_tmin")

In [6]:
# Sanity check: map the minimum winter air temperature image for 2020.
t = minimum_winter_air_temperature(2020)

Map = geemap.Map()
Map.addLayer(t)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [20]:
# Join MODIS LST and Daymet air temperature to make delta T
def calculate_dT(feat):
    """Compute land-surface minus air temperature for one joined MODIS image.

    Args:
        feat: A MODIS LST image coming out of the join in
            ``lst_air_difference``. Its "secondary" property holds the list of
            matching Daymet images.

    Returns:
        Single-band element named ``dT`` that carries the source image's
        ``system:time_start``.
    """
    source = ee.Image(feat)

    # Average tmax over the matched Daymet images and resample it smoothly
    # when it is reprojected onto the LST grid.
    mean_max_t = ee.ImageCollection.fromImages(feat.get("secondary"))\
        .reduce(ee.Reducer.mean())\
        .select("tmax_mean")\
        .resample("bilinear")

    # LST gets aggregated up to an annual summary so we can afford
    # to be picky about QA. The mandatory QA flag occupies bits 0-1 of QC_Day
    # and only the value 0 means "good quality", so both bits are tested
    # (testing bit 0 alone would also let the value 2 through).
    good_quality = source.select("QC_Day").bitwiseAnd(3).eq(0)
    lst_celsius = source.updateMask(good_quality)\
        .select("LST_Day_1km").multiply(0.02).subtract(273.15) # K -> C

    # Copy the timestamp explicitly from the source image: the derived LST
    # image is the result of band math and is not a reliable metadata source.
    return lst_celsius.subtract(mean_max_t)\
        .rename("dT").copyProperties(source, ["system:time_start"])

def lst_air_difference(year):
    """Build the per-image dT collection for one calendar year.

    Every MODIS LST image in ``year`` is joined to the Daymet images that
    share its ``system:time_start`` and then passed to ``calculate_dT``.

    Args:
        year: Calendar year used to filter both collections.

    Returns:
        ee.ImageCollection of ``dT`` images.
    """
    in_year = ee.Filter.calendarRange(year, year, "year")
    lst_images = modis_lst.filter(in_year)
    air_images = daymet.filter(in_year)

    # "<=" and ">=" on the same pair of fields together require the two
    # start times to be identical.
    not_after = ee.Filter.lessThanOrEquals(
        leftField="system:time_start", rightField="system:time_start")
    not_before = ee.Filter.greaterThanOrEquals(
        leftField="system:time_start", rightField="system:time_start")
    same_start = ee.Filter.And(not_after, not_before)

    # Store the matching Daymet images on each LST image under "secondary".
    save_matches = ee.Join.saveAll(
        matchesKey="secondary", ordering="system:time_start", ascending=True)
    joined = save_matches.apply(lst_images, air_images, same_start)

    return ee.ImageCollection(joined.map(calculate_dT))

def lst_air_difference_percentile(year, percentiles=[5, 50, 95]):
    """Reduce one year of dT images to per-pixel percentiles.

    Args:
        year: Calendar year passed through to ``lst_air_difference``.
        percentiles: Percentiles to compute; each one yields an output band.

    Returns:
        ee.Image with one band per percentile, named ``dT_p<percentile>``.
    """
    band_suffixes = ["p" + str(pct) for pct in percentiles]
    percentile_reducer = ee.Reducer.percentile(percentiles, band_suffixes)

    return lst_air_difference(year).reduce(percentile_reducer)

In [21]:
# Verify that it worked
# Build the 2018 dT collection and its percentile summary, then draw the first
# dT image and the median (dT_p50) band with the same blue-white-red stretch
# over -5..5.
my_dT = lst_air_difference(2018)
my_dT_percentiles = lst_air_difference_percentile(2018)

Map = geemap.Map()
Map.add_basemap("HYBRID")
Map.addLayer(my_dT.first(), dict(min=-5, max=5, palette=["blue", "white", "red"]))
Map.addLayer(my_dT_percentiles.select("dT_p50"), dict(min=-5, max=5, palette=["blue", "white", "red"]))
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [16]:
# Annual percentiles of MODIS EVI
def annual_evi_percentile(year, percentiles=[5, 50, 95]):
    """Summarize one calendar year of good-quality EVI as per-pixel percentiles.

    Args:
        year: Calendar year used to filter the MODIS EVI collection.
        percentiles: Percentiles to compute; each one yields an output band.

    Returns:
        ee.Image with one band per percentile, named ``EVI_p<percentile>``,
        multiplied by the 0.0001 scale factor.
    """
    def keep_good_quality(img):
        # Since we are doing an annual summary we can afford to be picky about
        # image quality. The overall VI quality flag occupies bits 0-1 of
        # DetailedQA and only the value 0 means "good quality". Testing bit 0
        # alone would also admit the value 2 ("most probably cloudy"), so both
        # bits are checked.
        vi_quality = img.select("DetailedQA").bitwiseAnd(3)
        return img.updateMask(vi_quality.eq(0))

    in_year = ee.Filter.calendarRange(year, year, "year")
    this_evi = modis_evi.filter(in_year).map(keep_good_quality).select("EVI")

    names = ["p" + str(pct) for pct in percentiles]
    reducer = ee.Reducer.percentile(percentiles, names)

    return this_evi.reduce(reducer).multiply(0.0001)

In [17]:
# Sanity check: map the median (EVI_p50) band of the 2018 EVI percentiles.
evi_18 = annual_evi_percentile(2018)

Map = geemap.Map()
Map.addLayer(evi_18.select("EVI_p50"))
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [76]:
import datetime
def make_annual_image(year, percentiles=[5, 50, 95]):
    """Stack the mortality response and all predictor bands for one year.

    Band order: mortality, year, longitude/latitude, elevation, water-year
    precipitation, minimum winter temperature, then the dT, EVI and SPEI
    percentile bands.

    Args:
        year: Calendar year (Python int) to build the image for.
        percentiles: Percentiles passed to the dT, EVI and SPEI summaries.

    Returns:
        ee.Image with every band masked to the mortality raster's footprint,
        gap-filled with zeros, and stamped with ``system:time_start`` /
        ``system:time_end`` covering the calendar year in UTC.
    """
    # Record-keeping variables
    year_img = ee.Image.constant(year).rename("year").toInt16()
    lon_lat  = ee.Image.pixelLonLat()

    # Water-year precip
    prcp = water_year_precipitation(year)

    # Minimum winter T
    min_winter_T = minimum_winter_air_temperature(year)

    # Percentile-based images
    dT = lst_air_difference_percentile(year, percentiles)
    evi = annual_evi_percentile(year, percentiles)
    spei = drought_percentile(year, percentiles)

    # Response - mortality severity
    mort = damage.filter(ee.Filter.calendarRange(year, year, "year")).first()

    # Stack all the bands together. Since we are converting to an array image
    # none of the pixels can be masked. We mask all bands based on the mortality
    # raster and then remove the mask, gap-filling with zeros.
    all_bands = ee.Image([
        mort,
        year_img,
        lon_lat,
        srtm,
        prcp,
        min_winter_T,
        dT,
        evi,
        spei,
    ]).updateMask(mort.mask()).unmask(0)

    # Set timekeeping properties as exact integer epoch milliseconds.
    # Midnight UTC timestamps are whole seconds, so convert to int before
    # scaling; the end of the year is one millisecond before the next year
    # starts. This avoids float values (and float rounding on the "-1 ms").
    utc = datetime.timezone.utc
    start_ms = int(datetime.datetime(year, 1, 1, tzinfo=utc).timestamp()) * 1000
    next_start_ms = int(datetime.datetime(year + 1, 1, 1, tzinfo=utc).timestamp()) * 1000

    all_bands = all_bands.set({
        "system:time_start": start_ms,
        "system:time_end": next_start_ms - 1
    })

    return all_bands

In [77]:
# Build a single annual image (2018) to inspect before processing every year.
test_img = make_annual_image(2018)

In [78]:
from IPython.display import display, JSON

# Fetch the test image's description from Earth Engine (blocking call) and
# render it with IPython's JSON display.
display(JSON(test_img.getInfo()))

<IPython.core.display.JSON object>

In [79]:
# Add the stacked test image to a map, with no visualization parameters given.
Map = geemap.Map()
Map.addLayer(test_img)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [80]:
# Determine what years have all the data we need
all_assets = [modis_lst, modis_evi, daymet, drought, damage]

# Each getInfo() is a blocking request; the overlap of all collections runs
# from the latest first image to the earliest last image.
latest_start = max(asset.aggregate_min("system:time_start").getInfo() for asset in all_assets)
earliest_end = min(asset.aggregate_max("system:time_start").getInfo() for asset in all_assets)

# Timestamps are epoch milliseconds. Convert to seconds and read the year in
# UTC with a timezone-aware call (utcfromtimestamp is deprecated).
start_year = datetime.datetime.fromtimestamp(latest_start // 1000, tz=datetime.timezone.utc).year
end_year   = datetime.datetime.fromtimestamp(earliest_end // 1000, tz=datetime.timezone.utc).year

# NOTE(review): the first and last years are included even when a collection
# only covers part of them - confirm that partial years are acceptable.
available_years = list(range(start_year, end_year+1))

print("Available years:", available_years)

Available years: [2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]


In [81]:
# Build the stacked predictor/response image for every available year.
annual_images = [
    make_annual_image(year, percentiles=[5, 50, 95])
    for year in available_years
]

In [82]:
# Read the band names from the first annual image's description (blocking
# getInfo call); these names key the export's tensorDepths.
bands = [b["id"] for b in annual_images[0].getInfo()["bands"]]
print("Bands:", bands)

Bands: ['pct_mortality', 'year', 'longitude', 'latitude', 'elevation', 'prcp', 'winter_tmin', 'dT_p5', 'dT_p50', 'dT_p95', 'EVI_p5', 'EVI_p50', 'EVI_p95', 'spei30d_p5', 'spei30d_p50', 'spei30d_p95']


In [83]:
# Turn the image list into an image collection, stack it as an array
# (one array per band across the annual images), then show the result's
# description.
annual_arr = ee.ImageCollection.fromImages(annual_images).toArrayPerBand()
display(JSON(annual_arr.getInfo()))

<IPython.core.display.JSON object>

In [84]:
# Export!
# Every band is exported with a depth equal to the number of years.
tensorDepths = dict.fromkeys(bands, len(available_years))

# Export region: the California feature from the TIGER 2018 states table.
ca = ee.FeatureCollection("TIGER/2018/States").filter(
    ee.Filter.eq("NAME", "California")
).first()

# Write 1x1-pixel patches as TFRecords to Cloud Storage, on the MODIS LST
# projection at 1000 m scale.
task = ee.batch.Export.image.toCloudStorage(
    image=annual_arr,
    description="ca-dense-tensors",
    bucket="forest-lst-test-export",
    fileNamePrefix="ca_dense_tensors",
    region=ca.geometry(),
    crs=modis_lst.first().projection(),
    scale=1000,
    #shardSize=64,
    fileFormat="TFRecord",
    formatOptions={
        "patchDimensions": [1, 1],
        "tensorDepths": tensorDepths,
    },
)

task.start()