## Make TFRecords on GCS

In this notebook, we combine forest mortality observations from aerial detection surveys with predictor features.

In [1]:
import ee
import geemap
import os

# Initialise the Earth Engine client library; every ee.* call below relies on it.
ee.Initialize()

# If the kernel was started from a path containing "notebooks", step up one
# directory and report the new working directory.
if "notebooks" in os.getcwd():
    os.chdir("..")
    print("Changed working dir to", os.getcwd())

Changed working dir to /home/jovyan/ForestLST


Features to calculate from Preisler et al. (2017):
 - 36-year average precipitation
 - area affected by fire 2-4 years prior
 - area with mortality in the prior year
 - maximum area with mortality in the prior year among neighboring cells
 - precipitation in last 1-4 years
 - minimum winter temperature

Other ideas
 - drought indices
 - elevation
 - aspect
 - LST
 - EVI

Another option to consider is SAR as a proxy for canopy moisture, as in [this paper](https://www.sciencedirect.com/science/article/pii/S003442572030167X). However, Sentinel-1 data are only available from 2014 onward, so we would lose the long-term record.

## Image options

In [2]:
# Projection and cell size
# The predictor functions below reproject their output to TARGET_PROJ at
# CELL_SIZE, and the export cells use the same values as crs/scale.
TARGET_PROJ = ee.Projection("EPSG:3857")
CELL_SIZE = 4000 # m
# Export region
# First feature of the TIGER 2018 states collection whose NAME is "California";
# its geometry is used for filtering, gridding and export regions.
CALIF = ee.FeatureCollection("TIGER/2018/States")\
    .filter(ee.Filter.eq("NAME", "California"))\
    .first()

## Preisler predictors

In [3]:
# Gather assets
# Server-side handles only; nothing is downloaded when this cell runs.
prism_normal = ee.ImageCollection('OREGONSTATE/PRISM/Norm91m')
daymet       = ee.ImageCollection("NASA/ORNL/DAYMET_V4")
mtbs         = ee.FeatureCollection('USFS/GTAC/MTBS/burned_area_boundaries/v1')
# Host-presence image laid over a constant-0 image, so that pixels where the
# host asset is masked read as 0.
host         = ee.Image.constant(0).blend(ee.Image("projects/forest-lst/assets/nidrms_host_present"))
damage       = ee.ImageCollection("projects/forest-lst/assets/damage_img")

In [4]:
def prism_avg_precip():
    """Return PRISM normal precipitation, summed over the collection, on the target grid.

    The "ppt" band is summed across every image in ``prism_normal``, then
    aggregated to TARGET_PROJ at CELL_SIZE.

    NOTE(review): the spatial aggregation uses a sum reducer, so output values
    grow with the number of source pixels per target cell -- confirm a sum
    (rather than a mean) is intended.

    Returns:
        ee.Image: single-band image reprojected to TARGET_PROJ / CELL_SIZE.
    """
    native_proj = prism_normal.first().projection()
    ppt_total = prism_normal.select("ppt").reduce(ee.Reducer.sum())

    # reduceResolution needs a default projection on the composite to know
    # the source pixel grid it is aggregating from.
    coarse = ppt_total.setDefaultProjection(native_proj).reduceResolution(
        ee.Reducer.sum(), maxPixels=1024, bestEffort=True
    )
    return coarse.reproject(TARGET_PROJ, None, CELL_SIZE)

# Build the long-term precipitation layer and preview it; the bare `Map` on
# the last line makes the notebook display the map widget.
prism_ppt = prism_avg_precip()

Map = geemap.Map()
Map.addLayer(prism_ppt)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [5]:
def burned_area(year):
    """Return the fraction of each target-grid cell covered by MTBS fire perimeters.

    Perimeters intersecting California whose "Ig_Date" falls in ``year`` are
    rasterized to a 0/1 image and averaged up to the target grid, so each
    output pixel holds the burned fraction of that cell.

    Args:
        year: Calendar year (int) of ignition to select.

    Returns:
        ee.Image: single-band image in TARGET_PROJ at CELL_SIZE.
    """
    # Get the burned area polygons we care about
    # NOTE(review): year_end is midnight at the start of Dec 31, so a fire
    # whose Ig_Date is later on Dec 31 would be excluded -- confirm how
    # Ig_Date timestamps are stored.
    year_start = ee.Date.fromYMD(year, 1, 1).millis()
    year_end   = ee.Date.fromYMD(year, 12, 31).millis()

    year_filter = ee.Filter.rangeContains("Ig_Date", year_start, year_end)

    mtbs_filter = mtbs.filterBounds(CALIF.geometry()).filter(year_filter)

    # The fire count was previously fetched with .size().getInfo() and never
    # used; that forced a blocking server round-trip on every call, so it
    # has been dropped.

    # Rasterize
    mtbs_raster = mtbs_filter.map(lambda x: x.set("const", 1))\
        .reduceToImage(["const"], ee.Reducer.max())\
        .rename("burn_pct")

    # Blend with the zero image, then reduce resolution to calculate pixel fraction
    mtbs_reproj = ee.Image.constant(0).blend(mtbs_raster)\
        .setDefaultProjection(TARGET_PROJ, None, 100)\
        .reduceResolution(ee.Reducer.mean(), maxPixels=4096, bestEffort=True)\
        .reproject(TARGET_PROJ, None, CELL_SIZE)

    return mtbs_reproj

# Spot-check the burned-fraction layer for one year on an interactive map.
burn_2018 = burned_area(2018)

Map = geemap.Map()
Map.addLayer(burn_2018)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [6]:
def water_year_precipitation(year):
    """Return Daymet precipitation summed over a water year, on the target grid.

    The water year for year X runs Oct (X-1) through Sep (X); e.g. water year
    2019 covers Oct 2018 - Sep 2019.

    Args:
        year: Water year (int).

    Returns:
        ee.Image: single band "prcp" in TARGET_PROJ at CELL_SIZE.
    """
    jan_first = ee.Date.fromYMD(year, 1, 1)
    wy_start = jan_first.advance(-3, "month")
    wy_end = jan_first.advance(9, "month")

    # Sum every Daymet band over the window, then keep only precipitation.
    totals = daymet.filterDate(wy_start, wy_end).reduce(ee.Reducer.sum())
    prcp = totals.select("prcp_sum").rename("prcp")

    native_proj = daymet.first().projection()
    aggregated = prcp.setDefaultProjection(native_proj).reduceResolution(
        ee.Reducer.sum(), bestEffort=True
    )
    return aggregated.reproject(TARGET_PROJ, None, CELL_SIZE)

# Spot-check water-year precipitation for one year on an interactive map.
p = water_year_precipitation(2020)

Map = geemap.Map()
Map.addLayer(p)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [7]:
def minimum_winter_air_temperature(year):
    """Return the lowest Daymet daily minimum temperature of a winter, on the target grid.

    Winter for year X is defined as Dec (X-1) through Feb (X).

    Args:
        year: Year (int) in which the winter ends.

    Returns:
        ee.Image: single band "winter_tmin" in TARGET_PROJ at CELL_SIZE.
    """
    jan_first = ee.Date.fromYMD(year, 1, 1)
    winter_start = jan_first.advance(-1, "month")
    winter_end = jan_first.advance(2, "month")

    # Per-pixel minimum of every Daymet band over the winter; keep tmin only.
    minima = daymet.filterDate(winter_start, winter_end).reduce(ee.Reducer.min())
    tmin = minima.select("tmin_min").rename("winter_tmin")

    native_proj = daymet.first().projection()
    aggregated = tmin.setDefaultProjection(native_proj).reduceResolution(
        ee.Reducer.mean(), bestEffort=True
    )
    return aggregated.reproject(TARGET_PROJ, None, CELL_SIZE)

# Spot-check minimum winter temperature for one year on an interactive map.
t = minimum_winter_air_temperature(2020)

Map = geemap.Map()
Map.addLayer(t)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [8]:
def summer_mean_vapor_pressure(year):
    """Return mean Daymet vapor pressure over summer (Jun - Sep), on the target grid.

    Args:
        year: Calendar year (int).

    Returns:
        ee.Image: single band "vp_mean" in TARGET_PROJ at CELL_SIZE.
    """
    # Summer is Jun - Sep
    d_start = ee.Date.fromYMD(year, 6, 1)
    # filterDate treats the end date as exclusive, so the window must end on
    # Oct 1 for Sep 30 to be included.
    d_end   = ee.Date.fromYMD(year, 10, 1)

    # The mean reducer already names the band "vp_mean", so no rename is needed.
    return daymet.filterDate(d_start, d_end).reduce(ee.Reducer.mean()).select("vp_mean")\
        .setDefaultProjection(daymet.first().projection())\
        .reduceResolution(ee.Reducer.mean(), bestEffort=True)\
        .reproject(TARGET_PROJ, None, CELL_SIZE)

# Spot-check summer vapor pressure for one year on an interactive map.
vp = summer_mean_vapor_pressure(2020)

Map = geemap.Map()
Map.addLayer(vp)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [9]:
def remaining_host(year):
    """Return host presence minus the damage image of ``year``, clamped to [0, 1].

    Args:
        year: Calendar year (int) of the damage image to subtract.

    Returns:
        ee.Image: single band "rhost".
    """
    in_year = ee.Filter.calendarRange(year, year, "year")
    damage_img = damage.filter(in_year).first()
    remaining = host.subtract(damage_img).clamp(0, 1)
    return remaining.rename("rhost")

# Compare host presence with remaining host for one year; the third layer
# marks pixels where remaining host is lower than the original host value.
rhost = remaining_host(2018)

Map = geemap.Map()
Map.addLayer(host)
Map.addLayer(rhost)
Map.addLayer(rhost.lt(host))
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [10]:
def max_damage_to_neighbors(year):
    """Return, per cell, the maximum damage among its eight surrounding cells.

    Args:
        year: Calendar year (int) of the damage image to use.

    Returns:
        ee.Image: single band "near" in TARGET_PROJ at CELL_SIZE.
    """
    in_year = ee.Filter.calendarRange(year, year, "year")
    damage_img = damage.filter(in_year).first()

    # 3x3 window with a zero weight at the centre, so a cell's own value
    # does not contribute to its neighbourhood maximum.
    ring_weights = [
        [1, 1, 1],
        [1, 0, 1],
        [1, 1, 1],
    ]
    ring = ee.Kernel.fixed(width=3, height=3, weights=ring_weights)

    neighbor_max = damage_img.focalMax(kernel=ring).rename("near")
    neighbor_max = neighbor_max.setDefaultProjection(damage_img.projection())
    return neighbor_max.reproject(TARGET_PROJ, None, CELL_SIZE)

# Show the neighbourhood maximum next to the damage image of the same year
# so the two can be compared visually.
maxdam = max_damage_to_neighbors(2015)
dam    = damage.filter(ee.Filter.calendarRange(2015, 2015, "year")).first()

Map = geemap.Map()
Map.addLayer(maxdam, {}, "Focal max")
Map.addLayer(dam, {}, "Original damage")
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [11]:
# Preview the host-presence layer on its own.
Map = geemap.Map()
Map.addLayer(host)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

## Other predictors

In [12]:
# Gather assets
# `daymet` and `damage` are re-bound here to the same asset IDs used in the
# Preisler-predictors asset cell above.
modis_lst  = ee.ImageCollection("MODIS/061/MYD11A1")
modis_evi  = ee.ImageCollection("MODIS/061/MYD13A2")
daymet     = ee.ImageCollection("NASA/ORNL/DAYMET_V4")
drought    = ee.ImageCollection("GRIDMET/DROUGHT")
# Bilinear resampling is requested for the elevation image.
srtm       = ee.Image("CGIAR/SRTM90_V4").resample("bilinear")
damage     = ee.ImageCollection("projects/forest-lst/assets/damage_img")

In [13]:
def drought_percentile(year, percentiles=[5, 50, 95]):
    """Return per-pixel percentiles of the 30-day SPEI drought index for one year.

    Args:
        year: Calendar year (int).
        percentiles: Percentiles to compute; each output is labelled "p<value>".

    Returns:
        ee.Image: one band per requested percentile.
    """
    band_names = ["p" + str(pct) for pct in percentiles]
    pct_reducer = ee.Reducer.percentile(percentiles, band_names)

    in_year = ee.Filter.calendarRange(year, year, "year")

    # The choice of index and aggregation period here is arbitrary.
    return drought.filter(in_year).select("spei30d").reduce(pct_reducer)

In [14]:
# Spot-check the drought percentiles for one year on an interactive map.
d = drought_percentile(2018)

Map = geemap.Map()
Map.addLayer(d)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [15]:
# Join MODIS LST and Daymet air temperature to make delta T
def calculate_dT(feat):
    """Compute land-surface minus air temperature for one joined MODIS LST image.

    Args:
        feat: A MODIS LST image whose "secondary" property holds the matched
            Daymet images (as saved by the join in lst_air_difference).

    Returns:
        The difference image, band "dT", carrying the properties of the
        masked LST image.
    """
    meanMaxT = ee.ImageCollection.fromImages(feat.get("secondary"))\
        .reduce(ee.Reducer.mean())\
        .select("tmax_mean")\
        .resample("bilinear")

    # LST gets aggregated up to an annual summary so we can afford
    # to be picky about QA
    lst = ee.Image(feat)
    # The mandatory QA flag of QC_Day occupies bits 0-1 and 0 means good
    # quality. Both bits must be tested: masking with 1 alone would also
    # accept the value 2.
    lst = lst.updateMask(lst.select("QC_Day").bitwiseAnd(3).eq(0))\
        .select("LST_Day_1km").multiply(0.02).subtract(273.15) # K -> C

    return lst.subtract(meanMaxT)\
        .rename("dT").copyProperties(lst)

def lst_air_difference(year):
    """Return the collection of LST-minus-air-temperature images for one year.

    Each MODIS LST image of ``year`` is joined to the Daymet images with the
    same "system:time_start", then converted by calculate_dT.

    Args:
        year: Calendar year (int).

    Returns:
        ee.ImageCollection: one "dT" image per joined MODIS LST image.
    """
    in_year = ee.Filter.calendarRange(year, year, "year")
    lst_images = modis_lst.filter(in_year)
    daymet_images = daymet.filter(in_year)

    # <= and >= on the same pair of fields: a match requires equal start times.
    not_later = ee.Filter.lessThanOrEquals(
        leftField="system:time_start", rightField="system:time_start"
    )
    not_earlier = ee.Filter.greaterThanOrEquals(
        leftField="system:time_start", rightField="system:time_start"
    )
    same_start = ee.Filter.And(not_later, not_earlier)

    save_matches = ee.Join.saveAll(
        matchesKey="secondary", ordering="system:time_start", ascending=True
    )
    joined = save_matches.apply(lst_images, daymet_images, same_start)

    return ee.ImageCollection(joined.map(calculate_dT))

def lst_air_difference_percentile(year, percentiles=[5, 50, 95]):
    """Return per-pixel percentiles of the dT images for one year.

    Args:
        year: Calendar year (int).
        percentiles: Percentiles to compute; each output is labelled "p<value>".

    Returns:
        ee.Image: one band per requested percentile.
    """
    band_names = ["p" + str(pct) for pct in percentiles]
    pct_reducer = ee.Reducer.percentile(percentiles, band_names)
    return lst_air_difference(year).reduce(pct_reducer)

In [16]:
# Verify that it worked
# Build both the per-image dT collection and its annual percentiles for 2018.
my_dT = lst_air_difference(2018)
my_dT_percentiles = lst_air_difference_percentile(2018)

# Show the first dT image and the median band with the same colour scale.
Map = geemap.Map()
Map.add_basemap("HYBRID")
Map.addLayer(my_dT.first(), dict(min=-5, max=5, palette=["blue", "white", "red"]))
Map.addLayer(my_dT_percentiles.select("dT_p50"), dict(min=-5, max=5, palette=["blue", "white", "red"]))
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [17]:
# Annual percentiles of MODIS EVI
def annual_evi_percentile(year, percentiles=[5, 50, 95]):
    """Return per-pixel percentiles of good-quality MODIS EVI for one year.

    Args:
        year: Calendar year (int).
        percentiles: Percentiles to compute; each output is labelled "p<value>".

    Returns:
        ee.Image: one band per requested percentile, scaled by 0.0001.
    """
    this_evi = modis_evi.filter(ee.Filter.calendarRange(year, year, "year"))

    # Since we are doing an annual summary we can afford to be picky about
    # image quality.
    # The VI quality flag occupies bits 0-1 of DetailedQA and 0 means good
    # quality. Both bits must be tested: masking with 1 alone also lets
    # through the value 2 ("pixel produced, but most probably cloudy").
    this_evi = this_evi.map(lambda x: x.updateMask(x.select("DetailedQA").bitwiseAnd(3).eq(0)))\
        .select("EVI")

    names = list(map(lambda x: "p" + str(x), percentiles))
    reducer = ee.Reducer.percentile(percentiles, names)

    return this_evi.reduce(reducer).multiply(0.0001)

In [18]:
# Spot-check the median EVI band for one year on an interactive map.
evi_18 = annual_evi_percentile(2018)

Map = geemap.Map()
Map.addLayer(evi_18.select("EVI_p50"))
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

## Do export

In [19]:
import datetime
def make_annual_image(year):
    """Stack all predictors and the mortality response for one year into one image.

    Bands are stacked in this order: year, longitude/latitude, long-term
    precipitation, water-year precipitation ("prcp"), minimum winter
    temperature ("win_tmin"), summer vapor pressure ("sum_vp"), remaining
    host ("rhost"), neighbouring damage ("near"), burned fraction ("fire"),
    and finally the mortality band(s) of the damage image for ``year``.
    The stack is masked to where the response image is valid.

    ``rhost`` and ``near`` are derived from the damage image of ``year - 1``.
    They are prior-year predictors; building them from the same year as the
    response would leak the target into the inputs. As a consequence the
    first year of the damage collection has no prior image to draw on.

    Args:
        year: Calendar year (int) of the response.

    Returns:
        ee.Image: the stacked image with "system:time_start" and
        "system:time_end" set to the first and last millisecond of ``year``
        (UTC).
    """
    # Record-keeping variables
    year_img = ee.Image.constant(year).rename("year").toInt16()
    lon_lat  = ee.Image.pixelLonLat()

    # Long-term avg precip
    prism_prcp = prism_avg_precip()

    # Water-year precip
    prcp = water_year_precipitation(year).rename("prcp")

    # Minimum winter T
    min_winter_T = minimum_winter_air_temperature(year).rename("win_tmin")

    # Mean summer vapor pressure
    sum_vp = summer_mean_vapor_pressure(year).rename("sum_vp")

    # Host remaining after the prior year's mortality
    rhost = remaining_host(year - 1).rename("rhost")

    # Maximum area with mortality in neighboring cells prior year
    near = max_damage_to_neighbors(year - 1).rename("near")

    # Area affected by fire.
    # NOTE(review): the Preisler feature is "fire 2-4 years prior", but this
    # uses fires ignited in `year` itself -- confirm which window is intended.
    fire = burned_area(year).rename("fire")

    # Response - mortality severity
    mort = damage.filter(ee.Filter.calendarRange(year, year, "year")).first()

    all_bands = ee.Image([
        year_img,
        lon_lat,
        prism_prcp,
        prcp,
        min_winter_T,
        sum_vp,
        rhost,
        near,
        fire,
        mort
    ]).updateMask(mort.mask())

    # Set timekeeping properties
    epoch_start = datetime.datetime(year, 1, 1, 0, 0, 0,
                                    tzinfo=datetime.timezone.utc)
    epoch_end   = datetime.datetime(year+1, 1, 1, 0, 0, 0,
                                    tzinfo=datetime.timezone.utc) - datetime.timedelta(milliseconds=1)

    # Earth Engine timestamps are milliseconds since the Unix epoch.
    all_bands = all_bands.set({
        "system:time_start": epoch_start.timestamp() * 1000,
        "system:time_end": epoch_end.timestamp() * 1000
    })

    return all_bands

In [20]:
# Year span of the damage collection, taken from its earliest and latest
# system:time_start (milliseconds since the epoch). The timezone-aware
# fromtimestamp() replaces utcfromtimestamp(), which is deprecated since
# Python 3.12; the extracted UTC year is the same.
first_year = datetime.datetime.fromtimestamp(
    damage.aggregate_min("system:time_start").getInfo() / 1000, tz=datetime.timezone.utc
).year
last_year = datetime.datetime.fromtimestamp(
    damage.aggregate_max("system:time_start").getInfo() / 1000, tz=datetime.timezone.utc
).year
print(first_year, last_year)

1999 2023


In [21]:
# We lose the first year because there is no prior beetle attack for it to generate the `near` image.
from googleapiclient.errors import HttpError
from ee import EEException

# Try to build and resolve the image for every year so that years which
# cannot be produced are identified before any export is queued.
for year in range(first_year, last_year+1):
    try:
        img = make_annual_image(year)
        # getInfo() forces server-side evaluation, surfacing errors now.
        img.getInfo()
    except (HttpError, EEException) as e:
        # Keep going, but report why the year failed instead of discarding it.
        print(year, "failed:", e)

In [22]:
from IPython.display import display, JSON

# Build one annual image and show its server-side description as a JSON tree.
test_img = make_annual_image(2000)

display(JSON(test_img.getInfo()))

<IPython.core.display.JSON object>

In [23]:
# Preview the test image on an interactive map.
Map = geemap.Map()
Map.addLayer(test_img)
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [24]:
# Make an image for each year in the Preisler paper
# (the first year of the damage record is skipped)
available_years = list(range(first_year + 1, last_year + 1))
annual_images = [make_annual_image(yr) for yr in available_years]

In [25]:
# Fetch the description of the first annual image and list its band ids.
bands = [b["id"] for b in annual_images[0].getInfo()["bands"]]
print("Bands:", bands)

Bands: ['year', 'longitude', 'latitude', 'ppt_sum', 'prcp', 'win_tmin', 'sum_vp', 'rhost', 'near', 'fire', 'pct_mortality']


## Array image

## Export options

Tiled export vs. whole-image export. Tiling is necessary for array images because they have a lot more data.

In [26]:
# Exporting the entire array image at once takes a long time. But, we would like to keep the tensors
# "dense". So, split the export up across spatial tiles.
# See https://google-earth-engine.com/Advanced-Topics/Scaling-up-in-Earth-Engine/
# 64000 m per tile side / 4000 m per cell (CELL_SIZE) = 16 cells per side.
grid_size = 64000 # m, effectively a 16x16 patch
# Grid of tiles covering California, built in the projection of the mortality band.
grid = CALIF.geometry().coveringGrid(annual_images[0].select("pct_mortality").projection(), grid_size)
# getInfo() blocks until the server returns the tile count.
tile_count = grid.size().getInfo()
print("Number of tiles:", tile_count)

# Overlay the tile grid on the first annual image.
Map = geemap.Map()
Map.addLayer(annual_images[0])
Map.addLayer(grid)
Map

Number of tiles: 203


Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [27]:
def make_tile_export(idx, feature):
    """Create (without starting) a TFRecord export task for one grid tile.

    Note that the exported image is always ``annual_images[0]``.

    Args:
        idx: Tile index, used in the task description and output prefix.
        feature: Grid feature whose geometry bounds the export.

    Returns:
        The unstarted export task.
    """
    tile_id = str(idx)
    export_args = dict(
        description="preisler-tensors-" + tile_id,
        image=annual_images[0],
        bucket="preisler_tfdata",
        fileNamePrefix="ca_dense_tensors_v3/tile" + tile_id + "/",
        region=ee.Feature(feature).geometry(),
        scale=CELL_SIZE,
        crs=TARGET_PROJ,
        fileFormat="TFRecord",
        formatOptions={"patchDimensions": [1, 1]},
    )
    return ee.batch.Export.image.toCloudStorage(**export_args)

def make_image_export(img):
    """Create (without starting) a whole-California TFRecord export task for one image.

    Note: a later cell defines another function with this same name (an
    asset export); once that cell runs it replaces this definition.

    Args:
        img: Annual image carrying a "system:time_start" property in
            milliseconds since the epoch.

    Returns:
        The unstarted export task, named and prefixed by the image's UTC year.
    """
    # Blocking call: fetches the timestamp so the year can be used client-side.
    ms = img.get("system:time_start").getInfo()
    # Timezone-aware conversion; utcfromtimestamp() is deprecated since Python 3.12.
    year = datetime.datetime.fromtimestamp(ms/1000.0, tz=datetime.timezone.utc).year
    return ee.batch.Export.image.toCloudStorage(
        description="preisler-tensors-"+str(year),
        image=img,
        bucket="preisler_tfdata",
        fileNamePrefix="ca_whole_image/yr"+str(year)+"/",
        region=CALIF.geometry(),
        scale=CELL_SIZE,
        crs=TARGET_PROJ,
        fileFormat="TFRecord",
        formatOptions=dict(
            patchDimensions=[1, 1]
        )
    )

In [28]:
# One unstarted TFRecord export task per annual image; each call fetches
# the image timestamp from the server.
img_tasks = [make_image_export(img) for img in annual_images]

In [29]:
# for t in img_tasks: t.start()

### Rectangular export

TFRecord export is not worth it for pixel-wise classification. Instead, exhaustively sample all pixels and export as CSV.

In [30]:
def make_image_sample(img):
    """Create (without starting) a table export of sampled pixels for one image.

    Args:
        img: Annual image carrying a "system:time_start" property in
            milliseconds since the epoch.

    Returns:
        The unstarted table export task, named by the image's UTC year.
    """
    # Blocking call: fetches the timestamp so the year can be used client-side.
    ms = img.get("system:time_start").getInfo()
    # Timezone-aware conversion; utcfromtimestamp() is deprecated since Python 3.12.
    year = datetime.datetime.fromtimestamp(ms/1000.0, tz=datetime.timezone.utc).year

    # Sample over California on the target grid and strip each feature's
    # geometry so that only band values are exported.
    sample = img.sample(
        region=CALIF.geometry(),
        scale=CELL_SIZE,
        projection=TARGET_PROJ
    ).map(lambda x: x.setGeometry(None))

    return ee.batch.Export.table.toCloudStorage(
        description="yr{}".format(year),
        fileNamePrefix="preisler-rectangular/yr{}".format(year),
        collection=sample,
        bucket="preisler_tfdata"
    )

# sample_collection = ee.ImageCollection(annual_images).map(make_image_sample).flatten()

# One unstarted table-export task per annual image.
tasks = [make_image_sample(img) for img in annual_images]

In [31]:
# Submit every task currently held in `tasks`.
for t in tasks: t.start()

### Export images as assets
In case we want to mess with them in the code editor.

In [32]:
def make_image_export(img):
    """Create (without starting) an export of one annual image to an Earth Engine asset.

    Note: this reuses the name of the Cloud Storage exporter defined in an
    earlier cell and replaces it once this cell runs.

    Args:
        img: Annual image carrying a "system:time_start" property in
            milliseconds since the epoch.

    Returns:
        The unstarted export task; the asset id ends in the image's UTC year.
    """
    # Blocking call: fetches the timestamp so the year can be used client-side.
    ms = img.get("system:time_start").getInfo()
    # Timezone-aware conversion; utcfromtimestamp() is deprecated since Python 3.12.
    year = datetime.datetime.fromtimestamp(ms/1000.0, tz=datetime.timezone.utc).year

    return ee.batch.Export.image.toAsset(
        image=img,
        description="preisler-annual-image-{}".format(year),
        assetId="projects/forest-lst/assets/preisler-annual-images/{}".format(year),
        region=CALIF.geometry(),
        scale=CELL_SIZE,
        crs=TARGET_PROJ
    )

# One unstarted asset-export task per annual image; rebinds `tasks`.
tasks = [make_image_export(img) for img in annual_images]

In [33]:
# Submit every task currently held in `tasks`.
for t in tasks: t.start()