In [1]:
"""
Extract TreeMap (ca. 2016) statistics within a regular grid (aggregated FRP)
    - count of pixels for each forest species in gridcells (percent cover)
    - for each species (masked), calculate the mean TreeMap metrics (BALIVE, SDI, STANDHT)
    - for each species (masked), calculate the mean Sentinel-2 metrics (LAI, MNDWI)
Author: maxwell.cook@colorado.edu
"""

import ee, geemap
import os, sys, time
import pandas as pd

# Custom functions
# Make the local `code/` folder importable; this relies on the notebook's working
# directory being the folder that contains `code/`.
sys.path.append(os.path.join(os.getcwd(),'code/'))
# Star import: every public name of the local `__functions` module lands in the notebook
# namespace (presumably including `monitor_export`, used by the export cells — TODO confirm).
from __functions import *

# Authenticate, then initialize the Earth Engine session against the 'jfsp-aspen' project.
ee.Authenticate()
ee.Initialize(project='jfsp-aspen')

# NOTE(review): absolute, machine-specific paths — adjust `maindir` before running elsewhere.
maindir = '/Users/max/Library/CloudStorage/OneDrive-Personal/mcook/'
projdir = os.path.join(maindir, 'aspen-fire/Aim2/')

print("Success")

Success


In [2]:
# Open the USFS TreeMap (v2016) collection in GEE and list the bands of its first image
treemap = ee.ImageCollection("USFS/GTAC/TreeMap/v2016")
available_bands = treemap.first().bandNames().getInfo()
print(f"TreeMap bands available for analysis:\n\n{available_bands}")

TreeMap bands available for analysis:

['ALSTK', 'BALIVE', 'CANOPYPCT', 'CARBON_D', 'CARBON_DWN', 'CARBON_L', 'DRYBIO_D', 'DRYBIO_L', 'FLDSZCD', 'FLDTYPCD', 'FORTYPCD', 'GSSTK', 'QMD_RMRS', 'SDIPCT_RMRS', 'STANDHT', 'STDSZCD', 'TPA_DEAD', 'TPA_LIVE', 'Value', 'VOLBFNET_L', 'VOLCFNET_D', 'VOLCFNET_L']


In [3]:
# Narrow the collection to the two forest-type code bands plus the stand metrics of interest
keep_bands = [
    'FORTYPCD', 'FLDTYPCD',
    'BALIVE', 'SDIPCT_RMRS', 'STANDHT',
    'TPA_LIVE', 'TPA_DEAD',
]
treemap = treemap.select(keep_bands)
treemap.first().bandNames().getInfo()

['FORTYPCD',
 'FLDTYPCD',
 'BALIVE',
 'SDIPCT_RMRS',
 'STANDHT',
 'TPA_LIVE',
 'TPA_DEAD']

In [4]:
# Display the band-subset collection object as this cell's output
treemap

In [5]:
# Read the FORTYPCD class codes/names stored as properties on the first image
first_image = treemap.first()
class_codes = first_image.get('FORTYPCD_class_values').getInfo()
class_names = first_image.get('FORTYPCD_class_names').getInfo()
code_to_name = {code: name for code, name in zip(class_codes, class_names)}  # code -> name

# Tabulate the lookup with integer codes and a clean 0..n-1 index
species_df = (
    pd.DataFrame(list(code_to_name.items()), columns=['FORTYPCD', 'SpeciesName'])
    .astype({'FORTYPCD': int})
    .reset_index(drop=True)
)
species_df.head()

Unnamed: 0,FORTYPCD,SpeciesName
0,101,Jack pine
1,102,Red pine
2,103,Eastern white pine
3,104,Eastern white pine / eastern hemlock
4,105,Eastern hemlock


In [6]:
# Spot-check the lookup against two well-known forest types
for species_name in ('Ponderosa pine', 'Lodgepole pine'):
    print(species_df[species_df['SpeciesName'] == species_name])

    FORTYPCD     SpeciesName
27       221  Ponderosa pine
    FORTYPCD     SpeciesName
44       281  Lodgepole pine


In [7]:
# Save this file out.
# Target: the FORTYPCD -> species-name lookup table, under the project's tabular data folder.
out_fp = os.path.join(projdir,'data/tabular/mod/treemap_fortypcd_species_mapping.csv')
# NOTE: to_csv is called with defaults, so the DataFrame index is written as an
# unnamed first column of the CSV.
species_df.to_csv(out_fp)
print(f"Dictionary saved to {out_fp}")

Dictionary saved to /Users/max/Library/CloudStorage/OneDrive-Personal/mcook/aspen-fire/Aim2/data/tabular/mod/treemap_fortypcd_species_mapping.csv


In [9]:
# load the gridded FRP data for aspen fires

In [8]:
# Gridded active-fire statistics asset; report its size and the properties on a feature
grid = ee.FeatureCollection('projects/jfsp-aspen/assets/viirs_snpp_jpss1_afd_gridstats')
n_gridcells = grid.size().getInfo()
print(f"{n_gridcells} total gridcells.")
first_cell_props = grid.first().propertyNames().getInfo()
print(first_cell_props)

55936 total gridcells.
['Fire_Year', 'grid_index', 'Fire_ID', 'max_date', 'first_obs', 'afd_count', 'Ig_Date', 'Last_Date', 'system:index', 'last_obs']


In [9]:
# Display a 10-feature subset of the grid as this cell's output
grid.limit(10)

In [12]:
# calculate the species histogram

In [10]:
# Image whose every pixel equals 1; species_histogram sums it over a gridcell to
# obtain the 'total_pixels' denominator.
constant = ee.Image.constant(1) # creates a constant image
# NOTE(review): the projection is taken from `treemap.mosaic()`. Earth Engine mosaics
# generally report a default projection rather than the inputs' native grid — confirm
# this is the intended reference before relying on absolute pixel counts.
constant = constant.reproject(treemap.mosaic().projection()) # scale to treemap projection

In [16]:
def _histogram_string(band, region):
    """Return the pixel-frequency histogram of one TreeMap band as a string.

    Args:
        band: name of a band in the module-level `treemap` collection.
        region: ee.Geometry over which pixels are counted (30 m scale).

    Returns:
        ee.String of 'code:count' pairs joined by ', '. Empty when the region
        holds no valid pixels for the band.
    """
    hist = treemap.select(band).mosaic().reduceRegion(
        reducer=ee.Reducer.frequencyHistogram(),
        geometry=region,
        scale=30
    ).get(band)

    # reduceRegion yields null for a band with no unmasked pixels in the region;
    # fall back to an empty dictionary so the .map() below does not fail.
    hist = ee.Dictionary(ee.Algorithms.If(hist, hist, ee.Dictionary({})))

    # Flatten {code: count} into 'code:count, code:count, ...' so the result can
    # be stored as a single string property on the feature.
    return hist.map(
        lambda key, value: ee.String(key).cat(':').cat(ee.Number(value).format())
    ).values().join(', ')


def species_histogram(ftr):
    """Attach TreeMap forest-type pixel histograms to one gridcell.

    Args:
        ftr: ee.Feature whose geometry defines the region to summarise.

    Returns:
        The input feature with three properties set:
          - 'fortypcd_hist': 'code:count' pairs (joined by ', ') for band FORTYPCD
          - 'fldtypcd_hist': the same for band FLDTYPCD
          - 'total_pixels': sum of the module-level `constant` image over the
            gridcell at 30 m scale
    """
    region = ftr.geometry()

    # grab the total pixels used in calculation
    total_pixels = constant.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=region,
        scale=30
    ).get('constant')

    # histograms are stored as flat strings (not dictionaries); unpack client-side
    return ftr.set({
        'fortypcd_hist': _histogram_string('FORTYPCD', region),
        'fldtypcd_hist': _histogram_string('FLDTYPCD', region),
        'total_pixels': total_pixels
    })

# map across gridcells
# NOTE: `grid` is rebound here to a collection carrying only 'grid_index' and the geometry.
grid = grid.select(['grid_index','.geo']) # just keep the grid ID and geometry
# This builds the mapped collection; the results are only fetched by later
# getInfo() calls or by the export task below.
fortypcd = grid.map(species_histogram) # apply the function to the grid
print("Process submitted to the server !")

Process submitted to the server !


In [17]:
# check the results
# (fetches the property names of the first mapped feature to the client)
fortypcd.first().propertyNames().getInfo()

['total_pixels',
 'fldtypcd_hist',
 'fortypcd_hist',
 'system:index',
 'grid_index']

In [22]:
# Fetch ten mapped features to the client and tabulate their properties for a quick look
sample = fortypcd.limit(10).getInfo()
props = [feature['properties'] for feature in sample['features']]
df = pd.DataFrame(props)
df.head()

Unnamed: 0,fldtypcd_hist,fortypcd_hist,grid_index,total_pixels
0,"201:14.113725490196078, 221:95.74901960784314,...","182:3.0, 185:3.0, 201:16.145098039215686, 221:...",1690163,195.015686
1,"201:8.109803921568627, 221:133.2862745098039, ...","182:3.0, 201:9.262745098039217, 221:166.690196...",1690164,194.752941
2,"185:1.0, 201:19.890196078431373, 221:113.09803...","182:1.1803921568627451, 185:4.0, 201:24.427450...",1692414,194.988235
3,"185:0.7098039215686275, 201:21.34509803921569,...","182:19.87058823529412, 185:1.9607843137254903,...",1694664,194.788235
4,"185:7.807843137254902, 201:25.184313725490195,...","182:6.556862745098039, 185:9.807843137254903, ...",1692412,194.984314


In [23]:
# unpack histogram dictionary into columns
def parse_histogram(hist_str):
    """Parse a 'code:count, code:count' string into a {int code: float count} dict.

    Args:
        hist_str: histogram string such as '182:3.0, 221:118.15'.

    Returns:
        dict mapping integer class codes to float pixel counts. An empty dict is
        returned for an empty/blank string and for non-string input (e.g. None,
        or the NaN pandas uses for a missing cell), so gridcells without any
        histogram do not abort the whole .apply().
    """
    if not isinstance(hist_str, str) or not hist_str.strip():
        return {}
    parsed = {}
    for pair in hist_str.split(','):
        # partition splits each pair once; int()/float() tolerate the padding
        # left behind by the ', ' separator
        code, _, count = pair.partition(':')
        parsed[int(code)] = float(count)
    return parsed
# Replace each histogram string with its parsed {code: count} dict
df['fortypcd_hist'] = df['fortypcd_hist'].apply(parse_histogram)
# Expand the dicts into one column per forest-type code. NOTE: this rebinds `df` to the
# expanded frame only, so 'grid_index' and 'total_pixels' are dropped, and re-running
# this cell on its own fails because the 'fortypcd_hist' column no longer exists.
df = df['fortypcd_hist'].apply(pd.Series)
print("Adjusted DataFrame:")
df.head(10)

Adjusted DataFrame:


Unnamed: 0,182,185,201,221,261,265,266,267,281,369,901,971,371,974,366
0,3.0,3.0,16.145098,118.152941,31.098039,3.360784,2.0,6.768627,3.286275,2.372549,1.0,4.831373,,,
1,3.0,,9.262745,166.690196,15.513725,,,0.227451,,,,0.058824,,,
2,1.180392,4.0,24.427451,120.721569,30.956863,3.317647,,1.0,1.0,,,5.384314,2.0,1.0,
3,19.870588,1.960784,18.141176,79.121569,53.54902,3.678431,,3.580392,,,2.0,10.886275,1.0,,1.0
4,6.556863,9.807843,7.45098,129.243137,24.392157,1.0,,5.729412,,,,7.803922,1.0,2.0,
5,0.905882,2.0,8.203922,135.333333,32.690196,7.356863,,2.423529,2.713725,,,3.611765,,,
6,19.631373,7.403922,11.388235,80.447059,34.172549,1.635294,,18.384314,,,4.0,17.87451,,,
7,5.662745,,25.560784,102.184314,44.435294,,,2.0,,,3.0,11.219608,,,0.929412
8,21.682353,,11.486275,95.372549,30.466667,,,2.0,,3.0,1.0,30.266667,,,
9,26.117647,10.631373,5.364706,91.886275,19.0,4.0,,15.443137,,,,17.807843,,,


In [24]:
# Export to Google Drive (CSV table in the 'TreeMap' folder)
task = ee.batch.Export.table.toDrive(
    collection=fortypcd,
    description='gridstats_treemap_for_fld',
    fileNamePrefix='gridstats_treemap_for_fld',
    fileFormat='CSV',
    folder='TreeMap'
)

task.start() # Start the export task
print("Export to Google Drive started!")
# `monitor_export` is not defined in this notebook (presumably provided by the
# `__functions` star import — TODO confirm, including the units of `timeout`).
monitor_export(task, timeout=120)

Export to Google Drive started!
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Waiting for export to finish..
	Patience young padawan.
Export completed successfully !!!!


In [None]:
# Gather the species metrics

In [17]:
# TreeMap bands that species_metrics averages per forest type
cols = ['BALIVE','SDIPCT_RMRS','STANDHT','TPA_LIVE','TPA_DEAD']
# Rebind `grid`, keeping only the 'grid_index' property
grid = grid.select(['grid_index'])

# function to calculate mean by species
def species_metrics(ftr):
    """Attach per-forest-type mean TreeMap metrics to one gridcell.

    For every FORTYPCD code present in the gridcell, the TreeMap mosaic is masked
    to that code and the mean of each band in the module-level `cols` list is
    computed over the gridcell at 30 m scale.

    Args:
        ftr: ee.Feature whose geometry defines the gridcell.

    Returns:
        The input feature with a 'species_metrics' property: a string of
        '<code>_<band>_mn:<mean>' pairs joined by ', '. The string is empty when
        the gridcell contains no valid FORTYPCD pixels.
    """
    image = treemap.mosaic()
    region = ftr.geometry()

    # Get species histogram (only its keys, i.e. the codes present, are used)
    hist = image.select('FORTYPCD').reduceRegion(
        reducer=ee.Reducer.frequencyHistogram(),
        geometry=region,
        scale=30,
        maxPixels=1e13
    ).get('FORTYPCD')

    # reduceRegion yields null when no unmasked pixel falls in the region; use an
    # empty dictionary in that case so .keys() below does not fail.
    hist_dict = ee.Dictionary(ee.Algorithms.If(hist, hist, ee.Dictionary({})))

    def add_species_metrics(key, current_dict):
        """iterate() callback: merge one forest type's mean metrics into the accumulator."""
        key_str = ee.String(key)

        # Mask the image by the species type (histogram keys are strings, so parse to int)
        masked_image = image.updateMask(image.select('FORTYPCD').eq(ee.Number.parse(key).toInt()))

        # Calculate average metrics for the masked image
        metrics = masked_image.select(cols).reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=region,  # Gridcell
            scale=30,
            maxPixels=1e13
        )

        # Construct a sub-dictionary for this species, keyed '<code>_<band>_mn'
        sp_metrics = ee.Dictionary.fromLists(
            ee.List(cols).map(lambda col: key_str.cat('_').cat(col).cat('_mn')),
            ee.List(cols).map(lambda col: metrics.get(col))
        )

        # Combine this species' metrics with the current dictionary
        return ee.Dictionary(current_dict).combine(sp_metrics)

    # Iterate over all species to calculate metrics
    metrics_dict = hist_dict.keys().iterate(add_species_metrics, ee.Dictionary())

    # Flatten to 'key:value, key:value, ...' so it fits in a single string property
    metrics_string = ee.Dictionary(metrics_dict).map(
        lambda key, value: ee.String(key).cat(':').cat(ee.Number(value).format())
    ).values().join(', ')

    # Return the feature with the species metrics as a single string property
    return ftr.set({
        'species_metrics': metrics_string,
    })

# map the function over the grids
# This builds the mapped collection; values are fetched by the later
# getInfo() call or by the export task.
fortypcd_metrics = grid.map(species_metrics)
print("Submitted !")

Submitted !


In [18]:
# Fetch ten mapped features to the client and tabulate their properties for a quick look
sample = fortypcd_metrics.limit(10).getInfo()
props = [feature['properties'] for feature in sample['features']]
df = pd.DataFrame(props)
df.head()

Unnamed: 0,grid_index,species_metrics
0,919906,"182_BALIVE_mn:113.51390075683594, 182_SDIPCT_R..."
1,919907,"182_BALIVE_mn:113.51390075683594, 182_SDIPCT_R..."
2,919908,"182_BALIVE_mn:63.25205624256263, 182_SDIPCT_RM..."
3,922166,"182_BALIVE_mn:103.75088572904717, 182_SDIPCT_R..."
4,922171,"182_BALIVE_mn:103.60663347462486, 182_SDIPCT_R..."


In [19]:
# unpack histogram dictionary into columns
def parse_histogram(hist_str):
    """Parse a 'name:value, name:value' string into a {str name: float value} dict.

    NOTE: this redefines `parse_histogram`; unlike the earlier definition in this
    notebook, keys are kept as strings (e.g. '182_BALIVE_mn').

    Args:
        hist_str: metrics string such as '182_BALIVE_mn:113.5, 182_STANDHT_mn:29.0'.

    Returns:
        dict mapping metric names to float values. An empty dict is returned for
        an empty/blank string and for non-string input (e.g. None, or the NaN
        pandas uses for a missing cell), so gridcells without metrics do not
        abort the whole .apply().
    """
    if not isinstance(hist_str, str) or not hist_str.strip():
        return {}
    parsed = {}
    for pair in hist_str.split(', '):
        # partition splits each pair once at the first ':'
        name, _, value = pair.partition(':')
        parsed[name] = float(value)
    return parsed
# Replace each metrics string with its parsed {name: value} dict
df['species_metrics'] = df['species_metrics'].apply(parse_histogram)
# Expand the dicts into one column per '<code>_<band>_mn' key. NOTE: this rebinds `df`
# to the expanded frame only, so 'grid_index' is dropped, and re-running this cell on
# its own fails because the 'species_metrics' column no longer exists.
df = df['species_metrics'].apply(pd.Series)
print("Adjusted DataFrame:")
df.head(10)

Adjusted DataFrame:


Unnamed: 0,182_BALIVE_mn,182_SDIPCT_RMRS_mn,182_STANDHT_mn,182_TPA_DEAD_mn,182_TPA_LIVE_mn,185_BALIVE_mn,185_SDIPCT_RMRS_mn,185_STANDHT_mn,185_TPA_DEAD_mn,185_TPA_LIVE_mn,...,224_TPA_LIVE_mn,266_BALIVE_mn,266_STANDHT_mn,266_TPA_DEAD_mn,266_TPA_LIVE_mn,974_BALIVE_mn,974_SDIPCT_RMRS_mn,974_STANDHT_mn,974_TPA_DEAD_mn,974_TPA_LIVE_mn
0,113.513901,65.199997,29.0,12.036092,396.670441,44.991299,26.083333,20.285714,9.027069,773.867731,...,,,,,,,,,,
1,113.513901,65.199997,29.0,12.036092,396.670441,,,,,,...,,,,,,,,,,
2,63.252056,33.140844,29.0,12.036092,146.540987,52.111698,25.0,30.0,,428.988831,...,,,,,,,,,,
3,103.750886,57.694653,28.415572,48.841899,473.854566,67.542999,30.973641,24.497902,12.176737,429.010974,...,,,,,,,,,,
4,103.606633,57.993254,29.614211,24.430655,338.867526,83.925314,29.932623,23.493708,12.634444,177.626713,...,,,,,,,,,,
5,65.209068,33.666667,29.5,60.180462,149.361375,67.694416,31.167742,25.0,6.018046,491.86293,...,,,,,,,,,,
6,32.979301,17.1,26.0,12.036092,117.091606,,,,,,...,,,,,,,,,,
7,66.810949,36.760191,29.025478,6.018046,391.397017,,,,,,...,,,,,,,,,,
8,,,,,,,,,,,...,383.612244,118.076103,64.0,12.036092,469.070923,,,,,
9,88.638074,51.598332,29.624816,6.018046,540.409072,167.820503,54.333333,26.333333,6.018046,159.217926,...,383.612244,,,,,146.244202,70.599998,28.0,36.108276,522.528809


In [None]:
# export it.
# Queue a batch export of the per-gridcell species metrics as a CSV table in the
# 'TreeMap' folder of Google Drive.
export_task = ee.batch.Export.table.toDrive(
    collection=fortypcd_metrics,
    description='gridstats_fortypcd_metrics',
    fileNamePrefix='gridstats_fortypcd_metrics',
    fileFormat='CSV',
    folder='TreeMap'
)

export_task.start() # Start the export task
# The task targets Google Drive (Export.table.toDrive), not an Earth Engine asset,
# so the status message now says so.
print("Export to Google Drive started!")
monitor_export(export_task, 120)