# modisLandsat - notebook to calibrate and upload hierarchical random forests for Landsat LAI and fAPAR based on MODIS algorithms

## richard.fernandes@canada.ca

In [None]:
# Modules

In [9]:
# python modules 
import numpy as np
import pandas as pd
import pickle
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import export_text
pd.options.mode.chained_assignment = None  # default='warn'
import ee

In [1]:
# construct hierarchical random forests for the FTL method
def hierarchicalRF(dataDictParent,dataDictChild,regressorsNames,regressorsGEENames, response, domainScaling,domainOffset, maxDepthParent=20, maxDepthChild=20, minSamplesSplit=11,maxleafnodesParent= 100, minSamplesLeafParent=10, maxleafnodesChild= 999, minSamplesLeafChild=10,maxFeatures="auto",nTrees = 100):
    """Calibrate, per biome, a single-tree parent RF and one child RF per parent partition.

    For every biome key of ``dataDictParent`` a one-tree random forest (the
    "parent") is fitted on ``dataDictParent[biome]['DF']``.  The parent is then
    used to predict every row of ``dataDictChild[biome]['DF']``; rows sharing the
    same prediction (rounded to 3 decimals) form a partition, and a child random
    forest with ``nTrees`` trees is fitted on each partition.

    NOTE: besides its arguments this function reads two module-level names that
    must be defined before it is called: ``domainIndex`` (passed to makeDomain)
    and ``method`` (prefix of the key under which the result is stored).

    args:
        dataDictParent (dict): biome -> dict holding at least a data frame under 'DF';
            used to fit the parent tree and updated in place with the result
        dataDictChild (dict): biome -> dict holding at least a data frame under 'DF';
            used to build the partitions and fit the child forests
        regressorsNames (list[str]): data frame columns used as regressors
        regressorsGEENames (list[str]): names stored under 'regressorsGEE' in each RF dictionary
        response (list[str]): data frame column(s) used as response; response[0] is part of the result key
        domainScaling, domainOffset: forwarded to makeDomain
        maxDepthParent, maxleafnodesParent, minSamplesLeafParent, minSamplesSplit:
            max_depth, max_leaf_nodes, min_samples_leaf and min_samples_split of the parent tree
        maxDepthChild, maxleafnodesChild, minSamplesLeafChild:
            max_depth, max_leaf_nodes and min_samples_leaf of the child forests
        maxFeatures: max_features of both parent and children; the legacy value "auto" is
            translated to 1.0 (all features), which is what "auto" meant for regressors
        nTrees (int): number of trees in each child forest

    returns:
        dict: ``dataDictParent``, where each biome dictionary has gained the key
        ``method + response[0] + 'parentRFDict'``
    """

    # scikit-learn 1.3 removed max_features="auto"; for a regressor it was equivalent to 1.0
    if isinstance(maxFeatures, str) and maxFeatures == "auto":
        maxFeatures = 1.0

    # make generic names (x1, x2, ...) for regressors for use in GEE
    regressors = ['x' + str(position) for position in range(1, len(regressorsNames) + 1)]
    columnsIn = list(regressorsNames) + list(response)
    columnsOut = regressors + list(response)

    # Calibrate hierarchical random forest predictors for each biome
    for biome in dataDictParent.keys():
        print('biome:',biome)

        # subset only the regressors and response, cast to integers and rename to the generic names
        dfBiome = dataDictChild[biome]['DF'][columnsIn].astype('int')
        dfParent = dataDictParent[biome]['DF'][columnsIn].astype('int')
        dfBiome.columns = columnsOut
        dfParent.columns = columnsOut
        print('Total size ',dfBiome.shape)
        print('Parent size ',dfParent.shape)

        # populate a parent RF dictionary that holds a single tree RF used to partition data into child RFs
        parentRFDict = {}
        parentRFDict.update({'regressors': regressors})
        parentRFDict.update({'regressorsGEE': regressorsGEENames})
        parentRFDict.update({'response': response})
        parentRFDict.update({'domain': makeDomain(dfBiome[regressors],domainIndex,domainScaling,domainOffset)})
        parentRF = RandomForestRegressor(n_estimators=1,min_samples_leaf=minSamplesLeafParent,min_samples_split=minSamplesSplit,
                                         bootstrap=False,random_state=0,verbose=0,max_depth=maxDepthParent,
                                         max_leaf_nodes=maxleafnodesParent,max_features=maxFeatures,n_jobs=40)
        parentRF.fit(dfParent[regressors], np.array(dfParent[response]).ravel())
        parentRFDict.update({'RF': parentRF})

        # label input data using the rounded prediction of the parent RF; predict once and reuse
        estimates = np.around(np.array(parentRF.predict(dfBiome[regressors])),decimals=3)
        dfBiome['estimate'] = estimates
        partitions = np.unique(estimates)

        # populate dictionary of children RFs, each childRF is itself a dictionary similar to the parentRF but now using more than one tree
        # each child is labelled using the prediction value from the parentRF corresponding to its partition
        childrenRFDict = {}
        print('number children:',partitions.size)
        for partition in partitions:
            dfpartitionBiome = dfBiome.loc[dfBiome['estimate'] == partition]
            childRF = RandomForestRegressor(n_estimators=nTrees,min_samples_leaf=minSamplesLeafChild,bootstrap=True,
                                            random_state=0,verbose=0,max_depth=maxDepthChild,
                                            max_leaf_nodes=maxleafnodesChild,max_features=maxFeatures,n_jobs=40)
            childRFDict = {}
            childRFDict.update({'size': dfpartitionBiome[response].shape[0]})
            childRFDict.update({'regressors': regressors})
            childRFDict.update({'regressorsGEE': regressorsGEENames})
            childRFDict.update({'response': response})
            childRFDict.update({'domain': makeDomain(dfpartitionBiome[regressors],domainIndex,domainScaling,domainOffset)})
            childRFDict.update({'RF': childRF.fit(dfpartitionBiome[regressors], np.array(dfpartitionBiome[response]).ravel())})
            childrenRFDict.update({partition: childRFDict})

        # assign the childrenRFDict to the parent
        parentRFDict.update({'childrenRFDict': childrenRFDict})

        # assign the parentRF dict to the calibration data dictionary for this biome
        dataDictParent[biome].update({method+response[0]+'parentRFDict': parentRFDict})
    return dataDictParent

In [3]:
# code the input feature domain by using a linear hash for each row of the input data frame
# the hash algorithm converts each input value into an integer from 0 to 9 by applying the provided scale and offset and then rounding
# it then produces a hash entry for each row by packing the integers consecutively to form a uint64 code
# this implies a limit of at most 18 columns for the input data frame
# returns a list corresponding to the hash table of unique coded input rows
def makeDomain(df,domainIndex,domainScaling,domainOffset):
    """Return the sorted unique integer codes describing the rows of ``df``.

    Each value is mapped to a digit ``clip(around(value * scale + offset), 0, 9)``
    and the digits of a row are packed into one integer, column ``j`` being
    weighted by ``10 ** (cumsum(domainIndex)[j] - domainIndex[0])``.

    The packing is carried out in unsigned 64-bit integer arithmetic, so codes
    stay exact for up to 18 one-digit columns (a float64 accumulator is only
    exact below 2**53, i.e. about 15 digits).

    args:
        df: 2-D array-like (rows x columns), e.g. a data frame of regressors
        domainIndex: one entry per column; used to derive the power of ten of each column
        domainScaling: one scale per column
        domainOffset: one offset per column

    returns:
        list[int]: unique row codes in ascending order

    raises:
        ValueError: if ``df`` has more than 18 columns
    """
    df = np.array(df)
    if df.shape[1] > 18:
        raise ValueError("More than 18 dimensions in domain")

    domainIndex = np.array(domainIndex)
    domainScaling = np.array(domainScaling)
    domainOffset = np.array(domainOffset)

    # one digit (0..9) per value; np.around rounds halves to the nearest even integer
    digits = np.clip(np.around(df * domainScaling + domainOffset, 0), 0, 9).astype(np.uint64)

    # power of ten carried by each column, built explicitly as uint64 so that it
    # cannot overflow on platforms whose default integer is 32 bits wide
    exponents = (np.cumsum(domainIndex) - domainIndex[0]).astype(np.uint64)
    weights = np.power(np.full(exponents.shape, 10, dtype=np.uint64), exponents)

    codes = np.sum(digits * weights, axis=1, dtype=np.uint64)
    return np.unique(codes).tolist()

In [2]:
# parse a scikit-learn decision tree into an R text tree suitable for use in GEE
# for compactness ancillary items like node sample size and residuals are forced to = 1
# this is a blind guess by Richard but seems to work
def make_tree(rf,regressors,maxdepth,decimals):
    """Convert a fitted scikit-learn decision tree into an R (rpart-style) text tree.

    The tree is first rendered with sklearn.tree.export_text and every split
    line is then rewritten as ``<indent><node>) <condition> 0 0 <response> <leaf>``
    where ``<leaf>`` is ``*`` for a split that is directly followed by a value
    line and a blank otherwise.  Child nodes are numbered ``2k`` (``<=`` branch)
    and ``2k + 1`` (``>`` branch) for a parent numbered ``k``.

    args:
        rf: fitted decision tree accepted by export_text
        regressors (list[str]): feature names given to export_text; the condition is
            extracted with the pattern ``x.+`` so the names are expected to
            contain an ``x`` (e.g. 'x1', 'x2', ...)
        maxdepth (int): max_depth given to export_text
            # NOTE(review): presumably must be at least the depth of the tree, since
            # truncated-branch lines are not valid split lines here -- confirm
        decimals (int): number of decimals given to export_text

    returns:
        str: the text tree, starting with the root line and using '\\n' separators
    """

    # first get the output in scikit-learn text format in a dataframe, one row per line
    text = export_text(decision_tree=rf,feature_names=regressors,show_weights=True,decimals=decimals,max_depth=maxdepth)
    rdf = pd.DataFrame(text.splitlines(),columns=['rule'])

    # identify rules and not leaf values
    isrule = ~rdf['rule'].str.contains('value')

    # determine level in tree and the associated starting base node number (2 ** level)
    rdf['level'] = rdf['rule'].str.count(r'(\|)')
    rdf.loc[isrule,'base'] = 2 ** rdf.loc[isrule,'level']

    # get the actual tested condition
    rdf.loc[isrule,'condition'] = rdf.loc[isrule,'rule'].str.extract(r'(x.+)',expand=False)

    # identify leaf nodes: the rule directly above a value line is flagged with '*'
    rdf.loc[~isrule,'leaf'] = '*'
    rdf['leaf'] = rdf['leaf'].bfill(limit=1)

    # fill in the response value: every rule takes the value of the next value line below it
    rdf.loc[~isrule,'response'] = rdf.loc[~isrule,'rule'].str.extract(r'([+-]?([0-9]*[.])?[0-9]+)')[0]
    rdf['response'] = rdf['response'].bfill()

    # discard non rules (value lines have no base/condition and are dropped here)
    rdf.loc[rdf['leaf'].isna(),'leaf'] = ' '
    rdf = rdf.dropna().copy()

    # determine if this is a left (0) or right (1) branch
    rdf['branch'] = rdf['rule'].str.contains(r'(?:\>)').astype('int')
    rdf['node'] = rdf.base + rdf.branch
    rdf.loc[rdf.level==1,'node'] = rdf.loc[rdf.level==1,'branch'] + 2
    rdfindex = rdf.index

    # assign a node number, this is non trivial and critical for use later
    # read https://www.r-bloggers.com/2022/10/understanding-leaf-node-numbers-when-using-rpart-and-rpart-rules/
    # rows are processed top-down so that a parent always has its final number before its children
    for row in range(2,rdf.shape[0]):
        level = rdf['level'].iloc[row]
        if level > 1:
            # the parent is the nearest row above that sits one level higher
            above = rdf.iloc[:row]
            parent = above.loc[above.level == level - 1].iloc[-1]
            rdf.at[rdfindex[row],'node'] = rdf['node'].iloc[row] + 2 * (parent.node - parent.base)

    # glue together each rule in a big string, add the root node and return as one string
    rdf['phrase'] = rdf.apply(lambda x:  ' ' *(2 * x.level) + str(int(x.node)) + ') ' + x.condition + ' 0 0 ' + str(x.response) + ' ' +x.leaf + '\n', axis=1)
    return ( '1) root 1 1 1 (1)\n'+''.join(rdf['phrase'].values.tolist()))

In [5]:
# This function was taken from the GEEMAP libraries and modified as needed here
def export_trees_to_fc_CCRS(trees,response,regressors,regressorsGEECollectionName,regressorsGEENames,responseGEEScaling,responseGEEOffset,regressorsGEEScaling2, \
                            regressorsGEEScaling,regressorsGEEOffset,domain,domainScaling,domainOffset,asset_id,description="geemap_rf_export"):
    """Export decision-tree strings plus CCRS tree properties as an Earth Engine table asset.

    One feature is created per tree.  Every feature carries the tree string
    (line feeds encoded as '#') together with the response/regressor names and
    the scaling and offset lists, each list being stored as a comma separated
    string.  The feature collection is exported with ee.batch.Export.table.toAsset
    and the export task is started; nothing is returned.

    args:
        trees (list[str]): string representation of each decision tree
        response (list[str]): name(s) of the response variable
        regressors (list[str]): names of the regressors as used in the trees
        regressorsGEECollectionName (str): name of the GEE input collection
        regressorsGEENames (list[str]): names of the regressors in the GEE input collection
        responseGEEScaling (list[float]): scaling values to apply to the GEE output image
        responseGEEOffset (list[float]): offset values to apply to the GEE output image
        regressorsGEEScaling2 (list[float]): scaling values applied to the GEE input collection
            after the initial scale and offset
        regressorsGEEScaling (list[float]): scaling values to apply to the GEE input collection
        regressorsGEEOffset (list[float]): offset values to apply to the GEE input collection
        domain (list[uint64]): domain code values; accepted but currently not written to the features
        domainScaling (list[float]): scaling values used to create the domain
        domainOffset (list[float]): offset values used to create the domain
        asset_id (str): ee asset id path to export the feature collection to

    kwargs:
        description (str): description of the export task. default = "geemap_rf_export"
    """

    def _joined(values):
        # comma separated text form of a list of numbers
        return ','.join(str(value) for value in values)

    def _properties(tree):
        # properties of one feature; return characters (\n) are encoded as # to be parsed later
        return {
            "tree": tree.replace("\n", "#"),
            "response": ','.join(response),
            "regressors": ','.join(regressors),
            "regressorsGEECollectionName": regressorsGEECollectionName,
            "regressorsGENames": ','.join(regressorsGEENames),
            "responseGEScaling": _joined(responseGEEScaling),
            "responseGEOffset": _joined(responseGEEOffset),
            "regressorsGEScaling2": _joined(regressorsGEEScaling2),
            "regressorsGEScaling": _joined(regressorsGEEScaling),
            "regressorsGEOffset": _joined(regressorsGEEOffset),
            # "domain": _joined(domain),
            "domainScaling": _joined(domainScaling),
            "domainOffset": _joined(domainOffset),
        }

    # every feature sits on a point at (0, 0); a geometry is needed to export the collection
    origin = ee.Geometry.Point([0, 0])
    collection = ee.FeatureCollection([ee.Feature(origin, _properties(tree)) for tree in trees])

    # create the export task and start it
    export = ee.batch.Export.table.toAsset(
        collection=collection, description=description, assetId=asset_id
    )
    export.start()

In [8]:
def strings_to_classifier(trees=None,outputMode='REGRESSION'):
    """Build an ee.Classifier from the string representation of decision trees.

    args:
        trees (list[str]): list of string representation of the decision trees
        outputMode (str): output mode set on the classifier

    returns:
        classifier (ee.Classifier): ensemble decision tree classifier (i.e. RandomForest)
        built from the given trees

    """
    # wrap every tree string in an ee.String and hand the list to the ensemble classifier
    ensemble = ee.Classifier.decisionTreeEnsemble(list(map(ee.String, trees)))
    return ensemble.setOutputMode(outputMode)


# Read in calibration dictionaries
## There are separate dictionaries for Saturated and UnSaturated samples for each of LAI and FAPAR and for the naive algorithm and the FTL algorithm
## Each dictionary contains one key per biome number 
## Each biome has a dictionary that at minimum includes a key "DF" that corresponds to the calibration data 

In [None]:
# Read in calibration dictionaries
# There are separate dictionaries for Saturated and UnSaturated samples for each of LAI and FAPAR and for the naive algorithm and the FTL algorithm
# Each dictionary contains one key per biome number
# Each biome has a dictionary that at minimum includes a key "DF" that corresponds to the calibration data
# NOTE: pd.read_pickle unpickles arbitrary objects, only load files from a trusted source
calibrationDir = 'c:/users/rfernand/modisLandsat/code/'

calbiomeDictLAINAIVESat = pd.read_pickle(calibrationDir + 'calbiomeDictLAINAIVESat.pkl')
calbiomeDictLAINAIVEUnSat = pd.read_pickle(calibrationDir + 'calbiomeDictLAINAIVEUnSat.pkl')
calbiomeDictLAIFTLSat = pd.read_pickle(calibrationDir + 'calbiomeDictLAIFTLSat.pkl')
# was assigned to the misspelled name calbiomeDicLAIFTLUnSat, leaving the name used by the calibration cell undefined
calbiomeDictLAIFTLUnSat = pd.read_pickle(calibrationDir + 'calbiomeDictLAIFTLUnSat.pkl')
calbiomeDictLAIFTL2Sat = pd.read_pickle(calibrationDir + 'calbiomeDictLAIFTL2Sat.pkl')
calbiomeDictLAIFTL2UnSat = pd.read_pickle(calibrationDir + 'calbiomeDictLAIFTL2UnSat.pkl')

calbiomeDictFAPARNAIVESat = pd.read_pickle(calibrationDir + 'calbiomeDictFAPARNAIVESat.pkl')
calbiomeDictFAPARNAIVEUnSat = pd.read_pickle(calibrationDir + 'calbiomeDictFAPARNAIVEUnSat.pkl')
calbiomeDictFAPARFTLSat = pd.read_pickle(calibrationDir + 'calbiomeDictFAPARFTLSat.pkl')
calbiomeDictFAPARFTLUnSat = pd.read_pickle(calibrationDir + 'calbiomeDictFAPARFTLUnSat.pkl')
calbiomeDictFAPARFTL2Sat = pd.read_pickle(calibrationDir + 'calbiomeDictFAPARFTL2Sat.pkl')
calbiomeDictFAPARFTL2UnSat = pd.read_pickle(calibrationDir + 'calbiomeDictFAPARFTL2UnSat.pkl')

# Calibrate biome specific Hierarchical RF models and save as a new key in the associated dictionary for each biome

In [None]:
# FTLSat LAI: calibrate the hierarchical RF for LAI with the FTLSat dictionary as parent data
# and the NAIVESat dictionary as child data
method = 'FTLSat'
response = ['LAI']
regressors = ['red','NIR','cosSZA','cosVZA','cosSA']

# Names of the same regressors in GEE
regressorsGEENames = ['SR_B4','SR_B5','cosSZA','cosVZA','cosSA']

# Domain coding for GEE, currently we only allow one digit per regressor
# number of digits, scaling and offset, one entry per regressor
domainIndex = [1] * len(regressors)
domainScaling = [10/10000] * len(regressors)
domainOffset = [0] * len(regressors)

# Calibrate the Hierarchical RF; the result is stored in each biome dictionary of calbiomeDictLAIFTLSat
calbiomeDictLAIFTLSat = hierarchicalRF(
    dataDictParent=calbiomeDictLAIFTLSat,
    dataDictChild=calbiomeDictLAINAIVESat,
    regressorsNames=regressors,
    regressorsGEENames=regressorsGEENames,
    response=response,
    domainScaling=domainScaling,
    domainOffset=domainOffset,
    maxDepthParent=20,
    maxDepthChild=20,
    minSamplesSplit=2,
    maxleafnodesParent=999,
    minSamplesLeafParent=1000,
    maxleafnodesChild=999,
    minSamplesLeafChild=1,
    maxFeatures=4,
    nTrees=100,
)

# Run this if you want to check the predictions
# calbiomeDictLAIFTLSat=  predictClassifier(calbiomeDictLAIFTLSat,calbiomeDictLAIFTLSat, 'FTLSat', regressors, ['LAI'])

In [None]:
# FTLUnSat LAI: calibrate the hierarchical RF for LAI with the FTLUnSat dictionary as parent data
# and the NAIVEUnSat dictionary as child data
method = 'FTLUnSat'
response = ['LAI']
regressors = ['red','NIR','cosSZA','cosVZA','cosSA']

# Names of the same regressors in GEE
regressorsGEENames = ['SR_B4','SR_B5','cosSZA','cosVZA','cosSA']

# Domain coding for GEE, currently we only allow one digit per regressor
# number of digits, scaling and offset, one entry per regressor
domainIndex = [1] * len(regressors)
domainScaling = [10/10000] * len(regressors)
domainOffset = [0] * len(regressors)

# Calibrate the Hierarchical RF; the result is stored in each biome dictionary of calbiomeDictLAIFTLUnSat
calbiomeDictLAIFTLUnSat = hierarchicalRF(
    dataDictParent=calbiomeDictLAIFTLUnSat,
    dataDictChild=calbiomeDictLAINAIVEUnSat,
    regressorsNames=regressors,
    regressorsGEENames=regressorsGEENames,
    response=response,
    domainScaling=domainScaling,
    domainOffset=domainOffset,
    maxDepthParent=20,
    maxDepthChild=20,
    minSamplesSplit=2,
    maxleafnodesParent=999,
    minSamplesLeafParent=1000,
    maxleafnodesChild=999,
    minSamplesLeafChild=1,
    maxFeatures=4,
    nTrees=100,
)

# Run this if you want to check the predictions
# calbiomeDictLAIFTLUnSat=  predictClassifier(calbiomeDictLAIFTLUnSat,calbiomeDictLAIFTLUnSat, 'FTLUnSat', regressors, ['LAI'])

In [None]:
# FTLSat FAPAR: calibrate the hierarchical RF for FAPAR with the FTLSat dictionary as parent data
# and the NAIVESat dictionary as child data
method = 'FTLSat'
response = ['FAPAR']
regressors = ['red','NIR','cosSZA','cosVZA','cosSA']

# Names of the same regressors in GEE
regressorsGEENames = ['SR_B4','SR_B5','cosSZA','cosVZA','cosSA']

# Domain coding for GEE, currently we only allow one digit per regressor
# number of digits, scaling and offset, one entry per regressor
domainIndex = [1] * len(regressors)
domainScaling = [10/10000] * len(regressors)
domainOffset = [0] * len(regressors)

# Calibrate the Hierarchical RF; the result is stored in each biome dictionary of calbiomeDictFAPARFTLSat
calbiomeDictFAPARFTLSat = hierarchicalRF(
    dataDictParent=calbiomeDictFAPARFTLSat,
    dataDictChild=calbiomeDictFAPARNAIVESat,
    regressorsNames=regressors,
    regressorsGEENames=regressorsGEENames,
    response=response,
    domainScaling=domainScaling,
    domainOffset=domainOffset,
    maxDepthParent=20,
    maxDepthChild=20,
    minSamplesSplit=2,
    maxleafnodesParent=999,
    minSamplesLeafParent=1000,
    maxleafnodesChild=999,
    minSamplesLeafChild=1,
    maxFeatures=4,
    nTrees=100,
)

# Run this if you want to check the predictions
# calbiomeDictFAPARFTLSat=  predictClassifier(calbiomeDictFAPARFTLSat,calbiomeDictFAPARFTLSat, 'FTLSat', regressors, ['FAPAR'])

In [None]:
# FTLUnSat FAPAR: calibrate the hierarchical RF for FAPAR with the FTLUnSat dictionary as parent data
# and the NAIVEUnSat dictionary as child data
method = 'FTLUnSat'
regressors = ['red','NIR','cosSZA','cosVZA','cosSA']
response = ['FAPAR']

# Stuff for GEE
# Specify scaling and index for Domain, currently we only allow one digit per regressor
# Number of digits per regressor
domainIndex = [1,1,1,1,1]

# Scaling per regressor
domainScaling = [ 10/10000, 10/10000,10/10000,10/10000,10/10000]

# Offset per regressor
domainOffset = [ 0,0,0,0,0]

# GEE regressor names
regressorsGEENames =  [ 'SR_B4', 'SR_B5','cosSZA','cosVZA','cosSA']

# Calibrate the Hierarchical RF
# the parent dictionary was previously given as calbiomeDictFAPARUnFTLSat, a name that is never defined;
# the dictionary loaded for this method/response is calbiomeDictFAPARFTLUnSat
calbiomeDictFAPARFTLUnSat= hierarchicalRF(calbiomeDictFAPARFTLUnSat,calbiomeDictFAPARNAIVEUnSat,regressors,regressorsGEENames, response, domainScaling,domainOffset, maxDepthParent=20,maxDepthChild=20,minSamplesSplit=2,\
                             maxleafnodesParent=999,  minSamplesLeafParent=1000,maxleafnodesChild= 999, minSamplesLeafChild=1,maxFeatures=4,nTrees = 100)

# Run this if you want to check the predictions
# calbiomeDictFAPARFTLUnSat=  predictClassifier(calbiomeDictFAPARFTLUnSat,calbiomeDictFAPARFTLUnSat, 'FTLUnSat', regressors, ['FAPAR'])

# Upload trees to GEE

In [None]:
# Authenticate this session with Earth Engine before any upload cell is run
ee.Authenticate()

In [None]:
# Initialize the Earth Engine client library; run after ee.Authenticate()
ee.Initialize()

In [None]:
# upload FTL LAI regressors to GEE as feature collections (one asset for the parent tree and one per child forest)
import geemap  # provides ee_user_id(); it is not imported in the modules cell

# prefix of the calibration dictionary key and of the exported asset names
method = 'FTL'

# response variable name
response = ['LAI']
targetDirectory = '/FTL_trees_LAI/'
# NOTE(review): neither calbiomeDictLAIFTL nor a 'FTLLAIparentRFDict' key is created by the cells of
# this notebook (only the Sat / UnSat variants are) -- confirm where this dictionary comes from
methodDict = calbiomeDictLAIFTL

# Scaling and offset required of GEE regressors
regressorsGEECollectionName= "LANDSAT/LC08/C02/T1_L2"
regressorsGEEScaling = [2.75e-05,2.75e-05,1,1,1]
regressorsGEEOffset = [-0.02,-0.02,0,0,0]
regressorsGEEScaling2 = [10000,10000,1,1,1]
responseGEEScaling = [10]
responseGEEOffset = [0]

# arguments of make_tree, which requires both a maximum depth and a number of decimals
# NOTE(review): the original call passed the single value 3; it is used here as the number of decimals
# and the depth is set to the max depth used at calibration (20) so no branch is truncated -- confirm
treeMaxDepth = 20
treeDecimals = 3

for biome in [1,2,3,4,5,6,7,8]:
    print('biome', biome)
    parentRFDict = methodDict[biome][method+response[0]+'parentRFDict']
    # the trees were fitted on the generic names (x1, x2, ...) stored with the RF, which is also
    # what make_tree expects, and not on the data frame column names held in the global `regressors`
    treeRegressors = parentRFDict['regressors']
    geeNames = parentRFDict['regressorsGEE']
    domain = parentRFDict['domain']
    trees = [make_tree(parentRFDict['RF'][0],treeRegressors,treeMaxDepth,treeDecimals)]
    print('# of children', len(parentRFDict['childrenRFDict']))
    assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'parentRF'
    export_trees_to_fc_CCRS(trees,response,treeRegressors,regressorsGEECollectionName,geeNames,responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,domain,domainScaling,domainOffset,assetID)
    ee_classifier = strings_to_classifier(trees)
    print(ee_classifier.getInfo())

    childSize = []
    for partition, childRFDict in parentRFDict['childrenRFDict'].items():
        print('child #', partition)
        childSize.append(childRFDict['size'])
        trees = [make_tree(tree,childRFDict['regressors'],treeMaxDepth,treeDecimals) for tree in childRFDict['RF'].estimators_]
        # round before truncating: partition*1000 can fall just below the intended integer
        assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'childRF' + str(int(round(partition*1000)))
        # children are exported with a dummy domain; literals are used so that the global
        # domainScaling / domainOffset needed by the next parent export are not overwritten
        export_trees_to_fc_CCRS(trees,response,childRFDict['regressors'],regressorsGEECollectionName,childRFDict['regressorsGEE'],responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,[0],[0],[0],assetID)
        # ee_classifier = strings_to_classifier(trees)
        # print(ee_classifier.getInfo())
    print(childSize)

In [None]:
# upload FTLSat LAI regressors to GEE as feature collections (one asset for the parent tree and one per child forest)
import geemap  # provides ee_user_id(); it is not imported in the modules cell

# prefix of the calibration dictionary key and of the exported asset names
method = 'FTLSat'

# response variable name
response = ['LAI']
targetDirectory = '/FTL_trees_LAI_Sat/'
# was calbiomeDictLAIFTLSAT, a name that is never defined
methodDict = calbiomeDictLAIFTLSat

# Scaling and offset required of GEE regressors
regressorsGEECollectionName= "LANDSAT/LC08/C02/T1_L2"
regressorsGEEScaling = [2.75e-05,2.75e-05,1,1,1]
regressorsGEEOffset = [-0.02,-0.02,0,0,0]
regressorsGEEScaling2 = [10000,10000,10000,10000,10000]
responseGEEScaling = [10]
responseGEEOffset = [0]

# arguments of make_tree, which requires both a maximum depth and a number of decimals
# NOTE(review): the original call passed the single value 3; it is used here as the number of decimals
# and the depth is set to the max depth used at calibration (20) so no branch is truncated -- confirm
treeMaxDepth = 20
treeDecimals = 3

for biome in [1,2,3,4,5,6,7,8]:
    print('biome', biome)
    parentRFDict = methodDict[biome][method+response[0]+'parentRFDict']
    # the trees were fitted on the generic names (x1, x2, ...) stored with the RF, which is also
    # what make_tree expects, and not on the data frame column names held in the global `regressors`
    treeRegressors = parentRFDict['regressors']
    geeNames = parentRFDict['regressorsGEE']
    domain = parentRFDict['domain']
    trees = [make_tree(parentRFDict['RF'][0],treeRegressors,treeMaxDepth,treeDecimals)]
    print('# of children', len(parentRFDict['childrenRFDict']))
    assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'parentRF'
    export_trees_to_fc_CCRS(trees,response,treeRegressors,regressorsGEECollectionName,geeNames,responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,domain,domainScaling,domainOffset,assetID)
    ee_classifier = strings_to_classifier(trees)
    print(ee_classifier.getInfo())

    childSize = []
    for partition, childRFDict in parentRFDict['childrenRFDict'].items():
        print('child #', partition)
        childSize.append(childRFDict['size'])
        trees = [make_tree(tree,childRFDict['regressors'],treeMaxDepth,treeDecimals) for tree in childRFDict['RF'].estimators_]
        # round before truncating: partition*1000 can fall just below the intended integer
        assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'childRF' + str(int(round(partition*1000)))
        # children are exported with a dummy domain; literals are used so that the global
        # domainScaling / domainOffset needed by the next parent export are not overwritten
        export_trees_to_fc_CCRS(trees,response,childRFDict['regressors'],regressorsGEECollectionName,childRFDict['regressorsGEE'],responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,[0],[0],[0],assetID)
        # ee_classifier = strings_to_classifier(trees)
        # print(ee_classifier.getInfo())
    print(childSize)

In [None]:
# upload FTLUnSat LAI regressors to GEE as feature collections (one asset for the parent tree and one per child forest)
import geemap  # provides ee_user_id(); it is not imported in the modules cell

# prefix of the exported asset names (spelling kept so that existing asset ids do not change)
method = 'FTLUNSat'
# the calibration cell stores its result under 'FTLUnSat' + response + 'parentRFDict'
calibrationMethod = 'FTLUnSat'

# response variable name
response = ['LAI']
targetDirectory = '/FTL_trees_LAI_UNSat/'
# was calbiomeDictLAIFTLSAT, a name that is never defined; the unsaturated LAI dictionary belongs here
methodDict = calbiomeDictLAIFTLUnSat

# Scaling and offset required of GEE regressors
regressorsGEECollectionName= "LANDSAT/LC08/C02/T1_L2"
regressorsGEEScaling = [2.75e-05,2.75e-05,1,1,1]
regressorsGEEOffset = [-0.02,-0.02,0,0,0]
regressorsGEEScaling2 = [10000,10000,10000,10000,10000]
responseGEEScaling = [10]
responseGEEOffset = [0]

# arguments of make_tree, which requires both a maximum depth and a number of decimals
# NOTE(review): the original call passed the single value 3; it is used here as the number of decimals
# and the depth is set to the max depth used at calibration (20) so no branch is truncated -- confirm
treeMaxDepth = 20
treeDecimals = 3

for biome in [1,2,3,4,5,6,7,8]:
    print('biome', biome)
    parentRFDict = methodDict[biome][calibrationMethod+response[0]+'parentRFDict']
    # the trees were fitted on the generic names (x1, x2, ...) stored with the RF, which is also
    # what make_tree expects, and not on the data frame column names held in the global `regressors`
    treeRegressors = parentRFDict['regressors']
    geeNames = parentRFDict['regressorsGEE']
    domain = parentRFDict['domain']
    trees = [make_tree(parentRFDict['RF'][0],treeRegressors,treeMaxDepth,treeDecimals)]
    print('# of children', len(parentRFDict['childrenRFDict']))
    assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'parentRF'
    export_trees_to_fc_CCRS(trees,response,treeRegressors,regressorsGEECollectionName,geeNames,responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,domain,domainScaling,domainOffset,assetID)
    ee_classifier = strings_to_classifier(trees)
    print(ee_classifier.getInfo())

    childSize = []
    for partition, childRFDict in parentRFDict['childrenRFDict'].items():
        print('child #', partition)
        childSize.append(childRFDict['size'])
        trees = [make_tree(tree,childRFDict['regressors'],treeMaxDepth,treeDecimals) for tree in childRFDict['RF'].estimators_]
        # round before truncating: partition*1000 can fall just below the intended integer
        assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'childRF' + str(int(round(partition*1000)))
        # children are exported with a dummy domain; literals are used so that the global
        # domainScaling / domainOffset needed by the next parent export are not overwritten
        export_trees_to_fc_CCRS(trees,response,childRFDict['regressors'],regressorsGEECollectionName,childRFDict['regressorsGEE'],responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,[0],[0],[0],assetID)
        # ee_classifier = strings_to_classifier(trees)
        # print(ee_classifier.getInfo())
    print(childSize)

In [None]:
# upload FTLSat FAPAR regressors to GEE as feature collections (one asset for the parent tree and one per child forest)
import geemap  # provides ee_user_id(); it is not imported in the modules cell

# prefix of the calibration dictionary key and of the exported asset names
# (the closing quote was missing)
method = 'FTLSat'

# response variable name
response = ['FAPAR']
# the closing quote was missing; the trailing slash follows the other upload cells
targetDirectory = '/FTL_trees_FAPAR_Sat/'
# was calbiomeDictLAIFTLSAT, a name that is never defined; the saturated FAPAR dictionary belongs here
methodDict = calbiomeDictFAPARFTLSat

# Scaling and offset required of GEE regressors
regressorsGEECollectionName= "LANDSAT/LC08/C02/T1_L2"
regressorsGEEScaling = [2.75e-05,2.75e-05,1,1,1]
regressorsGEEOffset = [-0.02,-0.02,0,0,0]
regressorsGEEScaling2 = [10000,10000,10000,10000,10000]
responseGEEScaling = [100]
responseGEEOffset = [0]

# arguments of make_tree, which requires both a maximum depth and a number of decimals
# NOTE(review): the original call passed the single value 3; it is used here as the number of decimals
# and the depth is set to the max depth used at calibration (20) so no branch is truncated -- confirm
treeMaxDepth = 20
treeDecimals = 3

for biome in [1,2,3,4,5,6,7,8]:
    print('biome', biome)
    parentRFDict = methodDict[biome][method+response[0]+'parentRFDict']
    # the trees were fitted on the generic names (x1, x2, ...) stored with the RF, which is also
    # what make_tree expects, and not on the data frame column names held in the global `regressors`
    treeRegressors = parentRFDict['regressors']
    geeNames = parentRFDict['regressorsGEE']
    domain = parentRFDict['domain']
    trees = [make_tree(parentRFDict['RF'][0],treeRegressors,treeMaxDepth,treeDecimals)]
    print('# of children', len(parentRFDict['childrenRFDict']))
    assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'parentRF'
    export_trees_to_fc_CCRS(trees,response,treeRegressors,regressorsGEECollectionName,geeNames,responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,domain,domainScaling,domainOffset,assetID)
    # ee_classifier = strings_to_classifier(trees)
    # print(ee_classifier.getInfo())

    childSize = []
    for partition, childRFDict in parentRFDict['childrenRFDict'].items():
        print('child #', partition)
        childSize.append(childRFDict['size'])
        trees = [make_tree(tree,childRFDict['regressors'],treeMaxDepth,treeDecimals) for tree in childRFDict['RF'].estimators_]
        # round before truncating: partition*1000 can fall just below the intended integer
        assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'childRF' + str(int(round(partition*1000)))
        # children are exported with a dummy domain; literals are used so that the global
        # domainScaling / domainOffset needed by the next parent export are not overwritten
        export_trees_to_fc_CCRS(trees,response,childRFDict['regressors'],regressorsGEECollectionName,childRFDict['regressorsGEE'],responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,[0],[0],[0],assetID)
        # ee_classifier = strings_to_classifier(trees)
        # print(ee_classifier.getInfo())
    print(childSize)

In [None]:
# upload FTLUnSat FAPAR regressors to GEE as feature collections (one asset for the parent tree and one per child forest)
import geemap  # provides ee_user_id(); it is not imported in the modules cell

# prefix of the exported asset names (spelling kept so that existing asset ids do not change)
method = 'FTLUNSat'
# the calibration cell stores its result under 'FTLUnSat' + response + 'parentRFDict'
calibrationMethod = 'FTLUnSat'

# response variable name
response = ['FAPAR']
targetDirectory = '/FTL_trees_FAPAR_UNSat/'
# was calbiomeDictLAIFTLSAT, a name that is never defined; the unsaturated FAPAR dictionary belongs here
methodDict = calbiomeDictFAPARFTLUnSat

# Scaling and offset required of GEE regressors
regressorsGEECollectionName= "LANDSAT/LC08/C02/T1_L2"
regressorsGEEScaling = [2.75e-05,2.75e-05,1,1,1]
regressorsGEEOffset = [-0.02,-0.02,0,0,0]
regressorsGEEScaling2 = [10000,10000,10000,10000,10000]
responseGEEScaling = [100]
responseGEEOffset = [0]

# arguments of make_tree, which requires both a maximum depth and a number of decimals
# NOTE(review): the original call passed the single value 3; it is used here as the number of decimals
# and the depth is set to the max depth used at calibration (20) so no branch is truncated -- confirm
treeMaxDepth = 20
treeDecimals = 3

for biome in [1,2,3,4,5,6,7,8]:
    print('biome', biome)
    parentRFDict = methodDict[biome][calibrationMethod+response[0]+'parentRFDict']
    # the trees were fitted on the generic names (x1, x2, ...) stored with the RF, which is also
    # what make_tree expects, and not on the data frame column names held in the global `regressors`
    treeRegressors = parentRFDict['regressors']
    geeNames = parentRFDict['regressorsGEE']
    domain = parentRFDict['domain']
    trees = [make_tree(parentRFDict['RF'][0],treeRegressors,treeMaxDepth,treeDecimals)]
    print('# of children', len(parentRFDict['childrenRFDict']))
    assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'parentRF'
    export_trees_to_fc_CCRS(trees,response,treeRegressors,regressorsGEECollectionName,geeNames,responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,domain,domainScaling,domainOffset,assetID)
    # ee_classifier = strings_to_classifier(trees)
    # print(ee_classifier.getInfo())

    childSize = []
    for partition, childRFDict in parentRFDict['childrenRFDict'].items():
        print('child #', partition)
        childSize.append(childRFDict['size'])
        trees = [make_tree(tree,childRFDict['regressors'],treeMaxDepth,treeDecimals) for tree in childRFDict['RF'].estimators_]
        # round before truncating: partition*1000 can fall just below the intended integer
        assetID = geemap.ee_user_id() + targetDirectory + method + 'biome' + str(biome) + 'childRF' + str(int(round(partition*1000)))
        # children are exported with a dummy domain; literals are used so that the global
        # domainScaling / domainOffset needed by the next parent export are not overwritten
        export_trees_to_fc_CCRS(trees,response,childRFDict['regressors'],regressorsGEECollectionName,childRFDict['regressorsGEE'],responseGEEScaling,responseGEEOffset,regressorsGEEScaling2,regressorsGEEScaling,regressorsGEEOffset,[0],[0],[0],assetID)
        # ee_classifier = strings_to_classifier(trees)
        # print(ee_classifier.getInfo())
    print(childSize)