# [ **REQUIRED** ] Import, initialize

In [1]:
import math
import geemap
import ee
import configparser as cp
import io
import ast
import subprocess

from classification import classifyAndAssess
from featuresForModeling import generateFeatures

ee.Initialize()

# [ **REQUIRED** ] Check that features available via config file matches those in the set of feature rasters

Each feature is created separately, in the `featuresForModeling` module, and exported as EE assets. The names of each of those features (i.e., the respective raster bandnames) are derived from the `config.ini` file and set at export time.

In this section, we read in the names from `config.ini` and compare that list with that from the feature raster assets. If the two do not match, it might be best to go back to the `featuresForModeling` module and fix it before proceeding here.

## Feature names from config file

Read-in, one by one, names of all features as stored in the config file.

**NOTE:** 
* This is done by manually scanning the config file. There is currently no way to do this programmatically. 
* `lon` and `lat` are not set in the config file, so are initialized here. Because they are not generated & saved as feature rasters but are simply invoked with `ee.Image.pixelLonLat()` at labeled points sampling time.

In [2]:
with open("config.ini", 'r') as f:
    fileContents = f.read()
config = cp.RawConfigParser(allow_no_value = True, interpolation = cp.ExtendedInterpolation())
config.read_file(io.StringIO(fileContents))

homeFolder = config.get('CORE', 'assetFolderWithFeatures')

configSeas = config['FEATURES-SEASONALITY']
phase      = configSeas.get('featureBandNamePhase')
ampl       = configSeas.get('featureBandNameAmplitude')
offset     = configSeas.get('featureBandNameOffset')
trend      = configSeas.get('featureBandNameTrend')
phase_seg  = configSeas.get('featureBandNamePhaseSegmented')
ampl_seg   = configSeas.get('featureBandNameAmplitudeSegmented')

configTCTd  = config['FEATURES-TASSELLEDCAP']
dBrt        = configTCTd.get('featureBandNameDrySeasonBrightness')
dGrn        = configTCTd.get('featureBandNameDrySeasonGreenness')
dWet        = configTCTd.get('featureBandNameDrySeasonWetness')
wBrt        = configTCTd.get('featureBandNameWetSeasonBrightness')
wGrn        = configTCTd.get('featureBandNameWetSeasonGreenness')
wWet        = configTCTd.get('featureBandNameWetSeasonWetness')
tctdiff     = configTCTd.get('featureBandNameTCTDifference')
dBrt_seg    = dBrt + '_seg'
dGrn_seg    = dGrn + '_seg'
dWet_seg    = dWet + '_seg'
wBrt_seg    = wBrt + '_seg'
wGrn_seg    = wGrn + '_seg'
wWet_seg    = wWet + '_seg'
tctdiff_seg = configTCTd.get('featureBandNameTCTDifferenceSegmented')

configGrBr   = config['FEATURES-GREENINGTEMPORALSTATS']
ndviMin      = configGrBr.get('featureBandNameGreening5thPerc')
ndviMed      = configGrBr.get('featureBandNameGreening50thPerc')
ndviMax      = configGrBr.get('featureBandNameGreening95thPerc')
grnExt       = configGrBr.get('featureBandNameGreeningExtent')
brnExt       = configGrBr.get('featureBandNameBrowningExtent')
grnbrnNd     = configGrBr.get('featureBandNameGrningBrningNd')
ndviMed_seg  = configGrBr.get('featureBandNameGreening50thPercSegmented')
grnExt_seg   = configGrBr.get('featureBandNameGreeningExtentSegmented')
brnExt_seg   = configGrBr.get('featureBandNameBrowningExtentSegmented')
grnbrnNd_seg = configGrBr.get('featureBandNameGrningBrningNdSegmented')

configPalsar = config['FEATURES-PALSAR']
palsar       = configPalsar.get('featureBandNamePalsarMedian')
palsar_seg   = configPalsar.get('featureBandNamePalsarMedianSegmented')

configEt   = config['FEATURES-EVAPOTRANSPIRATION']
etSoil     = configEt.get('featureBandNameEtSoil')
etVeg      = configEt.get('featureBandNameEtVeg')
etSoil_seg = configEt.get('featureBandNameEtSoilSegmented')
etVeg_seg  = configEt.get('featureBandNameEtVegSegmented')

configGeomorph = config['FEATURES-TOPOGRAPHY-GEOMORPHOLOGY-RUGGEDNESS']
geomorphRugged = configGeomorph.get('featureBandName')
geomorphRugged_seg = configGeomorph.get('featureBandNameGeomorphRuggedSegmented')

zoneNumSuff = 'Num'
zoneOheSuff = 'Ohe_'

configZnStates   = config['AOI-CLASSIFICATION-ZONES-STATES']
zoneStatePref    = configZnStates.get("featureBandNamePrefix")
zoneStateNum     = zoneStatePref + zoneNumSuff
zoneStateLabels  = list(ast.literal_eval(configZnStates.get("groupsOfStatesLabels")))
zoneStateOhe     = [zoneStatePref + zoneOheSuff + label for label in zoneStateLabels]

configZnBiomes  = config['AOI-CLASSIFICATION-ZONES-BIOMES']
zoneBiomePref   = configZnBiomes.get("featureBandNamePrefix")
zoneBiomeNum    = zoneBiomePref + zoneNumSuff
zoneBiomeLabels = list(ast.literal_eval(configZnBiomes.get("biomeLabels")))
zoneBiomeOhe    = [zoneBiomePref + zoneOheSuff + label for label in zoneBiomeLabels]

configZnGeolAge = config['AOI-CLASSIFICATION-ZONES-GEOLOGICAL-AGE']
zoneGeolAgePref = configZnGeolAge.get("featureBandNamePrefix")
zoneGeolAgeNum  = zoneGeolAgePref + zoneNumSuff
zoneGeolAgeLabels = list(ast.literal_eval(configZnGeolAge.get("geologicalAgeNames")))
zoneGeolAgeOhe    = [zoneGeolAgePref + zoneOheSuff + label for label in zoneGeolAgeLabels]

elev_seg = config.get('FEATURES-ELEVATION', 'featureBandNameElevSegmented')
ppt_seg = config.get('FEATURES-PRECIPITATION', 'featureBandNameAnnRainSegmented')
topoMtpi_seg = config.get('FEATURES-TOPOGRAPHY-MTPI', 'featureBandNameTopoMtpiSegmented')
topoHand_seg = config.get('FEATURES-TOPOGRAPHY-HAND', 'featureBandNameTopoHandSegmented')
aoi = config.get('AOI', 'bandNameAOI')

lon = "longitude"
lat = "latitude"

Gather them all into a list of lists, where sublists are conceptually grouped set of features.

**NOTE:** 
* The nesting should be one-deep, no more, no less
* Classification zones feature/s appear differently in the list, depending on whether they have numerical or one-hot encoding
  * Numeric: `[zoneNum]`, because it is a single band raster
  * One-hot: `zoneOhe`, because it is a multiband raster, and `zoneOhe` is already a list with all the bands

In [3]:
def flattenList(nestedList):
    fl = []
    for sublist in nestedList:
        fl = fl + sublist
    return fl

gatheredFromConfig = [ \
    [phase, ampl, offset, trend, phase_seg, ampl_seg], \
    [dBrt, dGrn, dWet, wBrt, wGrn, wWet, tctdiff, dBrt_seg, dGrn_seg, dWet_seg, wBrt_seg, wGrn_seg, wWet_seg, tctdiff_seg], \
    [ndviMin, ndviMed, ndviMax, grnExt, brnExt, grnbrnNd, ndviMed_seg, grnExt_seg, brnExt_seg, grnbrnNd_seg], \
    [palsar, palsar_seg], \
    [etSoil, etVeg, etSoil_seg, etVeg_seg], \
    [elev_seg], \
    [ppt_seg], \
    [topoMtpi_seg, topoHand_seg], \
    [geomorphRugged, geomorphRugged_seg], \
    [zoneStateNum], \
    zoneStateOhe, \
    [zoneBiomeNum], \
    zoneBiomeOhe, \
    [zoneGeolAgeNum], \
    zoneGeolAgeOhe, \
    [lon, lat], \
    [aoi]]

fromConfigFlattened = flattenList(gatheredFromConfig)

## Feature names from saved feature rasters

The function `assembleAllExistingFeatureRasters()` in the `generateFeatures` module gathers all relevant rasters from the config-designated folder of EE assets.

It "manually" reads-in all feature rasters using their generation functions, but with `returnExisting` flag set to `True`.


In [4]:
fromFeatureFolder = generateFeatures.assembleAllExistingFeatureRasters().bandNames()

## Compare & check for full match

In [5]:
print("Features from config file:", fromConfigFlattened)
print("Features from asset rasters:", fromFeatureFolder.getInfo())
print("Asset folder for rasters:", config.get('CORE', 'assetFolderWithFeatures'))

print("Full match between the two (True/False):", ee.List(fromConfigFlattened).containsAll(fromFeatureFolder).getInfo())

Features from config file: ['phase', 'amplitude', 'offset', 'trend', 'phase_seg', 'amplitude_seg', 'd_brt', 'd_grn', 'd_wet', 'w_brt', 'w_grn', 'w_wet', 'tct_diff', 'd_brt_seg', 'd_grn_seg', 'd_wet_seg', 'w_brt_seg', 'w_grn_seg', 'w_wet_seg', 'tct_diff_seg', 'mn', 'md', 'mx', 'grng_ext', 'brng_ext', 'nd_grbr', 'md_seg', 'grng_ext_seg', 'brng_ext_seg', 'nd_grbr_seg', 'palsar', 'palsar_seg', 'et_soil', 'et_veg', 'et_soil_seg', 'et_veg_seg', 'elev_seg', 'annrf_seg', 'topo_seg', 'topo_hand_seg', 'ruggedness', 'ruggedness_seg', 'zoneStateNum', 'zoneStateOhe_pbbh', 'zoneStateOhe_rj', 'zoneStateOhe_gj', 'zoneStateOhe_mh', 'zoneStateOhe_mpod', 'zoneStateOhe_aptg', 'zoneStateOhe_tn', 'zoneStateOhe_ka', 'zoneBiomeNum', 'zoneBiomeOhe_na', 'zoneBiomeOhe_fldgrssvn', 'zoneBiomeOhe_mntgrsshrb', 'zoneBiomeOhe_trsubtrmoistblfor', 'zoneBiomeOhe_tmpconiffor', 'zoneBiomeOhe_tempblmixdfor', 'zoneBiomeOhe_trsubtrconiffor', 'zoneBiomeOhe_mngr', 'zoneBiomeOhe_desxershrb', 'zoneBiomeOhe_trsubtrdryblfor', 'zone

## In case check fails...

... it means that updates to either the config file or raster set handled by `assembleAllExistingFeatureRasters()` has not been made to reflect in the other. 

Check for minor mistakes in either one or both of them.

OR

Check whether a new feature was recently added to the module and if it was done thoroughly:
* A separate section for it added in the `config.ini`, where its feature names are set
  * And it added to the [appropriate section here at the top here](#Feature-names-from-config-file)
* A separate function for it added in the `generateFeatures` module, that follows the standard structure as others with `returnExisting` and `startFreshExport` handling
  * Followed by its at least one successful execution with `startFreshExport = True`
* It getting added to the `assembleAllExistingFeatureRasters()`, with `returnExisting = True`

# Run classification, after selecting features, classifier and zone encoding mode

The `trainAndPredict()` function in the `classifyAndAssess` module is the workhorse for performing the classification workflow. It -
* trains a model (on a training fraction it creates on-the-fly)
* predicts with it to create the probability raster and calculates classifier accuracy metrics
* predicts with it to create the predicted points table, with the chosen input features, top1Label, classwise probabilities
* exports the predicted raster, predicted points table and a featurecollection with accuracy metrics

## HOW TO prepare a set of features to run classification with

Make a list of feature band names, using names in the [config file section above](#Feature-names-from-config-file), meant as input bands into the classifier. 

Since some of them are lists themselves, be sure to build the list here correctly, and then flatten it. See example below

In [6]:
# Set of features used in V1, with zoneNum added on -- which is used for region-wise assessment.
fs = [ \
      [phase_seg, ampl_seg], \
      [palsar_seg], \
      [ndviMed_seg, grnExt_seg, brnExt_seg, grnbrnNd_seg], \
      [topoHand_seg], \
      [tctdiff_seg], \
      [elev_seg], \
      [lon, lat], \
      [zoneStateNum]]
fsFlattened = flattenList(fs)
print("Selected list of features:", fsFlattened)

Selected list of features: ['phase_seg', 'amplitude_seg', 'palsar_seg', 'md_seg', 'grng_ext_seg', 'brng_ext_seg', 'nd_grbr_seg', 'topo_hand_seg', 'tct_diff_seg', 'elev_seg', 'longitude', 'latitude', 'zoneStateNum']


## HOW TO prepare classifier options and run, to export result into config-set location

Classifier options and features options are set as dictionaries and passed into the `trainAndPredict()` function.
* Classifier options currently supported:
  * `classifier`
    * _RandomForest_
    * _GradientBoostedTrees_
  * `numTrees`
    * number of trees in the classifier: passed to `numberOfTrees` argument of the trees `ee.Classifier.*` calls
  * `trainFraction`
    * Used for the train fraction of the train-test random split of labeled points
* Features options currently support:
  * `names`
    * list of names of features to use in the classification. Should include the zones encoded raster also
  * `zonationBasis`
    * basis on which zonation was done
      * _states_ 
      * _biomes_
      * _geologicalAge_
      * _None_
  * `zoneEncodingMode`
    * _numeric_
    * _oneHot_
    * _None_

## Classify with GBT classifier, all the good features we have and biomes zonation OHE format

In [7]:
cOpts_gbt200 = dict(classifier = "GradientBoostedTrees", numTrees = 200, trainFraction = 0.7)
globalModelGbtBiomesZonesOhe = [ \
                 [phase_seg, ampl_seg], \
                 [palsar_seg], \
                 [ndviMed_seg, grnExt_seg, brnExt_seg, grnbrnNd_seg], \
                 [topoHand_seg], \
                 [topoMtpi_seg], \
                 [dBrt_seg, dGrn_seg, dWet_seg, wBrt_seg, wGrn_seg, wWet_seg, tctdiff_seg], \
                 [ppt_seg], \
                 [elev_seg], \
                 [geomorphRugged_seg], \
                 zoneBiomeOhe]
globalModelGbtBiomesZonesOheFlattened = flattenList(globalModelGbtBiomesZonesOhe)
fOpts_globalModelGbt_biomeZonesOhe = dict(names = globalModelGbtBiomesZonesOheFlattened, zonationBasis = "biomes", zoneEncodingMode = "oneHot")

resFolderGlobalModelGbtBiomeZonesOhe = "globalModelGbtBiomeZonesOhe"
createResFolderCmd = f"earthengine create folder {homeFolder}{resFolderGlobalModelGbtBiomeZonesOhe}"
process = subprocess.Popen(createResFolderCmd.split(), stdout=subprocess.PIPE)
folderCreated, error = process.communicate()
print("Result folder creation error (if any):", folderCreated, error)

Result folder creation error (if any): b'Asset projects/ee-open-natural-ecosystems/assets/finalSprint/globalModelGbtBiomeZonesOhe already exists.\n' None


In [8]:
globalModelGbtBiomesZonesOheRes = classifyAndAssess.trainAndPredict(fOpts_globalModelGbt_biomeZonesOhe, cOpts_gbt200, resultNewFolderName = resFolderGlobalModelGbtBiomeZonesOhe, startFreshExport = True)