In [None]:
import sys
# Make the local data_stack_testing helpers importable; point this at your own checkout.
#sys.path.append("/home/markdj/repos/Polesia-Landcover/data_stack_testing/")
# The backslash is escaped explicitly: a bare "\P" is an invalid escape sequence and
# triggers a warning on recent Python versions. The resulting path string is unchanged.
sys.path.append("C:/Users/tpfdo/OneDrive/Documents/GitHub\\Polesia-Landcover/data_stack_testing/")
import ee
import os
import csv
import matplotlib.pyplot as plt
from geemap import geemap
from data_stack_tools_v1 import fetch_sentinel1_v2, fetch_sentinel2_v2, map_topography, create_data_stack_v2
# Start the Earth Engine session used by every cell below.
ee.Initialize()

In [None]:
# Paths and variable settings
#fp_train_ext = "/home/markdj/Dropbox/artio/polesia/val/Vegetation_extent_rough.shp"
# Area covered by the training data
fp_train_ext = "D:/tpfdo/Documents/Artio_drive/Projects/Polesia/Project_area.shp"
# Prepped training points
fp_train_points = "D:/tpfdo/Documents/Artio_drive/Projects/Polesia/Training_data/Simple_points_500.shp"
# Area to be classified
fp_target_ext = "D:/tpfdo/Documents/Artio_drive/Projects/Polesia/Classif_area.shp"
# Directory that receives the exported results
fp_export_dir = "D:/tpfdo/Documents/Artio_drive/Projects/Polesia/Classified/"

# Name of the classes column in your training data
label = 'VALUE'
# Sets the output scale of the analysis
scale = 20

# (start, end) date pairs handed to create_data_stack_v2
date_list = [
    ('2018-12-01', '2019-02-01'),
    ('2019-05-01', '2019-05-31'),
]
#date_list = [('2017-12-01', '2018-02-01')]  # this just generates a ~2 month composite

# Sentinel-2 cloud/shadow masking and band-selection settings
s2_params = {
    # int, max cloud coverage (%) permitted in a scene
    'CLOUD_FILTER': 60,
    # int, 's2cloudless' 'probability' band value > thresh = cloud
    'CLD_PRB_THRESH': 40,
    # float, if Band 8 (NIR) < NIR_DRK_THRESH = possible shadow
    'NIR_DRK_THRESH': 0.15,
    # int, max distance [TODO: km or 100m?] from cloud edge for possible shadow
    'CLD_PRJ_DIST': 1,
    # int, distance (m) used to buffer cloud edges
    'BUFFER': 100,
    # list of str, which S2 bands to return?
    #'S2BANDS': ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12']
    'S2BANDS': ['B2', 'B3', 'B4', 'B6', 'B8', 'B11'],
}

# Area of interest, read from the training-extent shapefile
aoi = geemap.shp_to_ee(fp_train_ext)

In [None]:
# Load the data stack for the area of interest, using the date ranges and
# Sentinel-2 settings configured above
stack = create_data_stack_v2(
    aoi,
    date_list,
    s2_params,
)

# Fetch the band names of the stack as a local Python list; every band is used for training
band_names = stack.bandNames()
trainingbands = band_names.getInfo()
print('Training bands are:', trainingbands)

In [None]:
# Load and sample the training data
print('Loading training data...')
#trainingbands_sample = [trainingbands[i] for i in [1,10,18,19]] # Temporary selection of random bands to allow pipeline testing

training_points = geemap.shp_to_ee(fp_train_points)

# Sample the stack at every training point (keeping only the class label from the
# points) and attach a seeded 'random' column that drives the train/test split
data = (
    stack.select(trainingbands)
    .sampleRegions(collection=training_points, properties=[label], scale=scale)
    .randomColumn(seed=0)
)

# Remove null value samples (if needed, switch all the 'data' to dataNoNulls' below)
#dataNoNulls = data.filter(ee.Filter.notNull(data.first().propertyNames()))

# Split into train and test: samples with random < split train, the rest test
split = 0.7
in_train = ee.Filter.lt('random', split)
in_test = ee.Filter.gte('random', split)
train = data.filter(in_train)
test = data.filter(in_test)

print('Training data loaded and ready!')

In [None]:
# Set up the Random Forest
print('Setting up random forest classifier...')
init_params = {
    "numberOfTrees": 100,       # the number of individual decision tree models
    "variablesPerSplit": None,  # the number of features to use per split (None = leave unset)
    "minLeafPopulation": 1,     # smallest sample size possible per leaf
    "bagFraction": 0.5,         # fraction of data to include for each individual tree model
    "maxNodes": None,           # max number of leafs/nodes per tree (None = leave unset)
    "seed": 0,                  # random seed for "random" choices like sampling
}

# Train on the training split, predicting `label` from all training bands
clf = ee.Classifier.smileRandomForest(**init_params).train(train, label, trainingbands)

# Carry out the Random Forest
print('Using random forest to classify region...')
target_area = geemap.shp_to_ee(fp_target_ext)
roi = target_area.geometry()
# Classify the stack clipped to the target area. The clipped stack was previously
# built but never used, so the whole unclipped stack was being classified.
target_stack = stack.clip(roi)
classified = target_stack.select(trainingbands).classify(clf)

# Export results to local (path built with os.path.join, like the report paths)
file_out = os.path.join(fp_export_dir, 'Random_forest.tif')
geemap.ee_export_image(classified, filename=file_out, scale=scale, file_per_band=False, region=roi)

print('Classification complete!')

In [None]:
# Generate performance reports
print('Generating performance reports...')
# Make sure the output directory exists before writing the CSV reports
os.makedirs(fp_export_dir, exist_ok=True)
training_csv = os.path.join(fp_export_dir, 'RF_train_confusion_matrix.csv')
testing_csv = os.path.join(fp_export_dir, 'RF_test_error_matrix.csv')

# Confusion matrix of the classifier on its own training data
train_accuracy = clf.confusionMatrix()

# Classify the held-out samples and compare the true class column (`label`) with the
# prediction. classify() stores its result in the 'classification' property by default;
# the previous 'Landcover'/'Classification' names do not exist on the samples.
validated = test.classify(clf)
test_accuracy = validated.errorMatrix(label, 'classification')

# getInfo() returns each matrix as a list of rows, written out one CSV row per matrix row
with open(training_csv, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(train_accuracy.getInfo())

with open(testing_csv, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(test_accuracy.getInfo())

print('Performance reports completed!')

In [None]:
# accuracy() returns a server-side object; getInfo() fetches the actual number so the
# value is printed instead of the object's description.
print('Training overall accuracy: ', train_accuracy.accuracy().getInfo())