<a href="https://colab.research.google.com/github/noaakwey/geobotany/blob/main/Forest_communities_WekaXMean_Clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import ee, eemont
# Authenticate against Earth Engine, then initialize the client library.
# Authentication has to come first so that Initialize() can pick up the
# stored credentials.
# NOTE(review): recent earthengine-api releases may expect a Cloud project,
# e.g. ee.Initialize(project=...) — confirm for your environment.
ee.Authenticate()
ee.Initialize()

In [None]:
# Landsat 8 and 9 preprocessing and masking
year = 2018
roi = ee.FeatureCollection("projects/ee-landeco/assets/tatarstan_grid")


def _vegetation_index_collection(collection_id, bounds, first_year):
    """Return a Landsat collection filtered and extended with indices.

    The collection ``collection_id`` is restricted to ``bounds``, to the
    calendar years ``first_year`` .. ``first_year + 4`` and to calendar
    months 4-11. It is then passed through ``preprocess()`` and
    ``spectralIndices('vegetation')`` (presumably the ImageCollection
    extensions registered by the ``eemont`` import).
    """
    years = ee.Filter.calendarRange(first_year, first_year + 4, 'year')
    months = ee.Filter.calendarRange(4, 11, 'month')
    scenes = ee.ImageCollection(collection_id).filterBounds(bounds)
    scenes = scenes.filter(years).filter(months)
    return scenes.preprocess().spectralIndices('vegetation')


# Both sensors go through exactly the same pipeline.
l8 = _vegetation_index_collection("LANDSAT/LC08/C02/T1_L2", roi, year)
l9 = _vegetation_index_collection("LANDSAT/LC09/C02/T1_L2", roi, year)
# Names of the index bands that are kept for the composite.
index_bands = [
    'BWDRVI', 'CIG', 'CVI', 'DSI', 'DSWI1', 'DVI', 'DVIplus', 'EVI2',
    'FCVI', 'GARI', 'GBNDVI', 'GCC', 'GDVI', 'GEMI', 'GLI', 'GNDVI',
    'GOSAVI', 'GRNDVI', 'GRVI', 'GSAVI', 'GVMI', 'IAVI', 'IKAW', 'IPVI',
    'MCARI2', 'MGRVI', 'MNDVI', 'MNLI', 'MRBVI', 'MSAVI', 'MSI', 'MSR',
    'MTVI2', 'NDDI', 'NDGI', 'NDII', 'NDMI', 'NDPI', 'NDVI', 'NDYI',
    'NGRDI', 'NIRv', 'NIRvH2', 'NLI', 'NMDI', 'NRFIg', 'NRFIr', 'NormG',
    'NormNIR', 'NormR', 'OCVI', 'OSAVI', 'RCC', 'RDVI', 'RGBVI', 'RGRI',
    'RI', 'SARVI', 'SAVI', 'SEVI', 'SI', 'SIPI', 'SR', 'TDVI', 'TGI',
    'TSAVI', 'TVI', 'TriVI', 'VARI', 'VIG', 'WDRVI', 'WDVI',
]

# Merge both sensors into one collection ordered by acquisition time,
# then keep only the index bands listed above.
l89 = l8.merge(l9).sort("system:time_start")
l89_inx = l89.select(index_bands)

# Temporal statistics computed over the index collection. Each reduced
# image is multiplied by 100 before the final cast to 16-bit integers.
temporal_reducers = (
    ee.Reducer.median(),
    ee.Reducer.mean(),
    ee.Reducer.max(),
    ee.Reducer.stdDev(),
)
l89_select = [
    l89_inx.reduce(reducer).multiply(100)
    for reducer in temporal_reducers
]

# Stack the four statistic images into one multi-band composite.
l89_comp = ee.Image.cat(*l89_select)

# Load the MASK dataset and keep only the pixels where it equals 1.
mask_dataset = ee.Image("projects/ee-landeco/assets/GLAD_RT_MASK")
mask = mask_dataset.eq(1)

# NOTE(review): toShort() assumes every scaled index fits the signed
# 16-bit range — confirm for ratio-type indices.
masked_comp = l89_comp.updateMask(mask)
l89_Vi_mask = masked_comp.clip(roi).toShort()

In [None]:
# Build the training dataset: sample 20000 pixels of the masked composite
# inside the ROI at a 30 m scale.
training = l89_Vi_mask.sample(
    region=roi.geometry(),
    scale=30,
    numPixels=20000,
    tileScale=16,
)

# Create the X-Means clusterer (between 36 and 50 clusters) and train it
# on the sample.
xmeans = ee.Clusterer.wekaXMeans(minClusters=36, maxClusters=50)
clusterer = xmeans.train(training)

# Apply the trained clusterer to the part of the ROI that shares a row_id
def apply_clusterer(feature):
    """Cluster the composite within the ROI features matching a row_id.

    Args:
        feature: element of ``roi``; its ``row_id`` property selects the
            ROI features whose combined geometry is clustered.

    Returns:
        The clustered image, clipped to that geometry and tagged with the
        same ``row_id`` property.
    """
    row_id = feature.get('row_id')
    same_row = ee.Filter.eq('row_id', row_id)
    footprint = roi.filter(same_row).geometry()
    clustered = l89_Vi_mask.clip(footprint).cluster(clusterer)
    return clustered.set('row_id', row_id)

# Run the clusterer for every feature of the ROI; each element of the
# mapped collection is the clustered image returned by apply_clusterer.
result_images = roi.map(apply_clusterer)

# Create the mosaic of the clustered images.
# The mapped collection is reinterpreted as an ImageCollection directly:
# the former toList(size()) -> ImageCollection.fromImages() round trip
# produced the same images in the same order, but forced the whole
# collection through a server-side list, which the Earth Engine best
# practices advise against.
mosaic_image = ee.ImageCollection(result_images).mosaic()

# Geometry of the whole ROI, used both to clip the mosaic and as the
# export region.
export_region = roi.geometry()

# Export settings; folder and description are placeholders to fill in.
task_config = dict(
    image=mosaic_image.clip(export_region),
    folder="__name-Of-Folder__",
    description='__name-of-file__',
    fileFormat='GeoTIFF',
    crs='EPSG:32639',  # in our case (UTM 39N)
    scale=30,
    region=export_region,
    maxPixels=1e12,
)

# Create the Google Drive export task for the mosaic and start it.
task = ee.batch.Export.image.toDrive(**task_config)
task.start()