In [17]:
# Pipeline to generate AGB map based on random forest algorithm
import ee
import geemap
import folium
import time

ee.Initialize()
Map = geemap.Map()

# Region of interest: the Amazon basin limits, stored as an Earth Engine
# table asset. (An earlier hand-drawn bounding box, roughly lon -77..-48 /
# lat -17..5, was overwritten by this asset before any use and was removed.)
amazon_region = ee.FeatureCollection("projects/test-project-agb/assets/AmazonBasinLimits-master")

# Print the first feature to verify the asset loaded
print(amazon_region.first().getInfo())

print("START")
# Date windows used to build the three seasonal Sentinel-1 composites
spring = ee.Filter.date('2022-03-01', '2022-04-20')
lateSpring = ee.Filter.date('2022-04-21', '2022-06-10')
summer = ee.Filter.date('2022-06-11', '2022-08-31')

# Sentinel-2 QA60 bitmasks. Per the Earth Engine data catalog, QA60 stores
# opaque clouds in bit 10 and cirrus in bit 11; the previous masks
# (bits 5 and 9) did not test the cloud flags.
cloud_bit_mask = ee.Number(1 << 10)  # Bit 10: opaque clouds
cirrus_bit_mask = ee.Number(1 << 11)  # Bit 11: cirrus clouds
# Define a masking function
def mask_edges(image):
    """Return `image` with pixels whose value is below -30 masked out.

    The new mask is the image's existing mask AND-ed with "value is not
    below -30", so pixels that were already masked stay masked.
    NOTE(review): the threshold is presumably in dB, since the function is
    mapped over Sentinel-1 backscatter bands - confirm.
    """
    edge_floor = -30.0
    return image.updateMask(image.mask().And(image.lt(edge_floor).Not()))
    
# Sentinel-1 GRD scenes over the region: dual-pol (VV and VH), IW mode,
# either orbit direction.
sentinel1 = (
    ee.ImageCollection('COPERNICUS/S1_GRD')
    .filterBounds(amazon_region)
    .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
    .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
    .filter(ee.Filter.eq('instrumentMode', 'IW'))
    .filter(ee.Filter.inList('orbitProperties_pass', ['ASCENDING', 'DESCENDING']))
)

# Split the collection by polarisation
sentinel1_vv = sentinel1.select('VV')
sentinel1_vh = sentinel1.select('VH')

# Order of the seasonal windows defines the band order of each composite
s1_seasons = (spring, lateSpring, summer)

# VV: mask scene edges, then stack one mean image per season
sentinel1_vv_masked = sentinel1_vv.map(mask_edges)
sentinel1_vv_final = ee.Image.cat(
    *[sentinel1_vv_masked.filter(season).mean() for season in s1_seasons]
)

# VH: same processing as VV
sentinel1_vh_masked = sentinel1_vh.map(mask_edges)
sentinel1_vh_final = ee.Image.cat(
    *[sentinel1_vh_masked.filter(season).mean() for season in s1_seasons]
)

# Sentinel-2 surface-reflectance scenes over the region for 2022, keeping
# only scenes whose CLOUDY_PIXEL_PERCENTAGE metadata is below 20.
# Note: filterDate treats the end date as exclusive.
sentinel2 = (
    ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    .filterBounds(amazon_region)
    .filterDate('2022-01-01', '2022-12-31')
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
)

# Apply the mask using bitwise AND to check that both cloud and cirrus bits are 0
def mask_clouds(image):
    """Return `image` with cloud- and cirrus-flagged pixels masked out.

    A pixel is kept only when both flags are clear in the 'QA60' band; the
    flags tested are the ones selected by the module-level
    `cloud_bit_mask` and `cirrus_bit_mask`.
    """
    qa60 = image.select('QA60')
    cloud_clear = qa60.bitwiseAnd(cloud_bit_mask).eq(0)
    cirrus_clear = qa60.bitwiseAnd(cirrus_bit_mask).eq(0)
    return image.updateMask(cloud_clear.And(cirrus_clear))
    
# Apply the per-pixel cloud/cirrus mask to every Sentinel-2 scene
sentinel2 = sentinel2.map(mask_clouds)

# Per-scene NDVI from bands B8 and B4, reduced to the per-pixel median
ndvi_per_scene = sentinel2.map(
    lambda scene: scene.normalizedDifference(['B8', 'B4']).rename('NDVI')
)
ndvi = ndvi_per_scene.median()
# Calculate EVI
def calculate_evi(image):
    """Return a single-band 'EVI' image computed from a Sentinel-2 SR scene.

    EVI = 2.5 * (NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1), with
    NIR = B8, RED = B4 and BLUE = B2.

    The constant `+ 1` in the denominator is only meaningful for
    reflectance in the 0-1 range. Sentinel-2 SR bands are stored scaled by
    10000, so each band is divided by 10000 before the formula is applied.

    Args:
        image: a Sentinel-2 surface-reflectance image with bands B8, B4, B2.

    Returns:
        The EVI image, band name 'EVI'.
    """
    reflectance_scale = 10000
    bands = {
        'NIR': image.select('B8').divide(reflectance_scale),
        'RED': image.select('B4').divide(reflectance_scale),
        'BLUE': image.select('B2').divide(reflectance_scale),
    }
    return image.expression(
        '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
        bands,
    ).rename('EVI')

# Per-pixel median of the per-scene EVI images
evi = sentinel2.map(calculate_evi).median()

# Load Landsat 8 Collection 2 Level-2 surface reflectance for 2022, keeping
# scenes whose CLOUD_COVER metadata is below 50
landsat8 = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    .filterBounds(amazon_region)
    .filterDate('2022-01-01', '2022-12-31')
    .filter(ee.Filter.lt('CLOUD_COVER', 50))
)


def _landsat8_ndvi(image):
    """Return a single-band 'NDVI' image from a Landsat 8 C02 L2 scene.

    Collection 2 Level-2 SR bands are stored as scaled integers; the
    documented conversion to reflectance is DN * 0.0000275 - 0.2. Because
    of the additive offset, a normalized difference on the raw DNs is
    biased, so the bands are converted first. normalizedDifference masks
    pixels where either input is negative, which can now happen for very
    dark pixels.
    """
    reflectance = image.select(['SR_B5', 'SR_B4']).multiply(0.0000275).add(-0.2)
    return reflectance.normalizedDifference(['SR_B5', 'SR_B4']).rename('NDVI')


# Calculate NDVI for Landsat 8 and reduce to the per-pixel median
landsat_ndvi = landsat8.map(_landsat8_ndvi).median()

# Load the GLO-30 DEM tiles covering the region and mosaic them. Only the
# 'DEM' (elevation) band is kept: the tiles also carry auxiliary bands that
# should not be passed to the terrain functions or used as predictors.
dem = (
    ee.ImageCollection('COPERNICUS/DEM/GLO30')
    .filterBounds(amazon_region)
    .select('DEM')
    .mosaic()
)

# Calculate slope in degrees
slope = ee.Terrain.slope(dem)
# Calculate aspect in degrees
aspect = ee.Terrain.aspect(dem)

# # Load the JRC Global Surface Water dataset
# water_dataset = ee.Image('JRC/GSW1_4/GlobalSurfaceWater')

# # Use the "occurrence" band to represent water presence (values > 50 indicate permanent water presence)
# river_band = water_dataset.select('occurrence').gt(50).rename('water_presence')

# # Load the HydroSHEDS river network dataset
# # Replace 'WWF/HydroSHEDS/v1/FreeFlowingRivers' with a specific dataset if needed
# river_dataset = ee.FeatureCollection('WWF/HydroSHEDS/v1/FreeFlowingRivers')

# # Filter the dataset to the Amazon region
# amazon_rivers = river_dataset.filterBounds(amazon_region)

# # Create a raster layer from the river features
# # This creates a binary mask where rivers are 1 and other areas are 0
# river_raster = amazon_rivers.reduceToImage(properties=[], reducer=ee.Reducer.constant(1)).unmask(0).rename('river_presence')


# Stack all predictors (Sentinel-1, Sentinel-2, Landsat, DEM, terrain) into
# one multi-band image. Both NDVI layers are named 'NDVI'; with the default
# overwrite=False, addBands renames the later duplicate with a numeric
# suffix ('NDVI_1') instead of replacing the first.
feature_stack = (
    sentinel1_vh_final
    .addBands(sentinel1_vv_final)
    .addBands(ndvi)
    .addBands(evi)
    .addBands(landsat_ndvi)
    .addBands(dem)
    .addBands(slope)
    .addBands(aspect)
)

print("type of feature_stack", type(feature_stack))

# Load the GEDI Level 4A index and keep the granules that fall inside the
# date window and intersect the region
gedi_all = (
    ee.FeatureCollection('LARSE/GEDI/GEDI04_A_002_INDEX')
    .filter('time_start > "2022-01-01" && time_end < "2022-03-30"')
    .filterBounds(amazon_region)
)

# Get the list of table_id values (client-side; one asset id per granule)
table_ids = gedi_all.aggregate_array('table_id').getInfo()

print("lenght of table id", len(table_ids))

# Combine all granule tables in one step. A collection-of-collections plus
# flatten() is used instead of hundreds of chained merge() calls, which the
# Earth Engine docs warn produce ever longer element ids and poor
# performance.
gedi = ee.FeatureCollection(
    [
        ee.FeatureCollection(table_id).filterBounds(amazon_region)
        for table_id in table_ids
    ]
).flatten()

# Keep only footprints flagged as valid by 'l4_quality_flag'
gedi = gedi.filter(ee.Filter.eq('l4_quality_flag', 1))

# Keep only footprints that have a standard-error value ('agbd_se')
gedi = gedi.filter(ee.Filter.notNull(['agbd_se']))

# TODO: filtering on relative standard error (agbd_se / agbd) is not
# implemented; the previous attempt was disabled and has been removed.
print("JJ")

# Keep only footprints with a non-null 'agbd' value
gedi = gedi.filter(ee.Filter.notNull(['agbd']))

# Convert agbd to integer for smileGradientTreeBoost (toInt truncates)
gedi = gedi.map(lambda feature: feature.set('agbd', ee.Number(feature.get('agbd')).toInt()))

# Sample the predictor stack at the GEDI footprint locations; each output
# feature carries 'agbd' plus one property per band, and keeps its geometry
training_data = feature_stack.sampleRegions(
    collection=gedi,
    properties=['agbd'],
    scale=100,
    tileScale=16,
    geometries=True,
)

# Function to check task status periodically
def check_task_status(task, poll_interval=30):
    """Poll an export task until it reaches a terminal state.

    Prints the state on every poll. Polling stops when the task is
    COMPLETED, FAILED or CANCELLED. Previously CANCELLED was not handled,
    so a cancelled task made this loop run forever.

    Args:
        task: object with a `status()` method returning a dict that has a
            'state' key (e.g. an `ee.batch.Task`).
        poll_interval: seconds to sleep between polls. Defaults to 30.

    Returns:
        The last status dict returned by `task.status()`.
    """
    while True:
        status = task.status()
        state = status['state']
        print('Current task state:', state)

        if state == 'COMPLETED':
            print("Export task completed successfully.")
            return status
        if state == 'FAILED':
            print("Export task failed:", status)
            return status
        if state == 'CANCELLED':
            print("Export task cancelled:", status)
            return status

        # Any other state (READY, RUNNING, CANCEL_REQUESTED, ...) is
        # transitional: wait before checking the status again
        time.sleep(poll_interval)
        
# Export the sampled training table to Google Cloud Storage as CSV
export_options = {
    'collection': training_data,
    'description': 'Export_training_data_to_CloudStorage',
    'bucket': 'test-agb-bucket',  # Replace with your GCS bucket name
    'fileNamePrefix': 'agb_training_data',  # Object path/prefix inside the bucket
    'fileFormat': 'CSV',  # Format can be 'CSV', 'GeoJSON', etc.
}
task = ee.batch.Export.table.toCloudStorage(**export_options)

# Submit the task, report its initial status, then block until it finishes
task.start()
status = task.status()
print(status)
print("Export started")
check_task_status(task)

{'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-64.38047870088286, -20.202095534083398], [-64.37960473704723, -20.20884665933983], [-64.37538192638276, -20.21042958088765], [-64.37538192638276, -20.202095528644666], [-64.38047870088286, -20.202095534083398]]]}, 'id': '00000000000000000000', 'properties': {'Id': 0, 'area': 0.46}}
START
type of feature_stack <class 'ee.image.Image'>
lenght of table id 439
JJ
{'state': 'READY', 'description': 'Export_training_data_to_CloudStorage', 'priority': 100, 'creation_timestamp_ms': 1733477495928, 'update_timestamp_ms': 1733477495928, 'start_timestamp_ms': 0, 'task_type': 'EXPORT_FEATURES', 'id': 'I3MBPOPRZIAV7NYHFZILN3MI', 'name': 'projects/test-project-agb/operations/I3MBPOPRZIAV7NYHFZILN3MI'}
Export started
Current task state: READY
Current task state: READY
Current task state: READY


KeyboardInterrupt: 