# In [None]:
import ee
import geemap
import xee  # Import the Xarray Earth Engine module
import xarray as xr
import time
import sys

# Increase recursion limit for large collections
sys.setrecursionlimit(10000)

# Initialize Earth Engine (must run before any ee.* object below is created)
ee.Initialize()

# Define the region of interest (Amazon Basin)
amazon_region = ee.FeatureCollection("projects/test-project-agb/assets/AmazonBasinLimits-master")

# Define the bitmask for Sentinel-2 cloud and cirrus.
# In the QA60 band, bit 10 flags opaque clouds and bit 11 flags cirrus clouds;
# testing any other bit leaves cloudy pixels unmasked.
cloud_bit_mask = ee.Number(1 << 10)  # Opaque-cloud flag: bit 10 of QA60
cirrus_bit_mask = ee.Number(1 << 11)  # Cirrus flag: bit 11 of QA60

# Sentinel-1 Processing
def process_sentinel1():
    """Build seasonal mean Sentinel-1 VV and VH composites over the Amazon region.

    The S1 GRD collection is restricted to the region, to scenes carrying both
    VV and VH polarisations, and to IW mode. For each polarisation, the band is
    edge-masked with ``mask_edges``, averaged over three consecutive date
    windows, and the three means are stacked into a single image with
    ``addBands`` before being clipped to the region and converted.

    Returns:
        tuple: ``(vv_array, vh_array)`` -- the results of ``xee.to_xarray`` for
        the VV and VH three-band composites respectively.
    """
    sentinel1 = (
        ee.ImageCollection('COPERNICUS/S1_GRD')
        .filterBounds(amazon_region)
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
        .filter(ee.Filter.eq('instrumentMode', 'IW'))
        .filter(ee.Filter.inList('orbitProperties_pass', ['ASCENDING', 'DESCENDING']))
    )

    # ee.Filter.date uses an inclusive start and an EXCLUSIVE end, so each
    # window must end on the day the next one starts; otherwise the boundary
    # days (20 April, 10 June) fall into no window at all.
    season_filters = [
        ee.Filter.date('2022-03-01', '2022-04-21'),
        ee.Filter.date('2022-04-21', '2022-06-11'),
        ee.Filter.date('2022-06-11', '2022-08-31'),
    ]

    def seasonal_means(band):
        """Return one image holding the per-window means of *band*."""
        masked = sentinel1.select(band).map(mask_edges)
        window_means = [masked.filter(window).mean() for window in season_filters]
        composite = window_means[0]
        for window_mean in window_means[1:]:
            composite = composite.addBands(window_mean)
        return composite

    # NOTE(review): xee is normally driven through
    # xr.open_dataset(..., engine='ee'); confirm that the installed xee
    # version actually exposes ``to_xarray``.
    vv_array = xee.to_xarray(seasonal_means('VV').clip(amazon_region))
    vh_array = xee.to_xarray(seasonal_means('VH').clip(amazon_region))

    return vv_array, vh_array

def mask_edges(image):
    """Hide pixels whose value is below -30.0, keeping the image's existing mask.

    Args:
        image: Object exposing ``lt``, ``mask`` and ``updateMask`` (used here
            via ``ImageCollection.map`` on single-band Sentinel-1 images).

    Returns:
        The input image with its mask updated so that pixels with values
        below -30.0 are masked in addition to whatever was already masked.
    """
    # True wherever the pixel is NOT below the -30.0 threshold.
    not_edge = image.lt(-30.0).Not()
    # Combine with the current mask so previously masked pixels stay masked.
    combined_mask = image.mask().And(not_edge)
    return image.updateMask(combined_mask)

# Sentinel-2 Processing (NDVI)
def process_sentinel2():
    """Compute a cloud-masked median NDVI composite over the Amazon region.

    Scenes from ``COPERNICUS/S2_SR_HARMONIZED`` are limited to the region, to
    the 2022-03-01 .. 2022-08-31 date filter, and to scenes with
    ``CLOUDY_PIXEL_PERCENTAGE`` below 20. Each scene is masked using the
    module-level cloud and cirrus bit masks against its ``QA60`` band, turned
    into an ``NDVI`` band from B8 and B4, and the per-pixel median is taken.

    Returns:
        The result of ``xee.to_xarray`` for the clipped median NDVI image.
    """

    def mask_clouds(image):
        """Keep only pixels where both QA60 flag tests are zero."""
        qa_band = image.select('QA60')
        cloud_free = qa_band.bitwiseAnd(cloud_bit_mask).eq(0)
        cirrus_free = qa_band.bitwiseAnd(cirrus_bit_mask).eq(0)
        return image.updateMask(cloud_free.And(cirrus_free))

    def to_ndvi(image):
        """Return the normalized difference of B8 and B4, named 'NDVI'."""
        return image.normalizedDifference(['B8', 'B4']).rename('NDVI')

    scenes = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterBounds(amazon_region)
        .filterDate('2022-03-01', '2022-08-31')
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))
    )
    median_ndvi = scenes.map(mask_clouds).map(to_ndvi).median()

    # Convert to Xarray DataArray
    # NOTE(review): confirm the installed xee version exposes ``to_xarray``.
    return xee.to_xarray(median_ndvi.clip(amazon_region))

# DEM and Slope Processing
def process_dem():
    """Derive terrain slope for the Amazon region from the Copernicus GLO-30 DEM.

    ``COPERNICUS/DEM/GLO30`` is an ImageCollection of tiles, so the tiles that
    intersect the region are mosaicked (``DEM`` band only) before the slope is
    computed.

    Returns:
        The result of ``xee.to_xarray`` for the slope image clipped to the
        region.
    """
    dem_tiles = (
        ee.ImageCollection('COPERNICUS/DEM/GLO30')
        .filterBounds(amazon_region)
        .select('DEM')
    )

    # A mosaic loses the tiles' native projection; restore it so the slope is
    # computed on the DEM grid instead of the mosaic's default projection.
    dem = dem_tiles.mosaic().setDefaultProjection(dem_tiles.first().projection())

    # Slope needs each pixel's neighbours, so clip AFTER computing it;
    # clipping first would leave missing values along the region boundary.
    slope = ee.Terrain.slope(dem).clip(amazon_region)

    # Convert to Xarray DataArray
    # NOTE(review): confirm the installed xee version exposes ``to_xarray``.
    slope_array = xee.to_xarray(slope)
    return slope_array

# Load and process GEDI data
def process_gedi(feature_image=None):
    """Sample predictor bands at GEDI L4A locations to build training data.

    Args:
        feature_image: Optional ``ee.Image`` holding the predictor bands to
            sample. When omitted, the result of ``process_feature_stack()`` is
            used, as before.

    Returns:
        The feature collection produced by ``sampleRegions`` (100 m scale,
        ``tileScale`` 16, geometries kept), carrying the ``agbd`` property.

    Raises:
        TypeError: If the predictor stack is not an ``ee.Image``.
            ``sampleRegions`` is an ``ee.Image`` method, so a local xarray
            object (which ``process_feature_stack`` builds via
            ``xr.Dataset``) cannot be sampled server-side.
    """
    # Resolve and validate the predictor stack first so a wrong type fails
    # with a clear message instead of an AttributeError on ``sampleRegions``.
    if feature_image is None:
        feature_image = process_feature_stack()
    if not isinstance(feature_image, ee.Image):
        raise TypeError(
            "process_gedi needs an ee.Image to sample, got "
            f"{type(feature_image).__name__}; pass a server-side ee.Image "
            "via the feature_image argument."
        )

    # NOTE(review): the *_INDEX asset appears to list per-granule tables
    # rather than individual footprints -- confirm that 'l4_quality_flag',
    # 'agbd' and 'agbd_se' actually exist on these features.
    gedi_all = (
        ee.FeatureCollection('LARSE/GEDI/GEDI04_A_002_INDEX')
        .filter('time_start > "2022-03-01" && time_end < "2022-08-31"')
        .filterBounds(amazon_region)
    )

    # Keep only quality-flagged features that carry both biomass fields.
    gedi = (
        gedi_all
        .filter(ee.Filter.eq('l4_quality_flag', 1))
        .filter(ee.Filter.notNull(['agbd']))
        .filter(ee.Filter.notNull(['agbd_se']))
    )

    # Store 'agbd' as an integer on every feature.
    gedi = gedi.map(lambda feature: feature.set('agbd', ee.Number(feature.get('agbd')).toInt()))

    # Sample the remote sensing data at GEDI footprint locations
    training_data = feature_image.sampleRegions(
        collection=gedi,
        properties=['agbd'],
        scale=100,
        tileScale=16,
        geometries=True
    )

    return training_data

# Combine feature stack for modeling
def process_feature_stack():
    """Assemble the VV, VH, NDVI and Slope layers into one ``xr.Dataset``.

    Calls ``process_sentinel1``, ``process_sentinel2`` and ``process_dem`` in
    that order and stores their results under the keys ``'VV'``, ``'VH'``,
    ``'NDVI'`` and ``'Slope'``.

    Returns:
        xr.Dataset: Dataset built from the four layers.
    """
    backscatter_vv, backscatter_vh = process_sentinel1()
    layers = {'VV': backscatter_vv, 'VH': backscatter_vh}
    layers['NDVI'] = process_sentinel2()
    layers['Slope'] = process_dem()

    # Combine into an Xarray Dataset
    return xr.Dataset(layers)

# Main pipeline execution
def main(output_path=None):
    """Run the feature-extraction pipeline and optionally export it.

    Args:
        output_path: Optional path of a NetCDF file to write, e.g.
            ``"agb_features.nc"``. When omitted (the default), nothing is
            written to disk.
    """
    # Build the combined dataset through the shared helper so the layer list
    # is defined in exactly one place.
    dataset = process_feature_stack()

    # Report what actually happened: only claim a save when one took place.
    if output_path is None:
        print("Feature dataset built; NetCDF export skipped (no output_path given).")
    else:
        dataset.to_netcdf(output_path)
        print(f"Data saved as NetCDF: {output_path}")

    # Proceed with modeling (e.g., Random Forest) or further analysis
    # If needed, load the NetCDF file for local ML tasks:
    # data = xr.open_dataset("agb_features.nc")
    # Perform ML modeling here
    
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

