In [9]:
import ee
import geemap
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import numpy as np
from google.cloud import storage
import os

# Initialize Google Earth Engine
ee.Initialize()

# Define the region of interest (Amazon Basin)
amazon_basin = ee.FeatureCollection("users/your_username/amazon_basin_polygon")



# Initialize Google Earth Engine
ee.Initialize()

# Define Area of Interest (Amazon Basin)
# aoi = ee.FeatureCollection('users/your_account/amazon_basin_polygon')
aoi = ee.Geometry.Polygon([[[-78.0, 7.0],
            [-52.0, 7.0],
            [-52.0, -17.0],                           
            [-78.0, -17.0],
            [-78.0, 7.0]]]) 

def gnerate_map():
    # Filter Sentinel-1 SAR data
    s1 = ee.ImageCollection('COPERNICUS/S1_GRD') \
        .filterBounds(aoi) \
        .filterDate('2022-01-01', '2022-12-31') \
        .filter(ee.Filter.eq('instrumentMode', 'IW')) \
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VV'))\
        .filter(ee.Filter.listContains('transmitterReceiverPolarisation', 'VH'))
    
    # Filter Sentinel-2 optical data
    s2 = ee.ImageCollection('COPERNICUS/S2_SR') \
        .filterBounds(aoi) \
        .filterDate('2022-01-01', '2022-12-31') \
        .map(lambda image: image.updateMask(image.select('QA60').lt(1))) \
        .select(['B2', 'B3', 'B4', 'B8', 'B11', 'B12'])
    
    # Add vegetation indices (e.g., NDVI)
    def add_ndvi(image):
        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
        return image.addBands(ndvi)
    
    s2 = s2.map(add_ndvi)
    
    # Load DEM
    dem = ee.Image('USGS/SRTMGL1_003').clip(aoi)
    
    # Landsat data
    landsat = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
        .filterBounds(aoi) \
        .filterDate('2022-01-01', '2022-12-31')
    
    # Combine datasets
    combined = s1.median().addBands(s2.median()).addBands(dem)
    
    # Load GEDI data
        # Load GEDI Level 4A data
    gedi_all = ee.FeatureCollection('LARSE/GEDI/GEDI04_A_002_INDEX')\
            .filter('time_start > "2022-01-01" && time_end < "2022-03-31"')\
            .filterBounds(aoi);

    # Get the list of table_id values
    table_ids = gedi_all.aggregate_array('table_id').getInfo()
    print("Lenght of table id", len(table_ids))
    
    # Initialize an empty FeatureCollection
    gedi = ee.FeatureCollection([])

    # Loop through each table ID and merge them
    for table_id in table_ids:
        # Load each table and merge
        table = ee.FeatureCollection(table_id).filterBounds(aoi)
        # image = image.reproject('EPSG:4326', None, 100)
        gedi = gedi.merge(table)
        # print("Size of Gedi", gedi.size().getInfo())

    # Filter invalid AGBD measurements based on 'l4_quality_flag'
    gedi = gedi.filter(ee.Filter.eq('l4_quality_flag', 1))
    
    # Filter to keep only points with non-null 'agbd' values
    gedi = gedi.filter(ee.Filter.notNull(['agbd']))

    
    # Sample training data
    training_data = combined.sampleRegions(
        collection=gedi,
        properties=['AGB'],  # Replace with the appropriate biomass field
        scale=30
    )
    
    # Convert to NumPy arrays for ML training
    data = np.array(training_data.getInfo()['features'])
    X = np.array([d['properties'] for d in data])
    y = np.array([d['properties']['AGB'] for d in data])
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train Random Forest model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    
    # Predict and evaluate
    y_pred = model.predict(X_test)
    print("R² Score:", r2_score(y_test, y_pred))
    
    # # Predict on the entire region
    # agb_map = combined.expression(
    #     'RF_PREDICTION',
    #     {
    #         'model': model,
    #         # Add any additional parameters for the prediction
    #     }
    # )
    return y_pred


# Export data as NetCDF
def export_to_netcdf(image, region, bucket_name, filename="agb_map.nc"):
    # Define projection and scale
    projection = image.projection()
    scale = projection.nominalScale().getInfo()

    # Export image as GeoTIFF
    task = ee.batch.Export.image.toDrive(
        image=image,
        description='AGB_Map_Export',
        folder='gee_exports',
        fileNamePrefix='agb_map',
        region=region.geometry().bounds().getInfo()['coordinates'],
        scale=scale,
        fileFormat='GeoTIFF'
    )
    task.start()
    task.status()

    # Convert GeoTIFF to NetCDF locally (replace this with cloud conversion if needed)
    local_tif_path = "agb_map.tif"
    local_nc_path = "agb_map.nc"
    os.system(f"gdal_translate -of netCDF {local_tif_path} {local_nc_path}")

    # Upload to Google Cloud Storage
    client = storage.Client()
    bucket = client.get_bucket(bucket_name)
    blob = bucket.blob(filename)
    blob.upload_from_filename(local_nc_path)
    print(f"File saved to GCS as {filename}")

# Main function
if __name__ == "__main__":
    agb = gnerate_map()
    print("end")
    # sentinel1, sentinel2, gedi, dem, landsat = load_data()
    # combined_image = preprocess_data(sentinel1, sentinel2, gedi, dem, landsat)
    # export_to_netcdf(combined_image, amazon_basin, bucket_name="your_bucket_name")


Lenght of table id 464


EEException: Computation timed out.