In [8]:
import ee
import geemap
import zarr
import numpy as np
from google.cloud import storage
import sys
sys.setrecursionlimit(5000) 

# Initialize Earth Engine
ee.Initialize()

amazon_region = ee.Geometry.Polygon([[[-80.0, 10.0],
            [-20.0, 10.0],
            [-20.0, -50.0],                           
            [-80.0, -50.0],
            [-80.0, 10.0]]]) 

# Load GEDI L4A data from Earth Engine
gedi_all = ee.FeatureCollection('LARSE/GEDI/GEDI04_A_002_INDEX')\
        .filter('time_start > "2022-01-01" && time_end < "2022-03-30"')\
        .filterBounds(amazon_region);

# Get the list of table_id values
table_ids = gedi_all.aggregate_array('table_id').getInfo()

print("lenght of table id", len(table_ids))

# Initialize an empty FeatureCollection
gedi = ee.FeatureCollection([])

# Loop through each table ID and merge them
for table_id in table_ids:
    # Load each table and merge
    table = ee.FeatureCollection(table_id).filterBounds(amazon_region);
    gedi = gedi.merge(table)

# Filter invalid AGBD measurements based on 'l4_quality_flag'
gedi = gedi.filter(ee.Filter.eq('l4_quality_flag', 1))
# print('Number of 1st filtered GEDI points:', gedi.size().getInfo())  

# Filter to keep only points with non-null 'agbd' values
gedi = gedi.filter(ee.Filter.notNull(['agbd_se']))

# Get the data as a list of features
gedi_data_list = gedi.getInfo()['features']

# Convert features into a NumPy array for storage
# Convert the filtered data to a NumPy array (or a 2D list)
# This step can vary based on the dataset size. Use getInfo() for smaller datasets.
def extract_properties(features):
    """
    Convert Earth Engine feature collection to a NumPy array of properties.
    """
    properties = [f['properties'] for f in features]
    return np.array(properties)

gedi_np_data = extract_properties(gedi_data_list)

# Save the NumPy data to Zarr format
zarr_file = 'gedi_data.zarr'
zarr_data = zarr.open(zarr_file, mode='w', shape=gedi_np_data.shape, dtype=gedi_np_data.dtype)
zarr_data[:] = gedi_np_data

print(f"GEDI data saved to Zarr format: {zarr_file}")

# Initialize Google Cloud Storage client
storage_client = storage.Client()

# Define your bucket and file path
bucket_name = 'test-agb-bucket'  # Replace with your bucket name


# Upload the Zarr file to Google Cloud Storage
bucket = storage_client.get_bucket(bucket_name)
blob = bucket.blob('Gedi_L4A/2022/' + zarr_file)

# Upload the local Zarr file to the Cloud Storage bucket
blob.upload_from_filename(zarr_file)

print(f"Zarr file uploaded to gs://{bucket_name}/{zarr_file}")


lenght of table id 1160


EEException: merge() is too deeply nested.