In [1]:
import ee
import geemap
import os
from google.cloud import storage
import rasterio
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
from rasterio.transform import from_origin
import json

# Google Cloud Storage settings: the bucket that holds the training CSV and
# the object prefix referenced by the (currently disabled) export step below.
GCS_BUCKET = "test-agb-bucket"
GCS_OUTPUT_PATH = "path/to/output"

# Earth Engine session setup: authenticate first, then initialise the client.
ee.Authenticate()
ee.Initialize()

# Progress marker for the notebook output.
print("1st step")

# # Define the study area
# study_area = ee.Geometry.Polygon([[
#     [-70, -10],
#     [-70, 0],
#     [-50, 0],
#     [-50, -10],
#     [-70, -10]
# ]])

# # Load datasets
# gedi = ee.ImageCollection("LARSE/GEDI/GEDI02_A_002_MONTHLY") \
#     .filterBounds(study_area) \
#     .select('rh98')
# sentinel1 = ee.ImageCollection("COPERNICUS/S1_GRD") \
#     .filterBounds(study_area) \
#     .filter(ee.Filter.eq('orbitProperties_pass', 'DESCENDING')) \
#     .select(['VV', 'VH'])
# sentinel2 = ee.ImageCollection("COPERNICUS/S2") \
#     .filterBounds(study_area) \
#     .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)) \
#     .select(['B2', 'B3', 'B4', 'B8'])
# landsat = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2") \
#     .filterBounds(study_area) \
#     .select(['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5'])
# dem = ee.Image("USGS/SRTMGL1_003")

# # Combine all datasets into a single stack
# def preprocess_image(image):
#     return image.clip(study_area)

# stack = ee.Image.cat([
#     gedi.mean(),
#     sentinel1.mean(),
#     sentinel2.median(),
#     landsat.median(),
#     dem
# ].map(preprocess_image))

# # Export stack to GCS
# task = ee.batch.Export.image.toCloudStorage(
#     image=stack,
#     bucket=GCS_BUCKET,
#     fileNamePrefix=GCS_OUTPUT_PATH,
#     scale=30,
#     region=study_area,
#     maxPixels=1e13
# )
# task.start()

# print("Data export started. Monitor task in GEE.")

def parse_geojson(geo_str):
    """Build a shapely ``Point`` from a GeoJSON geometry string.

    The string is decoded with ``json.loads``; the first two entries of its
    ``"coordinates"`` member become the point's x and y values.

    Args:
        geo_str: JSON text of a geometry object with a ``"coordinates"`` list.

    Returns:
        A ``Point`` built from ``coordinates[0]`` and ``coordinates[1]``.

    Raises:
        IndexError: if ``"coordinates"`` is absent or has fewer than two
            entries.
    """
    coords = json.loads(geo_str).get("coordinates", [])
    x, y = coords[0], coords[1]
    return Point(x, y)

# Process data from GCS in Python
def download_and_process_data(
    blob_name="agb_training_data_10.csv",
    predictors=None,
    target="agbd",
):
    """Download the training CSV from GCS and split it into model inputs.

    The object ``blob_name`` is fetched from the ``GCS_BUCKET`` bucket into
    the current working directory, read with pandas, and its ``.geo`` column
    is parsed into point geometries (EPSG:4326).

    Args:
        blob_name: Name of the CSV object inside the bucket. The local copy
            is written under the object's base name.
        predictors: Feature column names. Defaults to
            ``['NDVI', 'EVI', 'VV', 'VH', 'DEM', 'slope', 'aspect']``.
        target: Name of the column to predict.

    Returns:
        A tuple ``(X, y, geo_df)``: the predictor columns, the target
        column, and the full GeoDataFrame they were selected from.

    Raises:
        KeyError: if the CSV lacks ``.geo``, the target column, or any
            predictor column.
    """
    if predictors is None:
        predictors = ['NDVI', 'EVI', 'VV', 'VH', 'DEM', 'slope', 'aspect']
    else:
        predictors = list(predictors)

    # A single source of truth for the local file name; the base name keeps
    # the download in the working directory even if the object has a prefix.
    csv_file = os.path.basename(blob_name)

    # Download data from GCS
    client = storage.Client()
    bucket = client.get_bucket(GCS_BUCKET)
    bucket.blob(blob_name).download_to_filename(csv_file)
    print("Data downloaded.")

    # Load the dataset and fail early, with the offending names, if the
    # expected columns are not all present.
    data = pd.read_csv(csv_file)
    required = ['.geo', target, *predictors]
    missing = [name for name in required if name not in data.columns]
    if missing:
        raise KeyError(f"{csv_file} is missing required column(s): {missing}")
    print(data['.geo'].head())

    # Parse the .geo column into geometries and promote to a GeoDataFrame
    data['geometry'] = data['.geo'].apply(parse_geojson)
    geo_df = gpd.GeoDataFrame(data, geometry='geometry', crs="EPSG:4326")

    # Check the first few rows of the GeoDataFrame
    print(geo_df[['geometry']].head())

    # Separate predictors (X) and target (y)
    X = geo_df[predictors]
    y = geo_df[target]
    return X, y, geo_df

# Fetch the training table: feature columns, target column, and the full
# GeoDataFrame they were selected from.
predictors, target, geo_df = download_and_process_data()

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    predictors,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training split.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Report the root-mean-square error on the held-out rows.
predictions = model.predict(X_test)
test_mse = mean_squared_error(y_test, predictions)
print("Model RMSE:", np.sqrt(test_mse))

# Predict for every row (training rows included) and show both lengths so a
# mismatch between the table and the predictions is visible in the output.
predictions = model.predict(predictors)
print(len(geo_df))

print("predictions=", len(predictions))
# Add the predicted AGB values to the GeoDataFrame (1-D, one value per row)
geo_df['predicted_agb'] = predictions

# Point coordinates and their bounding box (degrees, EPSG:4326)
point_lons = geo_df['geometry'].x.to_numpy()
point_lats = geo_df['geometry'].y.to_numpy()
min_lat = point_lats.min()
max_lat = point_lats.max()
min_lon = point_lons.min()
max_lon = point_lons.max()

# Set pixel size (resolution, adjust based on data)
pixel_size = 0.01  # Modify this based on your data's spatial resolution
agb_nodata = -9999.0  # Value stored in cells that contain no sample point

# The transform's origin is the upper-left corner of the grid, so rows run
# north -> south and columns west -> east.
transform = from_origin(min_lon, max_lat, pixel_size, pixel_size)

# Grid large enough to hold the whole bounding box (at least 1 x 1).
n_rows = int(np.floor((max_lat - min_lat) / pixel_size)) + 1
n_cols = int(np.floor((max_lon - min_lon) / pixel_size)) + 1

# Map every point to the cell that contains it. Clipping guards against
# floating-point round-off pushing an edge point just outside the grid.
row_idx = np.clip(
    np.floor((max_lat - point_lats) / pixel_size).astype(int), 0, n_rows - 1
)
col_idx = np.clip(
    np.floor((point_lons - min_lon) / pixel_size).astype(int), 0, n_cols - 1
)

# Several points can share a cell, so accumulate sums and counts with the
# unbuffered ``np.add.at`` and store the per-cell mean.
cell_sum = np.zeros((n_rows, n_cols), dtype="float64")
cell_count = np.zeros((n_rows, n_cols), dtype="int64")
np.add.at(cell_sum, (row_idx, col_idx), predictions)
np.add.at(cell_count, (row_idx, col_idx), 1)

# Previously the predictions were written as an N x 1 column, which placed
# them in a vertical strip unrelated to the points' coordinates. Here each
# value lands in the cell of its own longitude/latitude.
agb_map = np.full((n_rows, n_cols), agb_nodata, dtype="float32")
has_data = cell_count > 0
agb_map[has_data] = cell_sum[has_data] / cell_count[has_data]

# Save the AGB map as a GeoTIFF
output_tiff = "agb_map.tif"
with rasterio.open(
    output_tiff,
    "w",
    driver="GTiff",
    height=agb_map.shape[0],
    width=agb_map.shape[1],
    count=1,  # Single-band raster
    dtype="float32",
    crs="EPSG:4326",
    transform=transform,
    nodata=agb_nodata,  # Lets readers mask the empty cells
) as dst:
    dst.write(agb_map, 1)  # Write the AGB data to the first band

print(f"GeoTIFF saved: {output_tiff}")

# Initialize a map
m = geemap.Map()

# Add the AGB map GeoTIFF to the map. The no-data value is passed as
# ``nodata``, the keyword geemap's ``add_raster`` documents; the previous
# ``nodata_value`` is not one of its named parameters.
# NOTE(review): the recorded cell output ends with
# "TypeError: 'dict' object is not callable" after the GeoTIFF was saved.
# Presumably this comes from a version mismatch between geemap and its
# raster tile backend rather than from this cell - confirm by upgrading both.
m.add_raster(
    output_tiff,
    layer_name="AGB Map",
    colormap="viridis",
    vmin=0,
    vmax=100,  # Adjust the range based on your data
    nodata=-9999,  # Cells with this value are treated as empty
)

# Set the map center based on the extent of the data
m.set_center((min_lon + max_lon) / 2, (min_lat + max_lat) / 2, zoom=6)

# Display the map (a bare expression renders the widget in a notebook)
m

1st step
Data downloaded.
0    {"geodesic":false,"type":"Point","coordinates"...
1    {"geodesic":false,"type":"Point","coordinates"...
2    {"geodesic":false,"type":"Point","coordinates"...
3    {"geodesic":false,"type":"Point","coordinates"...
4    {"geodesic":false,"type":"Point","coordinates"...
Name: .geo, dtype: object
                    geometry
0  POINT (-77.03548 3.16342)
1  POINT (-76.94385 3.29188)
2  POINT (-77.03009 3.16072)
3  POINT (-77.14777 3.00352)
4  POINT (-76.94295 3.28379)
Model RMSE: 124.84348288087334
3826
predictions= 3826
GeoTIFF saved: agb_map.tif


TypeError: 'dict' object is not callable