In [None]:
# ============================================================================
# STEP 1: ENVIRONMENT SETUP
# ============================================================================
# Read the .env file, open a Google Cloud Storage filesystem handle, and
# initialize Google Earth Engine through the ForestryCarbonARR wrapper.

import os

from dotenv import load_dotenv

# Called before the remaining imports (same order as before) so the values from
# .env are already in the process environment when those packages are imported.
load_dotenv()

import ee
import eemont
from forestry_carbon_arr.core import ForestryCarbonARR
from forestry_carbon_arr.utils.zarr_utils import save_dataset_efficient_zarr, load_dataset_zarr

import gcsfs

# Project id comes from the environment (None when the variable is unset).
gcp_project = os.getenv("GOOGLE_CLOUD_PROJECT")

# NOTE(review): the credential file location is a hard-coded absolute path;
# consider reading it from an environment variable instead.
fs = gcsfs.GCSFileSystem(project=gcp_project, token='/usr/src/app/user_id.json')

# Project wrapper configured from the JSON file, then used to initialize GEE.
forestry = ForestryCarbonARR(config_path='./00_input/korindo.json')
forestry.initialize_gee()

In [None]:
# ============================================================================
# STEP 2: LOAD AREA OF INTEREST (AOI) - concession buffer only
# ============================================================================
# Load the AOI geometry for reference, both as a GeoDataFrame and as the
# Earth Engine object returned by the project helper.

from forestry_carbon_arr.core.utils import DataUtils
import geopandas as gpd
import geemap

data_utils = DataUtils(forestry.config, use_gee=True)
aoi_gpd, aoi_ee = data_utils.load_geodataframe_gee('./00_input/korindo_buffer.shp')

# Reproject to EPSG:32749 (UTM zone 49S) so geometry areas are in square metres.
aoi_gpd_utm = aoi_gpd.to_crs(epsg=32749)

# The check-mark was mojibake ("âœ…", UTF-8 bytes read as cp1252); restored here.
print(f"✅ AOI loaded: {len(aoi_gpd_utm)} features")
# Square metres -> hectares (1 ha = 10,000 m^2).
print(f"   Area: {aoi_gpd_utm.geometry.area.sum()/10000:.2f} hectares")

In [None]:
# Load the sample dataset from the local Zarr store. The GCS location of a
# store with the same file name is kept below (disabled) for reference.
# zarr_path = 'gs://remote_sensing_saas/01-korindo/planet_obia_arcgis/ds_clean_planet_obia_arcgis_202409_202507_median.zarr'
zarr_path = './01_output/ds_clean_planet_obia_arcgis_202409_202507_median.zarr'

ds_sample = load_dataset_zarr(zarr_path)
# Bare expression: shows the dataset's repr as the cell output.
ds_sample

In [None]:
## Training data setup: read the point and polygon training layers from disk.
import pandas as pd
import geopandas as gpd

# Shapefile locations for the two training layers (points, then polygons).
point_training_path, area_training_path = (
    './00_input/point_training.shp',
    './00_input/area_forest_training.shp',
)

# Read both layers in the same order as the paths above.
point_training, area_training = map(
    gpd.read_file,
    (point_training_path, area_training_path),
)

# Preview the first rows of the point layer.
point_training.head()


In [None]:
# Preview the first rows of the polygon training layer.
area_training.head()

In [None]:
import geopandas as gpd
import pandas as pd
import xarray as xr

# Attach a `groundtruth` label to every sample in `ds_sample` by spatially
# joining the sample coordinates against the point and polygon training layers.
# Samples that match neither layer get a missing label (NaN).

# 1) Bring the training layers into the same CRS (UTM 49S) as the samples.
point_training_utm = point_training.to_crs(epsg=32749)
area_training_utm = area_training.to_crs(epsg=32749)

pt_gdf = point_training_utm       # must have a 'type' column holding the label
polygon_gdf = area_training_utm   # same

# 2) Turn the sample coordinates into a GeoDataFrame of points.
#    After reset_index() the frame has a positional RangeIndex, which is used
#    below as the key for collapsing duplicate join rows.
samples_df = (
    ds_sample[["coord_x", "coord_y"]]
    .to_dataframe()
    .reset_index()
)
samples_gdf = gpd.GeoDataFrame(
    samples_df,
    geometry=gpd.points_from_xy(samples_df.coord_x, samples_df.coord_y),
    crs="EPSG:32749",
)

# 3) Overlay the training data (points first, then polygons).
#    A left sjoin emits one row per match, so a sample that hits several
#    training features is repeated under the same index. Keep only the first
#    match per sample after each join so the result stays one row per sample.
#    NOTE(review): "intersects" between two point layers only matches
#    coincident coordinates - confirm this is the intended matching rule.
with_points = gpd.sjoin(
    samples_gdf,
    pt_gdf[["type", "geometry"]],
    how="left",
    predicate="intersects",
)
with_points = with_points.loc[~with_points.index.duplicated(keep="first")]

#    geopandas prepends "_" to the suffixes itself, so "pt"/"poly" yield the
#    columns `type_pt` / `type_poly` (passing "_pt" would create `type__pt`
#    and make the lookup below fail with KeyError).
with_polygons = gpd.sjoin(
    with_points,
    polygon_gdf[["type", "geometry"]],
    how="left",
    predicate="within",
    lsuffix="pt",
    rsuffix="poly",
)
with_polygons = with_polygons.loc[~with_polygons.index.duplicated(keep="first")]

# 4) Decide which label wins (point label takes precedence over polygon here),
#    then force the original sample order.
groundtruth = (
    with_polygons["type_pt"]
    .fillna(with_polygons["type_poly"])
    .reindex(samples_gdf.index)
)

# Guard: labels must line up one-to-one with the `sample` dimension.
assert len(groundtruth) == ds_sample.sizes["sample"], (
    f"label count {len(groundtruth)} != number of samples {ds_sample.sizes['sample']}"
)

# 5) Bring the labels back onto the Xarray Dataset.
labels_da = xr.DataArray(
    groundtruth.values,
    dims="sample",
    coords={"sample": ds_sample["sample"]},
    name="groundtruth",
)
ds_sample = ds_sample.assign(groundtruth=labels_da.astype("object"))
