In [None]:
import os
import json
import yaml
import uuid
import time
from datetime import datetime
from osgeo import ogr
from shapely.geometry import shape, mapping, Polygon as ShapelyPolygon, MultiPolygon
from pyproj import CRS, Transformer
from labelbox import Client
from labelbox.data.annotation_types import Polygon as LBPolygon, Point, Label
from labelbox.data.annotation_types.annotation import ObjectAnnotation
from labelbox.data.serialization.ndjson import NDJsonConverter
from labelbox.schema.annotation_import import MALPredictionImport

In [None]:
import geopandas as gpd
import os
from shapely.geometry import Polygon

# === User inputs ===
# Path to the global grid shapefile and the IDs of the grid cells to turn into AOIs.
shapefile_path = r"E:\planetscope_lake_ice\Data\Input\Global Mollweide Grid\global_grid_50km_filtered.shp"
cell_ids_to_find = [
    9843, 20655, 11753, 5136, 36290, 1340, 14215, 2915, 41698, 39599, 43394,
    43754, 9299, 44651, 8208, 13666, 6141, 53431, 8819, 14049, 53715, 8203,
    7109, 40502, 18065, 28506, 5825, 12704, 16147, 181, 12874, 10419, 9646,
    8041, 49626, 30759, 16454, 9140, 7263, 5781, 13654, 16676, 50135, 58739,
]

# === Load global grid ===
grid = gpd.read_file(shapefile_path)

# === Build one GeoJSON-style lat/lon polygon per requested cell ===
# Keys are "Cell_<id>"; values are {"type": "Polygon", "coordinates": [ring]}.
bounding_boxes = {}

for cid in cell_ids_to_find:
    cell = grid.loc[grid["cell_id"] == cid]

    if not cell.empty:
        # Outer ring of the first matching cell, still in the grid's own CRS
        # (likely Mollweide, judging by the shapefile name).
        native_ring = list(cell.geometry.iloc[0].exterior.coords)

        # Wrap the ring in a one-row GeoDataFrame so geopandas can reproject
        # it to EPSG:4326 (lon/lat).
        cell_ll = gpd.GeoDataFrame(
            geometry=[Polygon(native_ring)], crs=grid.crs
        ).to_crs(epsg=4326)

        # Reprojected corner coordinates, stored under the site name.
        ring_ll = list(cell_ll.geometry.iloc[0].exterior.coords)
        bounding_boxes[f"Cell_{cid}"] = {
            "type": "Polygon",
            "coordinates": [ring_ll],
        }
    else:
        print(f"⚠️ Cell ID {cid} not found — skipping")

print(f"✅ Constructed bounding_boxes for {len(bounding_boxes)} cells")
bounding_boxes

In [None]:
# --- User-defined variables ---
# pld_gdb_path: geodatabase the lakes are read from.
# output_root:  folder under which per-site output directories are created.
# config_file:  YAML file that must provide the "api_key" and "project_id" keys.
pld_gdb_path = r"E:\planetscope_lake_ice\Data\Input\PLD\SWOT_PLD_v201_02042025_attributes_updated.gdb"
output_root = r"E:\planetscope_lake_ice\Data\Input"
config_file = r"D:\planetscope_lake_ice\labelbox_water_body_delineation_config.yaml"

# The string below is an inert template for defining `bounding_boxes` by hand.
# Each vertex is an [x, y] pair that downstream code reads as [lon, lat].
"""
# If you want to do this manually, input a dictionary of sites and AOI bounding boxes in Lat, Lon coordinates (EPSG 4326)
bounding_boxes = {
    "Cell_9843": {
        "type": "Polygon",
        "coordinates": [[
            [-93.767792, 57.174268],
            [-94.500054, 57.174268],
            [-95.340362, 57.683786],
            [-94.601588, 57.683786],
            [-93.767792, 57.174268]
        ]]
    },
}"""

# --- Labelbox setup ---
# Credentials are read from the YAML config rather than hard-coded here.
with open(config_file, "r", encoding="utf-8") as config_stream:
    cfg = yaml.safe_load(config_stream)

client = Client(cfg["api_key"])
project = client.get_project(cfg["project_id"])

print(f"Connected to project '{project.name}' (ID: {project.uid})")

In [None]:
def clip_lakes_within_bbox(pld_gdb_path, rect, study_site, output_root):
    """Extract the lakes lying entirely inside an AOI and save them to a shapefile.

    The first layer of the geodatabase is opened read-only and pre-filtered by
    the AOI's bounding rectangle; a feature is kept only when the AOI polygon
    fully contains its geometry. Kept features are written, with their
    attributes, to
    ``<output_root>/<study_site> 50x50 km - PLD/<study_site> Lakes Raw PLD - Shapefile/<study_site>_50x50km_lakes.shp``.
    An existing shapefile at that path is deleted first.

    Parameters
    ----------
    pld_gdb_path : str
        Path to the geodatabase to read.
    rect : dict
        GeoJSON-style geometry dict describing the AOI.
    study_site : str
        Site name used to build the output directory and file names.
    output_root : str
        Directory under which the per-site output folders are created.

    Returns
    -------
    tuple
        ``(shp_path, lakes_data)``. ``lakes_data`` is a list of dicts with keys
        ``"lake_id"`` (the ``lake_id`` attribute, or a random UUID string when
        that attribute is empty), ``"geometry"`` (a shapely geometry) and
        ``"properties"`` (input field name -> value).

    Raises
    ------
    RuntimeError
        If the geodatabase cannot be opened, has no layer, or the output
        shapefile/layer cannot be created.
    """
    ds = ogr.Open(pld_gdb_path, 0)
    if ds is None:
        raise RuntimeError(f"Cannot open GDB: {pld_gdb_path}")
    layer = ds.GetLayerByIndex(0)
    if layer is None:
        raise RuntimeError(f"No layer found in GDB: {pld_gdb_path}")
    in_srs = layer.GetSpatialRef()
    # A layer without a spatial reference yields None; don't crash on the log line.
    srs_text = in_srs.ExportToProj4() if in_srs is not None else "undefined"
    print(f"Input CRS: {srs_text}")

    # NOTE(review): the AOI is applied to the layer without any reprojection,
    # so `rect` is assumed to be expressed in the layer's CRS — confirm.
    aoi_poly = shape(rect)
    minx, miny, maxx, maxy = aoi_poly.bounds
    layer.SetSpatialFilterRect(minx, miny, maxx, maxy)

    # Output directories
    site_base = os.path.join(output_root, f"{study_site} 50x50 km - PLD")
    shp_dir = os.path.join(site_base, f"{study_site} Lakes Raw PLD - Shapefile")
    os.makedirs(shp_dir, exist_ok=True)
    shp_path = os.path.join(shp_dir, f"{study_site}_50x50km_lakes.shp")

    # Write shapefile
    driver = ogr.GetDriverByName("ESRI Shapefile")
    if os.path.exists(shp_path):
        driver.DeleteDataSource(shp_path)
    out_ds = driver.CreateDataSource(shp_path)
    if out_ds is None:
        raise RuntimeError(f"Cannot create shapefile: {shp_path}")

    in_defn = layer.GetLayerDefn()
    field_count = in_defn.GetFieldCount()
    field_names = [in_defn.GetFieldDefn(i).GetNameRef() for i in range(field_count)]

    lakes_data = []
    try:
        out_layer = out_ds.CreateLayer("clipped", srs=in_srs, geom_type=ogr.wkbPolygon)
        if out_layer is None:
            raise RuntimeError(f"Cannot create layer in shapefile: {shp_path}")

        # Map input field index -> output field index. The Shapefile driver
        # truncates field names to 10 characters, so looking output fields up
        # by the *input* name fails for longer names; indices are immune to
        # that renaming. Fields the driver refuses to create are left out.
        out_index = {}
        for i in range(field_count):
            if out_layer.CreateField(in_defn.GetFieldDefn(i)) == ogr.OGRERR_NONE:
                out_index[i] = out_layer.GetLayerDefn().GetFieldCount() - 1
        out_defn = out_layer.GetLayerDefn()

        for feat in layer:
            geom = feat.GetGeometryRef()
            if geom is None:
                continue
            shapely_geom = shape(json.loads(geom.ExportToJson()))
            # The rectangle filter is only a coarse pre-selection; keep the
            # feature only when it lies completely inside the AOI polygon.
            if not aoi_poly.contains(shapely_geom):
                continue

            values = [feat.GetField(i) for i in range(field_count)]

            out_feat = ogr.Feature(out_defn)
            for src_idx, dst_idx in out_index.items():
                out_feat.SetField(dst_idx, values[src_idx])
            out_feat.SetGeometry(ogr.CreateGeometryFromJson(json.dumps(mapping(shapely_geom))))
            out_layer.CreateFeature(out_feat)
            out_feat = None

            # Fall back to a random identifier when the attribute is empty.
            lake_id = feat.GetField('lake_id') or str(uuid.uuid4())
            lakes_data.append({
                "lake_id": lake_id,
                "geometry": shapely_geom,
                "properties": dict(zip(field_names, values)),
            })
    finally:
        # Dropping the references flushes and closes the shapefile, even when
        # an error interrupts the loop (a live kernel would otherwise keep it open).
        out_layer = None
        out_ds = None

    print(f"Saved {len(lakes_data)} lakes → {shp_path}")
    return shp_path, lakes_data


def create_xyz_tile_data_row(client, project, study_site, bbox_geojson):
    """Create a tiled-imagery data row for a site and add it to the project.

    Looks up the dataset named "Lake_Ice_Water_Bodies", uploads one "TMS_GEO"
    data row whose bounds are the min/max latitude and longitude of the AOI's
    first ring, and then creates a project batch containing that row.

    Parameters
    ----------
    client
        Labelbox client; used to list datasets and fetch the created data row.
    project
        Labelbox project that receives the new batch.
    study_site : str
        Site name; combined with a timestamp for the row key and batch name.
    bbox_geojson : dict
        GeoJSON-style polygon whose first ring holds [lon, lat, ...] vertices.

    Returns
    -------
    tuple
        ``(data_row, dataset)``.

    Raises
    ------
    RuntimeError
        If the dataset does not exist or the upload task reports errors.
    """
    dataset_name = "Lake_Ice_Water_Bodies"
    for candidate in client.get_datasets():
        if candidate.name == dataset_name:
            dataset = candidate
            break
    else:
        raise RuntimeError(f"Dataset {dataset_name} not found")

    # Split the outer ring into longitudes and latitudes.
    ring = bbox_geojson["coordinates"][0]
    lons = [vertex[0] for vertex in ring]
    lats = [vertex[1] for vertex in ring]

    # Bounds are given as [[min lat, min lon], [max lat, max lon]].
    lower_corner = [min(lats), min(lons)]
    upper_corner = [max(lats), max(lons)]

    # One timestamp is shared by the row key and the batch name.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    row_key = f"{study_site}_{stamp}"

    row_data = {
        "tile_layer_url": "https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
        "bounds": [lower_corner, upper_corner],
        "zoom_levels": {"min": 1, "max": 18},
        "geometry": bbox_geojson,
    }
    payload = {
        "row_data": row_data,
        "media_type": "TMS_GEO",
        "global_key": row_key,
        "external_id": row_key,
    }

    upload = dataset.create_data_rows([payload])
    upload.wait_till_done()
    if upload.errors:
        raise RuntimeError("Upload errors: " + str(upload.errors))

    data_row = client.get_data_row(upload.result[0]["id"])
    print(f"Created new DataRow {data_row.uid}")

    project.create_batch(
        name=f"{study_site}_batch_{stamp}",
        data_rows=[data_row.uid],
        priority=1,
    )

    return data_row, dataset


def upload_prelabels_with_boundary(client, project, data_row, lakes_data, rect, study_site):
    """Upload the AOI boundary and the lake outlines as pre-labels for a data row.

    The project's ontology must contain tools named "Lakes" and "Bounding box"
    (matched case-insensitively). One label is built for the AOI ring and one
    for the exterior ring of every lake polygon (each part of a multi-part
    geometry is uploaded separately; interior rings are not uploaded). The
    labels are serialised to NDJSON and submitted as a MAL prediction import.

    Parameters
    ----------
    client
        Labelbox client used to create the import job.
    project
        Labelbox project providing the ontology and receiving the import.
    data_row
        Data row the labels are attached to (its ``uid`` is used).
    lakes_data : list of dict
        Items with a ``"geometry"`` entry holding a shapely geometry.
    rect : dict
        GeoJSON-style polygon of the AOI; its first ring is uploaded as the
        boundary.
    study_site : str
        Site name used in the import job name and in log messages.

    Returns
    -------
    The import job, after waiting for it to finish. Job errors are printed,
    not raised.

    Raises
    ------
    ValueError
        If either required ontology tool is missing.
    """
    ontology = project.ontology()
    tools = ontology.normalized["tools"]
    polygon_tool = next((t for t in tools if t["name"].lower() == "lakes"), None)
    boundary_tool = next((t for t in tools if t["name"].lower() == "bounding box"), None)
    if not polygon_tool or not boundary_tool:
        raise ValueError("Required ontology tools not found (expecting 'Lakes' and 'Bounding box')")

    def _ring_to_points(ring):
        # Index the vertex instead of unpacking it as (lon, lat): geometries
        # carrying a Z value yield 3-tuples, which would otherwise raise
        # "too many values to unpack".
        return [Point(x=vertex[0], y=vertex[1]) for vertex in ring]

    labels = []

    # Add bounding box polygon
    boundary_poly = LBPolygon(points=_ring_to_points(rect["coordinates"][0]))
    boundary_label = Label(
        data={"uid": data_row.uid},
        annotations=[ObjectAnnotation(name=boundary_tool["name"], value=boundary_poly)]
    )
    labels.append(boundary_label)

    # Add each lake polygon
    for lake in lakes_data:
        geom = lake["geometry"]
        if geom.is_empty:
            continue
        # Treat a single polygon and a multi-part geometry uniformly.
        polys = [geom] if geom.geom_type == "Polygon" else list(geom.geoms)
        for poly in polys:
            # Drop the last vertex of the exterior ring, as the original does.
            coords = list(poly.exterior.coords)[:-1]
            lb_poly = LBPolygon(points=_ring_to_points(coords))
            ann = ObjectAnnotation(name=polygon_tool["name"], value=lb_poly)
            labels.append(Label(data={"uid": data_row.uid}, annotations=[ann]))

    ndjson = list(NDJsonConverter.serialize(labels))
    print(f"Prepared {len(ndjson)} polygons (boundary + lakes).")

    job = MALPredictionImport.create_from_objects(
        client=client,
        project_id=project.uid,
        name=f"{study_site}_prelabels_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
        predictions=ndjson
    )

    job.wait_until_done()
    if job.errors:
        print("Errors during upload:", job.errors)
    else:
        print(f"Uploaded boundary and lakes for {study_site}.")
    return job

In [None]:
# Run the whole pipeline for each AOI: clip lakes, create the data row, upload pre-labels.
for site_name, bbox in bounding_boxes.items():
    print(f"\n Processing site: {site_name}")

    # Clip the PLD lakes to this AOI (also writes the per-site shapefile).
    shp_path, lakes_data = clip_lakes_within_bbox(
        pld_gdb_path, bbox, site_name, output_root
    )
    print(f"→ Found {len(lakes_data)} lakes")

    if lakes_data:
        # Register the AOI as a tiled-imagery data row in Labelbox.
        data_row, dataset = create_xyz_tile_data_row(client, project, site_name, bbox)

        # Attach the AOI boundary and the lake outlines as pre-labels.
        job = upload_prelabels_with_boundary(
            client, project, data_row, lakes_data, bbox, site_name
        )

        print(f"Finished upload for {site_name}")
    else:
        # Nothing to label for this site, so no data row is created.
        print("No lakes found — skipping upload")