In [1]:
import os
import json
import yaml
import uuid
import time
from datetime import datetime
from osgeo import ogr
from shapely.geometry import shape, mapping, Polygon as ShapelyPolygon, MultiPolygon
from pyproj import CRS, Transformer
from ipywidgets import Output
from labelbox import Client
from ipyleaflet import Map, Marker, Polygon as LeafletPolygon, basemaps
from labelbox.data.annotation_types import Polygon as LBPolygon, Point, Label
from labelbox.data.annotation_types.annotation import ObjectAnnotation
from labelbox.schema.ontology import Ontology
from labelbox.schema.data_row import DataRow 
from labelbox.data.serialization.ndjson import NDJsonConverter
from labelbox.schema.annotation_import import MALPredictionImport

In [None]:
# ---- Parameters ----
# Both values feed the map/marker setup below; square_size_km is also read when the AOI rect is built.
square_size_km = 50   # side length of the square bounding box in km (50 x 50 km)
initial_location = (35.65, 86.45)  # (lat, lon) starting point for the marker - adjust as needed

# ---- Helper to compute box corners ----
def make_bbox(lat0, lon0, square_size_km=50):
    """Return the corners of a square box centred on a point, as (lat, lon) pairs.

    The square is laid out in metres in an azimuthal-equidistant projection
    centred on (lat0, lon0), and its corners are transformed back to WGS84
    (EPSG:4326).

    Args:
        lat0: Latitude of the box centre, in degrees.
        lon0: Longitude of the box centre, in degrees.
        square_size_km: Side length of the square, in kilometres.

    Returns:
        A list of five (lat, lon) tuples forming a closed ring; the first
        corner is repeated as the last element.
    """
    aeqd = CRS.from_proj4(f"+proj=aeqd +lat_0={lat0} +lon_0={lon0} +units=m +datum=WGS84")
    wgs84 = CRS.from_epsg(4326)
    # Only the projected -> geographic direction is needed. This function runs on
    # every marker move, so the unused forward transformer is not built.
    to_wgs84 = Transformer.from_crs(aeqd, wgs84, always_xy=True)
    half_side = (square_size_km * 1000) / 2
    # Corner offsets from the centre, in projected metres (x, y)
    proj_corners = [
        (-half_side, -half_side),
        (-half_side, half_side),
        (half_side, half_side),
        (half_side, -half_side),
        (-half_side, -half_side),
    ]
    # always_xy=True makes transform() return (lon, lat); reverse each pair to (lat, lon)
    return [to_wgs84.transform(x, y)[::-1] for x, y in proj_corners]

# ---- Map + drag marker ----
# Output widget that receives the text printed by the marker callback
out = Output()
# Currently selected centre; starts at the initial location and is read again when the AOI rect is built
clicked_coords = {'lat0': initial_location[0], 'lon0': initial_location[1]}
marker = Marker(location=initial_location, draggable=True)

# Use the **Leaflet** Polygon for drawing the bounding box
# (Polygon is imported under three aliases in this notebook: Shapely, Leaflet and Labelbox)
bbox_polygon = LeafletPolygon(
    locations=make_bbox(*initial_location, square_size_km),
    color="red", weight=3, fill=False
)

# Esri World Imagery basemap centred on the initial location, with the marker and box overlaid
m = Map(center=initial_location, zoom=7, basemap=basemaps.Esri.WorldImagery,
        scroll_wheel_zoom=True)
m.add_layer(marker)
m.add_layer(bbox_polygon)

def on_drag_end(event=None, **kwargs):
    """Sync the stored centre, the drawn box and the log line with the marker.

    Registered as an observer on the marker; the change payload itself is
    ignored and the current position is read straight from ``marker.location``.
    """
    new_lat, new_lon = marker.location
    # Remember the selection so later cells can build the AOI from it
    clicked_coords.update(lat0=new_lat, lon0=new_lon)
    # Redraw the box around the new centre
    bbox_polygon.locations = make_bbox(new_lat, new_lon, square_size_km)
    with out:
        out.clear_output()
        print(f"Selected Center → lat0={new_lat:.4f}, lon0={new_lon:.4f}")

# Run on_drag_end whenever the marker's `location` trait changes, then show the map and the log area
marker.observe(on_drag_end, names='location')
display(m, out)

Map(center=[35.65, 86.45], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_o…

Output()

In [None]:
# User-defined variables
study_site = "GR"  # Your site name
pld_gdb_path = r"E:\planetscope_lake_ice\Data\Input\PLD\SWOT_PLD_v201_02042025_attributes_updated.gdb"
output_root = r"E:\planetscope_lake_ice\Data\Input"
# NOTE(review): this path is on D: while the data paths above are on E: - confirm that is intended
config_file = r"D:\planetscope_lake_ice\labelbox_water_body_delineation_config.yaml"

# Build rect polygon from marker location
# (the result depends on where the marker was last placed on the map above)
lat0, lon0 = clicked_coords['lat0'], clicked_coords['lon0']
corners = make_bbox(lat0, lon0, square_size_km)

# GeoJSON rect with proper (lon,lat) ordering
# make_bbox returns (lat, lon) pairs, so each pair is swapped here
rect = {
    "type": "Polygon",
    "coordinates": [[(lon, lat) for lat, lon in corners]]
}

print(f"Study site: {study_site}")
print(f"Center: {lat0:.6f}, {lon0:.6f}")
print("Bounding box created")

# Alternative: hard-code the AOI instead of taking it from the marker, e.g.
# (kept as comments so the cell does not echo a stray string as its output)
#
# rect = {
#     "type": "Polygon",
#     "coordinates": [ [
#         [-51.158238, 67.256290],
#         [-51.169263, 67.704585],
#         [-49.988726, 67.704585],
#         [-49.999751, 67.256290],
#         [-51.158238, 67.256290],
#     ] ]
# }

Study site: GR
Center: 68.618536, 36.611939
Bounding box created


'\nrect = { \n\t"type": "Polygon", \n\t"coordinates": [ [ \n\t\t[-51.158238, 67.256290], \n\t\t[-51.169263, 67.704585], \n\t\t[-49.988726, 67.704585], \n\t\t[-49.999751, 67.256290], \n\t\t[-51.158238, 67.256290], \n\t] ] \n}'

In [6]:
def clip_lakes_within_bbox(pld_gdb_path, rect, study_site, output_root, square_size_km=50):
    """Select the lakes lying entirely inside an AOI and save them to a shapefile.

    Despite the name, geometries are not cut at the AOI edge: a feature is kept
    only when the AOI polygon fully contains it, and it is written unchanged.

    Args:
        pld_gdb_path: Path to the geodatabase; its first layer is read.
        rect: GeoJSON-like Polygon mapping describing the AOI. It is compared
            directly with the layer geometries, so it is presumably expected to
            be in the layer's CRS (TODO confirm).
        study_site: Site name used in the output folder and file names.
        output_root: Directory under which the output folders are created.
        square_size_km: AOI side length, used only to label the output folder
            and file names. The default of 50 reproduces the original "50x50"
            names.

    Returns:
        A tuple (shp_path, lakes_data). lakes_data is a list of dicts with the
        keys "lake_id", "geometry" (Shapely geometry) and "properties"
        (field name -> value).

    Raises:
        RuntimeError: If the geodatabase cannot be opened, contains no layer,
            or the output shapefile cannot be created.
    """
    ds = ogr.Open(pld_gdb_path, 0)
    if ds is None:
        raise RuntimeError(f"Cannot open GDB: {pld_gdb_path}")

    layer = None
    out_ds = None
    out_layer = None
    try:
        layer = ds.GetLayerByIndex(0)
        if layer is None:
            raise RuntimeError(f"No layer found in GDB: {pld_gdb_path}")
        in_srs = layer.GetSpatialRef()
        # A layer without a CRS yields None here; don't crash on the log line
        print("Input CRS:", in_srs.ExportToProj4() if in_srs is not None else "undefined")

        aoi_poly = shape(rect)
        minx, miny, maxx, maxy = aoi_poly.bounds
        # Coarse pre-filter on the AOI envelope; the exact test happens in the loop
        layer.SetSpatialFilterRect(minx, miny, maxx, maxy)

        # Output dirs
        size_tag = f"{square_size_km:g}x{square_size_km:g}"
        site_base = os.path.join(output_root, f"{study_site} {size_tag} km - PLD")
        shp_dir = os.path.join(site_base, f"{study_site} Lakes Raw PLD - Shapefile")
        os.makedirs(shp_dir, exist_ok=True)

        shp_path = os.path.join(shp_dir, f"{study_site}_{size_tag}km_lakes.shp")

        # Write shapefile (replacing any previous output for this site)
        driver = ogr.GetDriverByName("ESRI Shapefile")
        if os.path.exists(shp_path):
            driver.DeleteDataSource(shp_path)
        out_ds = driver.CreateDataSource(shp_path)
        if out_ds is None:
            raise RuntimeError(f"Cannot create shapefile: {shp_path}")
        out_layer = out_ds.CreateLayer("clipped", srs=in_srs, geom_type=ogr.wkbPolygon)

        in_defn = layer.GetLayerDefn()
        field_count = in_defn.GetFieldCount()
        field_names = [in_defn.GetFieldDefn(i).GetNameRef() for i in range(field_count)]

        # Map each source field index to the index it received in the output
        # layer. The output driver may rename a field (e.g. shorten its name),
        # so attributes are copied by index rather than by source name.
        field_map = []
        for in_idx in range(field_count):
            if out_layer.CreateField(in_defn.GetFieldDefn(in_idx)) == ogr.OGRERR_NONE:
                field_map.append((in_idx, out_layer.GetLayerDefn().GetFieldCount() - 1))
            else:
                print(f"Warning: could not create field '{field_names[in_idx]}' in shapefile; skipping it")
        out_defn = out_layer.GetLayerDefn()

        lakes_data = []
        count = 0
        for feat in layer:
            geom = feat.GetGeometryRef()
            if geom is None:
                continue
            shapely_geom = shape(json.loads(geom.ExportToJson()))
            # Keep only lakes fully inside the AOI; partially overlapping ones are dropped
            if not aoi_poly.contains(shapely_geom):
                continue

            values = [feat.GetField(i) for i in range(field_count)]

            out_feat = ogr.Feature(out_defn)
            for in_idx, out_idx in field_map:
                out_feat.SetField(out_idx, values[in_idx])
            out_feat.SetGeometry(ogr.CreateGeometryFromJson(json.dumps(mapping(shapely_geom))))
            out_layer.CreateFeature(out_feat)
            out_feat = None

            # Fall back to a random ID when the feature has no usable lake_id
            lake_id = feat.GetField('lake_id') or str(uuid.uuid4())
            lakes_data.append({
                "lake_id": lake_id,
                "geometry": shapely_geom,
                "properties": dict(zip(field_names, values)),
            })
            count += 1
    finally:
        # Drop the OGR handles so the shapefile is flushed and the GDB released,
        # even when an error interrupts the processing above
        out_layer = None
        out_ds = None
        layer = None
        ds = None

    print(f"Shapefile saved {count} lakes → {shp_path}")

    return shp_path, lakes_data

In [7]:
# Run the lake selection for the AOI chosen above
shp_path, lakes_data = clip_lakes_within_bbox(pld_gdb_path, rect, study_site, output_root)

# Short summary so the reader can sanity-check the result
print(f"\nTotal lakes found: {len(lakes_data)}")
if len(lakes_data) > 0:
    print(f"First lake ID: {lakes_data[0]['lake_id']}")

Input CRS: +proj=longlat +datum=WGS84 +no_defs
Shapefile saved 1280 lakes → E:\planetscope_lake_ice\Data\Input\GR 50x50 km - PLD\GR Lakes Raw PLD - Shapefile\GR_50x50km_lakes.shp

Total lakes found: 1280
First lake ID: 2520565172.0


In [8]:
# Read the Labelbox API key and project ID from the YAML config file
# (keeps the credentials out of the notebook itself)
with open(config_file, "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)
api_key = cfg["api_key"]
project_id = cfg["project_id"]

# Connect to Labelbox and fetch the target project; `client` and `project` are used by the cells below
client = Client(api_key)
project = client.get_project(project_id)
print(f"Connected to project {project.name} (ID: {project.uid})")

Connected to project Lake Ice Project - Water Body Delineation (ID: cmen9gtep08l007xl1zhp87lk)


In [9]:
def create_xyz_tile_data_row(
    client,
    project,
    study_site,
    lat0,
    lon0,
    bbox_geojson,
    dataset_name="Lake_Ice_Water_Bodies",
    tile_layer_url="https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}",
):
    """Create a tiled-imagery DataRow for the AOI and attach it to the project.

    Args:
        client: Labelbox client used to look up the dataset and the new DataRow.
        project: Labelbox project that receives the DataRow in a new batch.
        study_site: Site name used as prefix of the external ID and batch name.
        lat0: Unused; kept so existing callers keep working.
        lon0: Unused; kept so existing callers keep working.
        bbox_geojson: GeoJSON-like Polygon mapping whose first ring holds
            (lon, lat) pairs; its extent becomes the DataRow bounds.
        dataset_name: Name of the existing dataset that receives the DataRow.
        tile_layer_url: Tile URL template stored in the DataRow.

    Returns:
        A tuple (data_row, dataset).

    Raises:
        RuntimeError: If no dataset with the given name exists, or the upload
            task reports errors.
        ValueError: If the bounding-box ring contains no coordinates.
    """
    # Get dataset by name (a default of None avoids a chained StopIteration traceback)
    dataset = next((ds for ds in client.get_datasets() if ds.name == dataset_name), None)
    if dataset is None:
        raise RuntimeError(f"Dataset {dataset_name} not found in your Labelbox account")

    print("Using dataset:", dataset.name, f"({dataset.uid})")

    # Build bounding box as [[lat_min, lon_min], [lat_max, lon_max]]
    coords = bbox_geojson["coordinates"][0]
    if not coords:
        raise ValueError("bbox_geojson has no coordinates in its first ring")
    lons = [c[0] for c in coords]
    lats = [c[1] for c in coords]
    lat_min, lat_max = min(lats), max(lats)
    lon_min, lon_max = min(lons), max(lons)
    bounds = [[lat_min, lon_min], [lat_max, lon_max]]

    # Unique ID for DataRow (one-second resolution: two calls for the same site
    # within the same second would produce the same key)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    external_id = f"{study_site}_{timestamp}"

    row_data = {
        "tile_layer_url": tile_layer_url,
        "bounds": bounds,
        "zoom_levels": {"min": 1, "max": 18},
        "geometry": bbox_geojson
    }

    dr_dict = {
        "row_data": row_data,
        "media_type": "TMS_GEO",
        "global_key": external_id,
        "external_id": external_id,
    }

    task = dataset.create_data_rows([dr_dict])
    task.wait_till_done()
    if task.errors:
        raise RuntimeError("Upload errors: " + str(task.errors))

    dr_id = task.result[0]["id"]
    data_row = client.get_data_row(dr_id)
    print("Created new DataRow:", data_row.uid)

    # Attach DataRow to the project once
    batch_name = f"{study_site}_batch_{timestamp}"
    project.create_batch(
        name=batch_name,
        data_rows=[data_row.uid],
        priority=1
    )
    print(f"Attached DataRow to project via {batch_name}")

    return data_row, dataset

In [10]:
def convert_lakes_to_labelbox_annotations(lakes_data, data_row_id, study_site):
    """Turn lake geometries into a list of annotation dicts for one data row.

    Each Polygon (or each member of a MultiPolygon) yields one dict with the
    keys "uuid", "schemaId", "dataRow" and "polygon". Only the exterior ring is
    used. Other geometry types are reported and skipped. ``study_site`` is
    accepted but not used. "schemaId" is left as None and has to be filled in
    by the caller.
    """
    results = []

    for record in lakes_data:
        lake_id = record['lake_id']
        lake_geom = record['geometry']

        # Normalise the geometry to a list of simple polygons
        if isinstance(lake_geom, ShapelyPolygon):
            parts = [lake_geom]
        elif isinstance(lake_geom, MultiPolygon):
            parts = list(lake_geom.geoms)
        else:
            print(f"Skipping non-polygon geometry for lake {lake_id}")
            continue

        for part in parts:
            # Drop the closing vertex, which duplicates the first one
            ring = list(part.exterior.coords[:-1])
            # Coordinates are (lon, lat); Labelbox points take x=lon, y=lat
            vertices = [Point(x=lon, y=lat) for lon, lat in ring]
            outline = LBPolygon(points=vertices)

            results.append({
                "uuid": str(uuid.uuid4()),
                "schemaId": None,  # to be set from the project ontology
                "dataRow": {"id": data_row_id},
                "polygon": outline,
            })

    return results

def upload_prelabels_mal(client, project, data_row, lakes_data, study_site):
    """Upload lake outlines as model-assisted-labeling (MAL) prelabels.

    The first polygon tool found in the project ontology is used for every
    annotation. Only the exterior ring of each polygon is sent; interior rings
    are not transferred.

    Args:
        client: Labelbox client passed to the MAL import.
        project: Labelbox project providing the ontology and receiving the import.
        data_row: DataRow the prelabels are attached to (its ``uid`` is used).
        lakes_data: Iterable of dicts with a "geometry" entry holding a Shapely
            geometry.
        study_site: Site name used as prefix of the import name.

    Returns:
        The MAL import job, or None when there is no polygon to upload.

    Raises:
        ValueError: If the ontology has no polygon tool.
    """
    # Ontology lookup
    ontology = project.ontology()
    polygon_tool = next((t for t in ontology.normalized["tools"] if t["tool"] == "polygon"), None)
    if not polygon_tool:
        raise ValueError("Polygon tool not found in ontology")
    print("Using tool:", polygon_tool["name"])

    # Collect label objects
    labels = []
    for lake in lakes_data:
        geom = lake["geometry"]
        if geom is None or geom.is_empty:
            continue
        if geom.geom_type == "Polygon":
            polys = [geom]
        elif geom.geom_type in ("MultiPolygon", "GeometryCollection"):
            polys = [part for part in geom.geoms if part.geom_type == "Polygon"]
        else:
            # Points and lines have no exterior ring; skip them instead of crashing
            print(f"Skipping non-polygon geometry ({geom.geom_type}) for lake {lake.get('lake_id')}")
            continue
        for poly in polys:
            if poly.is_empty:
                continue
            # Drop the closing vertex, which duplicates the first one
            coords = list(poly.exterior.coords)[:-1]
            if len(coords) < 3:
                continue
            # Index the coordinates so that 3D (x, y, z) vertices work as well
            lb_poly = LBPolygon(points=[Point(x=c[0], y=c[1]) for c in coords])
            obj_ann = ObjectAnnotation(
                name=polygon_tool["name"],
                value=lb_poly
            )
            labels.append(
                Label(
                    data={"uid": data_row.uid},
                    annotations=[obj_ann]
                )
            )

    # Nothing to upload: stop here instead of failing on the preview below
    if not labels:
        print("No polygon geometries to upload; skipping MAL import.")
        return None

    ndjson = list(NDJsonConverter.serialize(labels))
    print("Prepared", len(ndjson), "prelabels for upload. Preview:")
    print(json.dumps(ndjson[0], indent=2)[:400], "...")  # preview first item

    # Upload MAL prelabels
    unique_name = f"{study_site}_prelabels_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    job = MALPredictionImport.create_from_objects(
        client=client,
        project_id=project.uid,
        name=unique_name,
        predictions=ndjson
    )
    job.wait_until_done()
    if job.errors:
        print("Errors:", job.errors)
    else:
        print("Uploaded prelabels:", unique_name, "to DataRow:", data_row.uid)

    return job

# Create the tiled-imagery DataRow, then attach the lake outlines to it as MAL prelabels.
# Each run of this cell creates a new DataRow and a new batch in Labelbox.
data_row, dataset = create_xyz_tile_data_row(
    client=client,
    project=project,
    study_site=study_site,
    lat0=lat0,
    lon0=lon0,
    bbox_geojson=rect,
)
job = upload_prelabels_mal(
    client=client,
    project=project,
    data_row=data_row,
    lakes_data=lakes_data,
    study_site=study_site,
)

Using dataset: Lake_Ice_Water_Bodies (cmen9im6u005q0703rrwm5ymu)
Created new DataRow: cmfmti8h94xpf0744orb7xjhz
Attached DataRow to project via GR_batch_20250916_131918
Using tool: Lakes
Prepared 1280 prelabels for upload. Preview:
{
  "uuid": "0f05a105-1561-488b-ae45-6577caa97991",
  "dataRow": {
    "id": "cmfmti8h94xpf0744orb7xjhz"
  },
  "name": "Lakes",
  "classifications": [],
  "polygon": [
    {
      "x": 36.56249888900004,
      "y": 68.69915081100004
    },
    {
      "x": 36.56102021800007,
      "y": 68.69912948300004
    },
    {
      "x": 36.560697435000066,
      "y": 68.70208642800003
    },
    {
      "x ...
Uploaded prelabels: GR_prelabels_20250916_131931 to DataRow: cmfmti8h94xpf0744orb7xjhz


In [11]:
def add_to_labeling_queue(project, data_row):
    """Print a confirmation that the data row is ready for labeling.

    Nothing is queued or queried here: the function only reports the data
    row's ID and the project name, or a notice when no data row was given.
    Any error raised while building the message is printed instead of raised.
    """
    if data_row:
        try:
            # Just confirm association
            attached_msg = f"DataRow {data_row.uid} is already attached to project {project.name}."
            print(attached_msg)
            print("You can now start labeling in the Labelbox UI.")
        except Exception as err:
            print(f"Error checking DataRow status: {err}")
    else:
        print("No data row available")

# Print a confirmation for the DataRow; this call does not modify the project
# (the DataRow was attached via project.create_batch when it was created)
add_to_labeling_queue(project, data_row)

DataRow cmfmti8h94xpf0744orb7xjhz is already attached to project Lake Ice Project - Water Body Delineation.
You can now start labeling in the Labelbox UI.
