# Project 3D polygons to spherical panoramas

In [1]:
import os
import sys
import json


# Add the project root (the parent of the notebook's working directory) to the
# Python path so that the `scripts` package imported below can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), '../'))
# Guard the append so that re-running this cell does not pile up duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from shapely.geometry import Point, Polygon
from scripts.utils.projection import *


Load 3D ground truth object coordinates

In [None]:
# GeoPackage holding the 3D ground-truth polygons; the same path is reused
# later in the notebook when the repaired geometries are written back.
gt_path = '../data/neuchatel/NE_GT_3D.gpkg'
gdf_gt = gpd.read_file(filename=gt_path, layer='NE_GT_3D')

# Show the loaded table (id + POLYGON Z geometry) as the cell output.
gdf_gt

Unnamed: 0,id,geometry
0,4,"POLYGON Z ((2560778.546 1204397.79 432.848, 25..."
1,3,"POLYGON Z ((2560818.338 1204422.75 433.011, 25..."
2,8,"POLYGON Z ((2560781.04 1204415.25 433.16, 2560..."
3,5,"POLYGON Z ((2560888.203 1204466.718 433.743, 2..."
4,2063,"POLYGON Z ((2557971.008 1203983.64 534.932, 25..."
...,...,...
1396,2892,"POLYGON Z ((2527886.832 1195469.928 926.829, 2..."
1397,2893,"POLYGON Z ((2527881.852 1195469.449 926.797, 2..."
1398,2894,"POLYGON Z ((2527871.733 1195469.496 926.806, 2..."
1399,2830,"POLYGON Z ((2529407.513 1195464.325 929.927, 2..."


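Check that every object has a valid height from the LiDAR elevation. If the height is missing (zero) for all vertices, the object is ignored; if it is missing for only some vertices, those heights are filled in from the nearest valid vertex.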

In [3]:
# Repair missing heights (Z == 0) in the ground-truth polygons.
#   * no zero Z values   -> geometry is left untouched
#   * all Z values zero  -> nothing to interpolate from; the object is reported
#                           and left as-is
#   * some Z values zero -> zeros are filled by `replace_zeros_with_nearest`
for gt_idx, circle in gdf_gt.iterrows():
    # For multi-part geometries use: circle.geometry.geoms[0].exterior.coords
    world_coords = np.array(circle.geometry.exterior.coords)
    x, y, z = world_coords.T

    missing = z == 0
    if not missing.any():
        # Heights are already valid; do not rebuild the polygon needlessly
        # (rebuilding from the exterior ring would also drop interior rings).
        continue
    if missing.all():
        # Skip if all Z coordinates are zero. Without this `continue` the
        # all-zero vector would be passed on to `replace_zeros_with_nearest`,
        # which has no valid height to copy from.
        print(f"Skipping GT {gt_idx} due to zero Z values.")
        continue

    # Replace zero Z values with the nearest non-zero value in the z vector
    z = replace_zeros_with_nearest(z)

    # Update the geometry with the repaired z values
    new_coords = np.column_stack((x, y, z))
    gdf_gt.at[gt_idx, 'geometry'] = Polygon(new_coords)

# Declare the CRS as EPSG:2056 and write the repaired layer back to the same
# GeoPackage it was loaded from (this overwrites the input file).
gdf_gt = gdf_gt.set_crs(epsg=2056, allow_override=True)
gdf_gt.to_file(gt_path, driver='GPKG', overwrite=True)


Load the camera metadata for each image. Set the image file suffix, the image size and the camera offset, and rename the metadata columns so that they are named consistently for the following steps.

In [None]:
# Image settings: file suffix and panorama size (passed later to the
# spherical projection and written into the COCO image records).
img_suffix = '.jpg'
height, width = 4000, 8000
# Offsets added to the recorded camera pose, ordered as
# [x, y, z, yaw, pitch, roll] (see how they are consumed in the projection cell).
camera_offset = [0, 0, 0, 0.5, 0, -0.3]

# Load the trajectory file, keep only the columns needed downstream and
# rename them to the short names used in the rest of the notebook.
camera_df = (
    pd.read_csv('../data/neuchatel/ne_traject.csv')  # Replace with your actual file
    .loc[:, ['file_name', 'datetimeoriginal', 'x_m_', 'y_m_', 'z_m_',
             'gpsimgdirection', 'gpspitch', 'gpsroll']]
    .rename(columns={
        'file_name': 'File',
        'datetimeoriginal': 'Time',
        'x_m_': 'x',
        'y_m_': 'y',
        'z_m_': 'z',
        'gpsimgdirection': 'course',
        'gpspitch': 'pitch',
        'gpsroll': 'roll',
    })
)

# Make sure every file name carries the image suffix exactly once.
camera_df['File'] = [
    name if name.endswith(img_suffix) else name + img_suffix
    for name in camera_df['File']
]

# Optional spatial subset for quick experiments:
# camera_df = camera_df[(camera_df.x >= 2559490) & (camera_df.x <= 2559600) &
#                       (camera_df.y >= 1203500) & (camera_df.y <= 1203650)]

# NOTE(review): the inputs above are read from '../data/neuchatel/' but this
# export goes to '../../data/neuchatel/' - confirm the extra '../' is intended.
camera_df.to_csv('../../data/neuchatel/coordinates.txt', sep='\t', index=False)
camera_df.tail()

Unnamed: 0,File,Time,x,y,z,course,pitch,roll
10833,20200408_135016_001091.jpg,305978.99593,2530469.497,1191873.376,803.424,62.717771,-3.818139,-1.482756
10834,20200408_135016_001090.jpg,305978.64595,2530473.956,1191875.766,803.324,62.541726,-3.690216,-1.252783
10835,20200408_135016_002386.jpg,306497.47495,2530476.199,1191873.894,803.364,242.095375,-1.169486,1.924816
10836,20200408_135016_002385.jpg,306497.09495,2530471.777,1191871.458,803.464,242.005249,-1.289378,1.989834
10837,20200408_135016_002383.jpg,306496.33496,2530462.945,1191866.565,803.664,241.937913,-1.651241,2.100855


In [5]:
tqdm.pandas()

# Tunable thresholds for the projection step.
SEARCH_RADIUS_M = 20      # objects whose centroid lies within this distance of the camera are projected
MAX_BBOX_WIDTH_PX = 1000  # projected boxes at least this wide (in pixels) are discarded

image_info_ls = []
annotations = []
annotation_id = 1

# The centroids do not depend on the camera: compute them once instead of
# recomputing them for every image inside the loop.
gt_centroids = gdf_gt.geometry.centroid

# Iterate over each camera; `total` lets tqdm show a real progress bar,
# since `iterrows()` is a generator without a length.
for cam_idx, cam in tqdm(camera_df.iterrows(), total=len(camera_df)):
    cam_point = Point(cam.x, cam.y)
    cam_buffer = cam_point.buffer(SEARCH_RADIUS_M)

    # Keep only the objects whose centroid falls inside the search disc
    nearby_circles = gdf_gt[gt_centroids.within(cam_buffer)]

    # Nothing to project for this image
    if len(nearby_circles) == 0:
        continue

    # The camera pose (with the configured offsets applied) is identical for
    # every object seen from this image, so build it once per camera.
    camera_meta = {
        'x': cam.x + camera_offset[0],
        'y': cam.y + camera_offset[1],
        'z': cam.z + camera_offset[2],
        'yaw': cam.course + camera_offset[3],
        'pitch': cam.pitch + camera_offset[4],
        'roll': cam.roll + camera_offset[5],
    }

    for gt_idx, circle in nearby_circles.iterrows():
        # For multi-part geometries use: circle.geometry.geoms[0].exterior.coords
        world_coords = np.array(circle.geometry.exterior.coords)
        x, y, z = world_coords.T

        if (z == 0).all():
            # Skip if all Z coordinates are zero
            print(f"Skipping GT {gt_idx} at camera {cam_idx} due to zero Z values.")
            continue
        elif (z == 0).any():
            # Fill the zero Z values with the median of the non-zero ones
            z[z == 0] = np.median(z[z != 0])

        # Transform the world coordinates into the camera frame, then project
        # them onto the panorama to get pixel coordinates (u, v).
        cam_x, cam_y, cam_z = transform_to_camera_crs(x, y, z, camera_meta)
        u, v = spherical_projection(cam_x, cam_y, cam_z, width, height)

        # Handle polygons that wrap around the left/right image edge: when the
        # horizontal extent exceeds half the image width, collapse one side
        # onto its image border and keep the side with the larger extent.
        if u.max() - u.min() > width / 2:
            u_right = np.where(u > width / 2, u, width - 1)  # left part pushed to the right border
            u_left = np.where(u < width / 2, u, 0)           # right part pushed to the left border
            u = u_right if np.ptp(u_right) > np.ptp(u_left) else u_left

        u = np.clip(u, 0, width - 1)  # Ensure u is within image bounds
        i, j = u.astype(int), v.astype(int)

        # Build the image-space polygon and derive the COCO fields from it
        coords = np.array([i, j]).T
        img_polygon = Polygon(coords)
        area = img_polygon.area

        minx, miny, maxx, maxy = img_polygon.bounds
        w = maxx - minx
        h = maxy - miny

        if w >= MAX_BBOX_WIDTH_PX:
            # Skip if the bounding box is too large
            continue

        # COCO bounding box format: [x, y, width, height]
        bbox = [float(minx), float(miny), float(w), float(h)]

        annotations.append({
            "id": annotation_id,
            "object_id": gt_idx,
            "image_id": cam_idx,
            "category_id": 1,
            "segmentation": [list(coords.flatten().astype('float'))],
            "area": area,
            "bbox": bbox,
            "iscrowd": 0
        })
        annotation_id += 1

    # Register the image once it had at least one nearby object
    image_info_ls.append({
        "id": cam_idx,
        "file_name": cam.File,
        "width": width,
        "height": height
    })

# Define categories
categories = [
    {
        "id": 1,
        "name": "manhole",
        "supercategory": "none"
    }
]

# Compile the final COCO structure
coco_format = {
    "images": image_info_ls,
    "annotations": annotations,
    "categories": categories
}

print(f"{len(annotations)} annotations projected to images to validate!")

10838it [01:28, 122.41it/s]

18451 annotations projected to images to validate!





In [None]:
# Save to a JSON file.
# Serialize first and only then open the target: if `coco_format` contains a
# value that cannot be encoded, the error is raised before the file is
# truncated, so an existing COCO_img_gt.json is never left half-written.
coco_json = json.dumps(coco_format, indent=4)
with open("COCO_img_gt.json", "w") as json_file:
    json_file.write(coco_json)
