# Metadata Preparation

Extract trajectory and camera metadata from the Innovitas raw data delivery (one or more JSON files in a folder) and save them as a GeoPackage.

In [None]:
import os
import json
import geopandas as gpd
from shapely.geometry import Point

def json_to_geopackage(input_folder, output_gpkg_path):
    """Collect camera poses from JSON files and save them as a GeoPackage.

    Every ``*.json`` file directly inside ``input_folder`` is parsed and its
    ``"items"`` list is scanned. Items whose ``dataSourceId`` starts with
    ``"lb"`` are kept; each becomes one row holding ``dataSourceId``,
    ``gpsWeekSeconds``, ``imagePath``, the pose values (``x``, ``y``, ``z``,
    ``rx``, ``ry``, ``rz``), an image ``size`` (2048 for ``"lb4"``, 4016
    otherwise) and a 3D point geometry built from ``x``, ``y``, ``z``.

    Files that are not valid JSON, files whose top-level value is not an
    object, and items without complete coordinates are reported on stdout
    and skipped instead of aborting the whole conversion.

    Args:
        input_folder: Directory containing the JSON files.
        output_gpkg_path: Path of the GeoPackage to write (CRS EPSG:2056).

    Returns:
        None. Nothing is written when no valid item is found.
    """
    all_records = []

    # Sorted so the row order does not depend on the directory listing order.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(input_folder, filename)
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            print(f"Error reading {filename}: {e}")
            continue

        if not isinstance(data, dict):
            print(f"Skipping {filename}: top-level JSON value is not an object")
            continue

        # `or []` also covers an explicit `"items": null`.
        items = data.get("items") or []

        for item in items:
            dataSourceId = item.get("dataSourceId")
            # A missing or null id is treated like any other non-"lb" source.
            if not isinstance(dataSourceId, str) or not dataSourceId.startswith("lb"):
                continue

            x, y, z = item.get("x"), item.get("y"), item.get("z")
            if x is None or y is None or z is None:
                # Without all three coordinates no point geometry can be built.
                print(f"Skipping item without complete coordinates in {filename}")
                continue

            all_records.append({
                "dataSourceId": dataSourceId,
                "gpsWeekSeconds": item.get("gpsWeekSeconds"),
                "imagePath": item.get("imagePath", []),
                "x": x,
                "y": y,
                "z": z,
                "rx": item.get("rx"),
                "ry": item.get("ry"),
                "rz": item.get("rz"),
                "size": 2048 if dataSourceId == "lb4" else 4016,
                "geometry": Point(x, y, z),
            })

    if not all_records:
        print("No valid items found.")
        return

    gdf = gpd.GeoDataFrame(all_records, crs="EPSG:2056")
    gdf.to_file(output_gpkg_path, driver="GPKG")
    print(f"Saved to {output_gpkg_path}")

# Example usage: read the JSON files in `input_folder`, write `output_gpkg_path`
input_folder = "../datenextrakt_infra3D/datadescription/configurations/images"
output_gpkg_path = "trajectory.gpkg"
json_to_geopackage(
    input_folder=input_folder,
    output_gpkg_path=output_gpkg_path,
)

Saved to zh_trajectory.gpkg


# Project 3D polygons to spherical panoramas


In [1]:
import os
import sys

# Add the project root to the Python path
project_root = os.path.abspath(os.path.join(os.getcwd(), '../'))
sys.path.append(project_root)

import ast
import json
import numpy as np
import pandas as pd
import geopandas as gpd

from tqdm import tqdm
from shapely.geometry import Point, Polygon

from scripts.utils.projection import *

- Configure source file path. 
- If the trajectory is so dense that the validation workload becomes too heavy, set `downsample_rate` to *n* so that only every *n*-th image is processed.
- Depending on the mount configuration, choose which cube faces to leave out of the projection. Note that, despite its name, `cube_idx_to_project` lists the face indices that the projection loop **skips**. In most cases the top/bottom faces look at the sky/vehicle and carry no useful information. In some cases the back face is also useless, e.g. if the camera is mounted on a train/tram.
- The face order can be found in the cubemap data description file. E.g., `FBLRUD` means `Front, Back, Left, Right, Up, Down`, so `[1, 4, 5]` refers to the Back, Up and Down faces.

In [None]:
# Process only every `downsample_rate`-th row of the camera trajectory
downsample_rate = 4
# Cube face indices that the projection loop skips (it `continue`s on them)
cube_idx_to_project = [1, 4, 5]

# Paths
image_dir = '../data/zurich/Innovitas'
gpkg_path = "../data/zurich/zh_gt_traject.gpkg"

# Ground-truth 3D geometries
gdf_gt = gpd.read_file('../data/zurich/zh_gt_3d.gpkg', layer='zh_gt_3d')

# Camera trajectory; `imagePath` is parsed from its text form back into a
# Python object with ast.literal_eval
camera_df = gpd.read_file(gpkg_path)
camera_df["imagePath"] = camera_df["imagePath"].apply(ast.literal_eval)
camera_df.head()

Unnamed: 0,dataSourceId,gpsWeekSeconds,imagePath,x,y,z,rx,ry,rz,size,frame_id,geometry
0,lb4,114284.989026,"[102-lb4-0-35582.jpg, 102-lb4-1-35582.jpg, 102...",2682941.148,1247794.683,410.838,-1.589392,-1.238151,3.120913,2048,102-35582,POINT Z (2682941.148 1247794.683 410.838)
1,lb4,114285.331898,"[102-lb4-0-35583.jpg, 102-lb4-1-35583.jpg, 102...",2682942.126,1247794.352,410.845,-1.588918,-1.238229,3.11991,2048,102-35583,POINT Z (2682942.126 1247794.352 410.845)
2,lb4,114285.676164,"[102-lb4-0-35584.jpg, 102-lb4-1-35584.jpg, 102...",2682943.113,1247794.021,410.851,-1.587972,-1.238393,3.120413,2048,102-35584,POINT Z (2682943.113 1247794.021 410.851)
3,lb4,114286.018687,"[102-lb4-0-35585.jpg, 102-lb4-1-35585.jpg, 102...",2682944.104,1247793.685,410.862,-1.588698,-1.238377,3.118494,2048,102-35585,POINT Z (2682944.104 1247793.685 410.862)
4,lb4,114286.345684,"[102-lb4-0-35586.jpg, 102-lb4-1-35586.jpg, 102...",2682945.063,1247793.365,410.872,-1.588682,-1.23841,3.119139,2048,102-35586,POINT Z (2682945.063 1247793.365 410.872)


In [None]:
tqdm.pandas()

# Project every ground-truth polygon that lies close to a camera onto that
# camera's cube faces and collect the results as COCO images/annotations.
image_info_ls = []   # COCO "images" entries
annotations = []     # COCO "annotations" entries
annotation_id = 1    # running annotation id, starts at 1

# The ground-truth centroids do not depend on the camera, so compute them
# once here instead of once per camera inside the loop.
gt_centroids = gdf_gt.geometry.centroid

# Only every `downsample_rate`-th camera is processed.
cameras_to_process = camera_df[::downsample_rate]

# Iterate over each camera (passing `total` lets tqdm show real progress)
for cam_idx, cam in tqdm(cameras_to_process.iterrows(), total=len(cameras_to_process)):
    # Cube faces of this camera that received projected points.
    # NOTE(review): a face is flagged before the point-count/area filters
    # below, so an image can be listed without any annotation — confirm that
    # this is intended.
    lb_face_flag = []

    cam_point = Point(cam.x, cam.y)
    cam_buffer = cam_point.buffer(20)  # 20-meter radius
    # Bracket access on purpose: `cam.size` would be the pandas Series.size
    # attribute, not the "size" column.
    height, width = cam['size'], cam['size']

    # Keep the ground-truth objects whose centroid lies inside the buffer
    nearby_circles = gdf_gt[gt_centroids.within(cam_buffer)]

    # Nothing to project for this camera
    if nearby_circles.empty:
        continue

    for gt_idx, circle in nearby_circles.iterrows():

        # Image metadata handed to the GeoFrame below
        # NOTE(review): ImageMeta and the angle conversion depend only on the
        # camera; they could move out of this loop if GeoFrame does not
        # mutate them — confirm.
        imagemeta = ImageMeta(
            width=width,
            height=height,
            pixsize=0.001,
            focal_length=float(height * 0.001 / 2)
        )

        # Convert the camera rotation (rx, ry, rz) to omega/phi/kappa
        omega, phi, kappa = rxryrz_to_opk(cam.rx, cam.ry, cam.rz)

        # Built per object: get_face_id() takes no arguments, so it
        # presumably reads state set by latlon_to_sensor_id() — TODO confirm
        # before sharing one GeoFrame between objects.
        geoframe = GeoFrame(
            easting=cam.x,
            northing=cam.y,
            height=cam.z,
            omega=omega,
            phi=phi,
            kappa=kappa,
            imagemeta=imagemeta,
            camera_model=cam.dataSourceId
        )

        # Exterior ring of the polygon.
        # NOTE: for multi-part geometries the first part would be
        # circle.geometry.geoms[0].exterior.coords instead.
        world_coords = np.array(circle.geometry.exterior.coords)
        # Get latlon for all points
        latlon = geoframe.get_LatLon(world_coords)

        # Get sensor_id and cubeface_id for each point
        sensor_ids = geoframe.latlon_to_sensor_id(latlon)
        cubeface_ids = geoframe.get_face_id()

        # Group point indices by (sensor_id, cubeface_id) using pandas
        df_group = pd.DataFrame({
            'sensor_id': sensor_ids,
            'cubeface_id': cubeface_ids,
            'index': range(len(world_coords))
        })
        grouped_indices = {
            (sid, cid): group['index'].tolist()
            for (sid, cid), group in df_group.groupby(['sensor_id', 'cubeface_id'])
        }

        # Sensor coordinates for all points, kept in the original point order
        sensor_coords_all = np.zeros((len(world_coords), 2), dtype=np.float32)

        # Run each group through the projection chain:
        # world -> frame -> model -> image -> sensor
        for (sid, cid), indices in grouped_indices.items():
            group_world_coords = world_coords[indices]
            frame_coords = geoframe.world_to_frame(group_world_coords, sid)
            model_coords = geoframe.frame_to_model(frame_coords, sid, cid)
            image_coords = geoframe.model_to_image(model_coords)
            sensor_coords = geoframe.image_to_sensor(image_coords)
            # Write the results back at the points' original positions
            sensor_coords_all[indices] = sensor_coords

        for cube_id in np.unique(cubeface_ids):
            # Faces listed in `cube_idx_to_project` are the ones skipped here
            if cube_id in cube_idx_to_project:
                continue
            if cube_id not in lb_face_flag:
                lb_face_flag.append(cube_id)

            img_coords = sensor_coords_all[cubeface_ids == cube_id]

            # Ignore faces that hold 50 or fewer of the object's points
            if len(img_coords) <= 50:
                continue

            img_polygon = Polygon(img_coords)
            # Calculate the area
            area = img_polygon.area

            # Ignore projections with an area of 200 or less
            if area <= 200:
                continue

            # Retrieve the bounding box coordinates
            minx, miny, maxx, maxy = img_polygon.bounds

            # Compute width and height
            w = maxx - minx
            h = maxy - miny

            # Format the bounding box for COCO: [x, y, width, height]
            bbox = [float(minx), float(miny), float(w), float(h)]

            # Append to annotations list
            # NOTE(review): `cam_idx * 4 + cube_id` is unique only while the
            # kept cube ids stay below 4 or neighbouring cameras are not both
            # processed; with faces 4/5 kept and downsample_rate == 1,
            # (cam k, face 4) and (cam k + 1, face 0) get the same id.
            annotations.append({
                "id": annotation_id,
                "object_id": gt_idx,
                "image_id": int(cam_idx * 4 + cube_id),
                "category_id": int(circle.checked),
                "segmentation": [img_coords.flatten().astype(int).tolist()],
                "area": area,
                "bbox": bbox,
                "iscrowd": 0
            })
            annotation_id += 1

    for cube_id in lb_face_flag:
        # One COCO image entry per flagged cube face; the id formula matches
        # the `image_id` used for the annotations above
        image_info_ls.append({
            "id": int(cam_idx * 4 + cube_id),
            "file_name": cam.imagePath[cube_id],
            "width": int(width),
            "height": int(height)
        })

# COCO categories: ids are assigned from 1 in the order the names are listed,
# and all of them share the "manhole" supercategory
_category_names = (
    "solid manhole",
    "perforated manhole",
    "concrete manhole",
    "drain manhole",
)
categories = [
    {"id": category_id, "name": category_name, "supercategory": "manhole"}
    for category_id, category_name in enumerate(_category_names, start=1)
]

# Order the image entries by ascending numeric 'id'
image_info_ls = sorted(image_info_ls, key=lambda image_info: image_info['id'])

# Assemble the COCO dictionary from its three sections
coco_format = dict(
    images=image_info_ls,
    annotations=annotations,
    categories=categories,
)

# Report how many annotations were produced
print(f"{len(annotations)} annotations projected to images to validate!")

2274it [00:34, 66.27it/s] 

9754 annotations projected to images to validate!





In [4]:
# Write the COCO dictionary to disk as JSON indented by 4 spaces
with open("zh_COCO_test.json", mode="w") as coco_file:
    json.dump(coco_format, fp=coco_file, indent=4)