# Getting Started With 3D Data in Scale Nucleus

---



In this tutorial, we'll walk through a demo of how to upload 3D Scenes to Nucleus using the open source Pandaset dataset

### Step 1: Install dependencies

In [1]:
%%bash
pip install scale-nucleus
pip install boto3
pip install botocore



You should consider upgrading via the '/Users/drewkaul/Library/Caches/pypoetry/virtualenvs/scale-nucleus-nqXix_D8-py3.8/bin/python -m pip install --upgrade pip' command.
You should consider upgrading via the '/Users/drewkaul/Library/Caches/pypoetry/virtualenvs/scale-nucleus-nqXix_D8-py3.8/bin/python -m pip install --upgrade pip' command.
You should consider upgrading via the '/Users/drewkaul/Library/Caches/pypoetry/virtualenvs/scale-nucleus-nqXix_D8-py3.8/bin/python -m pip install --upgrade pip' command.


### Step 2: Read PandaSet files from S3

In [2]:
import boto3
from botocore import UNSIGNED
from botocore.client import Config

s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
paginator = s3.get_paginator('list_objects_v2')
result = paginator.paginate(Bucket="pandaset-public")

In [3]:
object_paths = []
for page in result:
    if "Contents" in page:
        for key in page["Contents"]:
            object_path = key["Key"]
            object_paths.append(object_path)

In [4]:
print(object_paths)

['pandaset_0/.DS_Store', 'pandaset_0/001/.DS_Store', 'pandaset_0/001/LICENSE.txt', 'pandaset_0/001/annotations/.DS_Store', 'pandaset_0/001/annotations/cuboids/00.json', 'pandaset_0/001/annotations/cuboids/01.json', 'pandaset_0/001/annotations/cuboids/02.json', 'pandaset_0/001/annotations/cuboids/03.json', 'pandaset_0/001/annotations/cuboids/04.json', 'pandaset_0/001/annotations/cuboids/05.json', 'pandaset_0/001/annotations/cuboids/06.json', 'pandaset_0/001/annotations/cuboids/07.json', 'pandaset_0/001/annotations/cuboids/08.json', 'pandaset_0/001/annotations/cuboids/09.json', 'pandaset_0/001/annotations/cuboids/10.json', 'pandaset_0/001/annotations/cuboids/11.json', 'pandaset_0/001/annotations/cuboids/12.json', 'pandaset_0/001/annotations/cuboids/13.json', 'pandaset_0/001/annotations/cuboids/14.json', 'pandaset_0/001/annotations/cuboids/15.json', 'pandaset_0/001/annotations/cuboids/16.json', 'pandaset_0/001/annotations/cuboids/17.json', 'pandaset_0/001/annotations/cuboids/18.json', 'pa

In [68]:
def is_image_path(object_path):
    return "camera/" in object_path and ".jpg" in object_path

def is_pointcloud_path(object_path):
    return "lidar/" in object_path and ".json" in object_path
  
def is_cuboid_path(object_path):
    return "cuboids/" in object_path and ".json" in object_path

In [6]:
image_paths = []
pointcloud_paths = []
cuboid_paths = []

for path in object_paths:
    if is_image_path(path):
        image_paths.append(path)
    elif is_pointcloud_path(path):
        pointcloud_paths.append(path)
    elif is_cuboid_path(path):
        cuboid_paths.append(path)

In [7]:
print(image_paths)
print(pointcloud_paths)
print(cuboid_paths)

['pandaset_0/001/camera/back_camera/00.jpg', 'pandaset_0/001/camera/back_camera/01.jpg', 'pandaset_0/001/camera/back_camera/02.jpg', 'pandaset_0/001/camera/back_camera/03.jpg', 'pandaset_0/001/camera/back_camera/04.jpg', 'pandaset_0/001/camera/back_camera/05.jpg', 'pandaset_0/001/camera/back_camera/06.jpg', 'pandaset_0/001/camera/back_camera/07.jpg', 'pandaset_0/001/camera/back_camera/08.jpg', 'pandaset_0/001/camera/back_camera/09.jpg', 'pandaset_0/001/camera/back_camera/10.jpg', 'pandaset_0/001/camera/back_camera/11.jpg', 'pandaset_0/001/camera/back_camera/12.jpg', 'pandaset_0/001/camera/back_camera/13.jpg', 'pandaset_0/001/camera/back_camera/14.jpg', 'pandaset_0/001/camera/back_camera/15.jpg', 'pandaset_0/001/camera/back_camera/16.jpg', 'pandaset_0/001/camera/back_camera/17.jpg', 'pandaset_0/001/camera/back_camera/18.jpg', 'pandaset_0/001/camera/back_camera/19.jpg', 'pandaset_0/001/camera/back_camera/20.jpg', 'pandaset_0/001/camera/back_camera/21.jpg', 'pandaset_0/001/camera/back_cam

### Step 3: Construct LidarScenes

In [11]:
import os
import re
import json
import nucleus
from nucleus import NucleusClient, DatasetItem, Frame, LidarScene

In [58]:
def read_json(path):
    s3 = boto3.resource('s3')
    content_object = s3.Object(BUCKET, path)
    file_content = content_object.get()['Body'].read().decode('utf-8')
    return json.loads(file_content)

In [95]:
BUCKET = "pandaset-public"
S3_BUCKET = "s3://pandaset-public"

In [59]:
# For this demo, we will upload scenes 001, 006, and 023 from PandaSet
SCENE_IDS = ["001", "006", "023"]
CAMERA_SENSORS = ["back_camera", "front_camera", "front_left_camera", "front_right_camera", "left_camera", "right_camera"]

scenes = []
for scene_id in SCENE_IDS:
    scene_ref_id = f"scene-{scene_id}"
    scene = LidarScene(scene_ref_id)

    camera_sensor_to_params = {}
    for sensor in CAMERA_SENSORS:
        base_path = f"pandaset_0/{scene_id}/camera/{sensor}/"
        intrinsics_path = os.path.join(base_path, 'intrinsics.json')
        poses_path = os.path.join(base_path, 'poses.json')
        intrinsics = read_json(intrinsics_path)
        poses = read_json(poses_path)
        camera_params = {"intrinsics": intrinsics, "poses": poses}
        camera_sensor_to_params[sensor] = camera_params

    image_paths_in_scene = [path for path in image_paths if f"{scene_id}/" in path]
    for image_path in image_paths_in_scene:
        tokens = re.split('/|\.', image_path)
        frame_idx = int(tokens[-2])
        sensor_name = tokens[-3]

        params = camera_sensor_to_params[sensor_name]
        pose = params["poses"][frame_idx]
        camera_params = {**params["intrinsics"], **pose}

        image_url = os.path.join(S3_BUCKET, image_path)    
        reference_id = f"scene-{SCENE_ID}-frame-{frame_idx}-{sensor_name}"
        metadata = {"camera_params": camera_params}
        item = DatasetItem(image_location=image_url, reference_id=reference_id, metadata=metadata)
        scene.add_item(frame_idx, sensor_name, item)

    pointcloud_paths_in_scene = [path for path in pointcloud_paths if f"{scene_id}/" in path]
    for pointcloud_path in pointcloud_paths_in_scene:
        tokens = re.split('/|\.', pointcloud_path)
        frame_idx = int(tokens[-2])
        sensor_name = tokens[-3]

        pointcloud_url = os.path.join(S3_BUCKET, pointcloud_path)
        reference_id = f"scene-{SCENE_ID}-frame-{frame_idx}-{sensor_name}"
        item = DatasetItem(pointcloud_location=pointcloud_url, reference_id=reference_id)
        scene.add_item(frame_idx, sensor_name, item)
    
    scenes.append(scene)

In [60]:
scene_1 = scenes[0]
print("number of lidar DatasetItems:", len(scene_1.get_items_from_sensor("lidar")))
print("number of DatasetItems:", len(scene_1.get_items()))
print("number of frames:", scene_1.length)
print("number of sensors:", scene_1.num_sensors)
print("sensors:", scene_1.get_sensors())

number of lidar DatasetItems: 80
number of DatasetItems: 560
number of frames: 80
number of sensors: 7
sensors: ['front_camera', 'front_right_camera', 'back_camera', 'front_left_camera', 'lidar', 'right_camera', 'left_camera']


### Step 4: Append Scenes to Dataset

In [64]:
API_KEY = "live_318209d04e3746dbafbe1f195a4a1872"
TEST_DATASET_NAME = "test_dataset_3d"

In [65]:
client = NucleusClient(API_KEY)
dataset = client.create_dataset(TEST_DATASET_NAME)

In [66]:
append_job = dataset.append(scenes, asynchronous=True)
print(append_job)

AsyncJob(job_id='job_c4d3zme1h514hxf5v7c0', job_last_known_status='Running', job_type='uploadLidarScene', job_creation_time='2021-08-16T10:37:05.236Z', client=NucleusClient(api_key='live_318209d04e3746dbafbe1f195a4a1872', use_notebook=False, endpoint='http://localhost:3000/v1/nucleus'))


In [67]:
append_job.sleep_until_complete()
print(append_job.status())

Status at Mon Aug 16 05:37:38 2021: {'job_id': 'job_c4d3zme1h514hxf5v7c0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Mon Aug 16 05:37:43 2021: {'job_id': 'job_c4d3zme1h514hxf5v7c0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Mon Aug 16 05:37:49 2021: {'job_id': 'job_c4d3zme1h514hxf5v7c0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Mon Aug 16 05:37:54 2021: {'job_id': 'job_c4d3zme1h514hxf5v7c0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Mon Aug 16 05:37:59 2021: {'job_id': 'job_c4d3zme1h514hxf5v7c0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Mon Aug 16 05:38:05 2021: {'job_id': 'job_c4d3zme1h514hxf5v7c0', 'status': 'Runn

KeyboardInterrupt: 

### Step 5: Upload Annotations

In [91]:
# Let's upload annotations for scene 23
SCENE_ID = "023"
from nucleus import CuboidAnnotation

annotations = []
cuboid_paths_in_scene = [path for path in cuboid_paths if f"{SCENE_ID}/" in path]
for (i, cuboid_path) in enumerate(cuboid_paths_in_scene):
    tokens = re.split('/|\.', cuboid_path)
    frame_idx = int(tokens[-2])
    reference_id = f"scene-{SCENE_ID}-frame-{frame_idx}-lidar"
    
    cuboids_json = read_json(cuboid_path)
    for cuboid in cuboids_json:
        cuboid["reference_id"] = reference_id
    
    new_annotations = [CuboidAnnotation.from_json(cuboid) for cuboid in cuboids_json]
    annotations.extend(new_annotations)

12637


In [93]:
annotate_job = dataset.annotate(annotations, asynchronous=True)
print(annotate_job)

12637
AsyncJob(job_id='job_c4d4j5g1h514sk61cm7g', job_last_known_status='Running', job_type='uploadAnnotations', job_creation_time='2021-08-16T11:16:38.123Z', client=NucleusClient(api_key='live_318209d04e3746dbafbe1f195a4a1872', use_notebook=False, endpoint='http://localhost:3000/v1/nucleus'))


### Step 6: Upload Predictions

In [None]:
# We'll demonstrate how to upload model predictions using fake predictions
from nucleus import Point3D, CuboidPrediction

def add_noise(obs):
    noise = np.random.normal(0, 1)
    return obs + noise

def add_noise_point(point: Point3D):
    return Point3D(
        add_noise(point.position.x),
        add_noise(point.position.y),
        add_noise(point.position.z)
    )

def add_noise_gt(gt: CuboidAnnotation):
    return CuboidPrediction(
        label=gt.label,
        position=add_noise_point(gt.position),
        dimensions=add_noise_point(gt.dimensions),
        yaw=add_noise(yaw),
        reference_id=gt.reference_id,
        item_id=gt.item_id,
        annotation_id=gt.annotation_id,
        metadata=gt.metadata
    )

predictions = [add_noise_gt(annotation) for annotation in annotations]

In [None]:
model = client.add_model(name="Test Model", reference_id="test")
model_run = model.create_run(
    name="Test Model Run",
    dataset=dataset,
    predictions=[],
)

predict_job = model_run.predict(predictions, asynchronous=True)
model_run.commit()