In [1]:
import nucleus
from nucleus import Scene, LidarScene, DatasetItem, NucleusClient

In [34]:
BUCKET = "scale-ml"
S3_BUCKET = "s3://scale-ml/"
OBJECT_PATH = "pandaset/023/"
SCENE_URL = "s3://scale-ml/pandaset/023/"

In [3]:
import sys
!{sys.executable} -m pip install boto3

You should consider upgrading via the '/Users/drewkaul/Library/Caches/pypoetry/virtualenvs/scale-nucleus-nqXix_D8-py3.8/bin/python -m pip install --upgrade pip' command.[0m


In [4]:
import boto3

client = boto3.client('s3')
paginator = client.get_paginator('list_objects_v2')
result = paginator.paginate(Bucket=BUCKET, Prefix=OBJECT_PATH, StartAfter=OBJECT_PATH)

In [5]:
image_paths = []
pointcloud_paths = []

def is_image_path(object_path):
    return "camera/" in object_path and ".jpg" in object_path

def is_pointcloud_path(object_path):
    return "lidar/" in object_path and ".json" in object_path

for page in result:
    if "Contents" in page:
        for key in page["Contents"]:
            object_path = key["Key"]
            
            if is_image_path(object_path):
                image_paths.append(object_path)
            
            if is_pointcloud_path(object_path):
                pointcloud_paths.append(object_path)

In [6]:
print(len(image_paths))
print(image_paths[0])
# print(image_paths)

print(len(pointcloud_paths))
print(pointcloud_paths[0])
# print(pointcloud_paths)

480
pandaset/023/camera/back_camera/00.jpg
80
pandaset/023/lidar/00.json


In [9]:
PRESIGN_EXPIRY_SECONDS = 72000 # 20 hr

def s3_sign(bucket, key):
    s3 = boto3.client("s3")
    return s3.generate_presigned_url(
        ClientMethod="get_object",
        Params={
            "Bucket": bucket,
            "Key": key,
        },
        ExpiresIn=PRESIGN_EXPIRY_SECONDS,
    )

In [11]:
import os
import re

scene_number = re.split('/', OBJECT_PATH)[1]
scene_ref_id = f"scene-{scene_number}"
scene = LidarScene(scene_ref_id)

for image_path in image_paths:
    # image_url = os.path.join(S3_BUCKET, image_path)
    image_url = s3_sign(BUCKET, image_path)
    
    tokens = re.split('/|\.', image_path)
    frame_idx = int(tokens[-2])
    sensor_name = tokens[-3]
    reference_id = f"scene-{scene_number}-frame-{frame_idx}-{sensor_name}"
    
    item = DatasetItem(image_location=image_url, reference_id=reference_id)
    scene.add_item(frame_idx, sensor_name, item)

for pointcloud_path in pointcloud_paths:
    # pointcloud_url = os.path.join(S3_BUCKET, pointcloud_path)
    pointcloud_url = s3_sign(BUCKET, pointcloud_path)
    
    tokens = re.split('/|\.', pointcloud_path)
    frame_idx = int(tokens[-2])
    sensor_name = tokens[-3]
    reference_id = f"scene-{scene_number}-frame-{frame_idx}-{sensor_name}"
    
    item = DatasetItem(pointcloud_location=pointcloud_url, reference_id=reference_id)
    scene.add_item(frame_idx, sensor_name, item)

In [12]:
print("number of frames:", len(scene.frames_dict))
frame_0 = scene.frames_dict[0]
print("number of sensors per frame:", len(frame_0.items.values()))
print(frame_0)

number of frames: 80
number of sensors per frame: 7
Frame(items={'back_camera': DatasetItem(image_location='https://scale-ml.s3.amazonaws.com/pandaset/023/camera/back_camera/00.jpg?AWSAccessKeyId=ASIAUPBNRLRVNWJG7NWZ&Signature=WH8T0nOTgXwcH4XpLpxd7Rr8YFI%3D&x-amz-security-token=%22FwoGZXIvYXdzEAUaDMQEnHLBwzxJG%2BfjjSKIAvap9QJe0kt4wFJIUXqN0hW3GyhbkXWX%2BGgbrUT4BuCifd1%2F05c%2BS991vWygpdesygu%2BnaU%2BEGVLCaafpGEJQj9SgdRFKLJIQVquzKeo%2FGiHwJ3bmN6gpnx0tH2QMP23AxfMPatoluFMkNzM0LdyRz%2Bq2WEpE8V0IB4tZvnsq9LjIfWijBKEUl6oDpZfWMK1IneJQQVolFylaBHmJ2BH1nqwiKFzYqMIQ1yKzb4bM9Yx7O5kpkD6HP36Mx%2FzK7GqvHiSQK58m2XXhCoBZLZfYWa01mJrUJLngZ6129qXPRtxvW5y%2FzlhCDKcSyrpxXId3wsWLy1mpejzoAT0RWXhYFjXmsGmjGwABii3hNuIBjIra4553v6AxHGTwEAnoM%2FAtyqH7Mm1EeLcWGWqWGhSbD5tbyrH4zRcN8YfDA%3D%3D%22&Expires=1628954199', reference_id='scene-023-frame-0-back_camera', item_id=None, metadata=None, pointcloud_location=None), 'front_camera': DatasetItem(image_location='https://scale-ml.s3.amazonaws.com/pandaset/023/camera/front_c

In [13]:
API_KEY = "live_318209d04e3746dbafbe1f195a4a1872"
TEST_DATASET_NAME = "test_dataset_3d"

In [16]:
client = NucleusClient(API_KEY)
dataset = client.create_dataset(TEST_DATASET_NAME)

In [18]:
# Upload scene to dataset
scenes = [scene]
response = dataset.append(scenes, asynchronous=True)
print(response)

AsyncJob(job_id='job_c4bceptpfpdcw10d1hc0', job_last_known_status='Running', job_type='uploadLidarScene', job_creation_time='2021-08-13T19:26:19.107Z', client=NucleusClient(api_key='live_318209d04e3746dbafbe1f195a4a1872', use_notebook=False, endpoint='http://localhost:3000/v1/nucleus'))


In [19]:
job = response
job.sleep_until_complete()
print(job.status())

Status at Fri Aug 13 14:28:43 2021: {'job_id': 'job_c4bceptpfpdcw10d1hc0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 14:28:51 2021: {'job_id': 'job_c4bceptpfpdcw10d1hc0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 14:29:02 2021: {'job_id': 'job_c4bceptpfpdcw10d1hc0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 14:29:21 2021: {'job_id': 'job_c4bceptpfpdcw10d1hc0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 14:30:01 2021: {'job_id': 'job_c4bceptpfpdcw10d1hc0', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 14:30:07 2021: {'job_id': 'job_c4bceptpfpdcw10d1hc0', 'status': 'Runn

In [29]:
small_scene_ref_id = f"small-scene-{scene_number}"
small_scene = LidarScene(small_scene_ref_id)

for image_path in image_paths:
    image_url = s3_sign(BUCKET, image_path)
    
    tokens = re.split('/|\.', image_path)
    frame_idx = int(tokens[-2])
    if frame_idx >= 10:
        continue
    
    sensor_name = tokens[-3]
    reference_id = f"scene-{scene_number}-frame-{frame_idx}-{sensor_name}"
    
    item = DatasetItem(image_location=image_url, reference_id=reference_id)
    small_scene.add_item(frame_idx, sensor_name, item)

for pointcloud_path in pointcloud_paths:
    pointcloud_url = s3_sign(BUCKET, pointcloud_path)
    
    tokens = re.split('/|\.', pointcloud_path)
    frame_idx = int(tokens[-2])
    if frame_idx >= 10:
        continue
    
    sensor_name = tokens[-3]
    reference_id = f"scene-{scene_number}-frame-{frame_idx}-{sensor_name}"
    
    item = DatasetItem(pointcloud_location=pointcloud_url, reference_id=reference_id)
    small_scene.add_item(frame_idx, sensor_name, item)

In [31]:
print("number of frames:", len(small_scene.frames_dict))
small_frame_0 = small_scene.frames_dict[0]
print("number of sensors per frame:", len(small_frame_0.items.values()))
print(small_frame_0)

number of frames: 10
number of sensors per frame: 7
Frame(items={'back_camera': DatasetItem(image_location='https://scale-ml.s3.amazonaws.com/pandaset/023/camera/back_camera/00.jpg?AWSAccessKeyId=ASIAUPBNRLRVNWJG7NWZ&Signature=wQrsr8GOe4fe963lmQBKOV24Ahc%3D&x-amz-security-token=%22FwoGZXIvYXdzEAUaDMQEnHLBwzxJG%2BfjjSKIAvap9QJe0kt4wFJIUXqN0hW3GyhbkXWX%2BGgbrUT4BuCifd1%2F05c%2BS991vWygpdesygu%2BnaU%2BEGVLCaafpGEJQj9SgdRFKLJIQVquzKeo%2FGiHwJ3bmN6gpnx0tH2QMP23AxfMPatoluFMkNzM0LdyRz%2Bq2WEpE8V0IB4tZvnsq9LjIfWijBKEUl6oDpZfWMK1IneJQQVolFylaBHmJ2BH1nqwiKFzYqMIQ1yKzb4bM9Yx7O5kpkD6HP36Mx%2FzK7GqvHiSQK58m2XXhCoBZLZfYWa01mJrUJLngZ6129qXPRtxvW5y%2FzlhCDKcSyrpxXId3wsWLy1mpejzoAT0RWXhYFjXmsGmjGwABii3hNuIBjIra4553v6AxHGTwEAnoM%2FAtyqH7Mm1EeLcWGWqWGhSbD5tbyrH4zRcN8YfDA%3D%3D%22&Expires=1628961324', reference_id='scene-023-frame-0-back_camera', item_id=None, metadata=None, pointcloud_location=None), 'front_camera': DatasetItem(image_location='https://scale-ml.s3.amazonaws.com/pandaset/023/camera/front_c

In [32]:
# Upload small scene (10 frames with 1 pointcloud + 6 images per frame)
small_scenes = [small_scene]
small_scenes_job = dataset.append(small_scenes, asynchronous=True)
print(small_scenes_job)

AsyncJob(job_id='job_c4be21rpfpdd2d13cz1g', job_last_known_status='Running', job_type='uploadLidarScene', job_creation_time='2021-08-13T21:15:51.645Z', client=NucleusClient(api_key='live_318209d04e3746dbafbe1f195a4a1872', use_notebook=False, endpoint='http://localhost:3000/v1/nucleus'))


In [33]:
small_scenes_job.sleep_until_complete()

Status at Fri Aug 13 16:16:29 2021: {'job_id': 'job_c4be21rpfpdd2d13cz1g', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 16:16:35 2021: {'job_id': 'job_c4be21rpfpdd2d13cz1g', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 16:16:40 2021: {'job_id': 'job_c4be21rpfpdd2d13cz1g', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 16:16:46 2021: {'job_id': 'job_c4be21rpfpdd2d13cz1g', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 16:16:51 2021: {'job_id': 'job_c4be21rpfpdd2d13cz1g', 'status': 'Running', 'message': {'status_log': 'No additional information can be provided at this time.'}}
Status at Fri Aug 13 16:16:57 2021: {'job_id': 'job_c4be21rpfpdd2d13cz1g', 'status': 'Runn

In [None]:
print(small_scenes_job.status())

In [None]:
# read in annotations from S3
# upload cuboid annotations
dataset.annotate(annotations)

# predictions = annotations + noise
# create model run
# upload cuboid predictions

In [None]:
# show scene construction by frame
# call scene.add_frame
