# Synthetic Optical Flow from Fused Lidar


In [1]:
import sys
sys.path.append('/opt/psegs')

import numpy as np

from psegs.exp.fused_lidar_flow import FusedLidarCloudTableBase
from psegs.exp.fused_lidar_flow import TaskLidarCuboidCameraDFFactory
from psegs.exp.fused_lidar_flow import OpticalFlowRenderBase

import IPython.display
import PIL.Image


## General Notebook Utilities
    
def imshow(x):
    """Display the array `x` inline in the notebook as a PIL image."""
    image = PIL.Image.fromarray(x)
    IPython.display.display(image)

def show_html(x):
    """Render the HTML string `x` inline in the notebook.

    Uses the public `IPython.display` module (already imported at the top of
    this notebook) rather than `IPython.core.display`, from which importing
    `display` is deprecated.
    """
    IPython.display.display(IPython.display.HTML(x))

## SemanticKITTI

In [2]:
from psegs.exp.semantic_kitti import SemanticKITTISDTable

class SemanticKITTILCCDFFactory(TaskLidarCuboidCameraDFFactory):
    """Task DataFrame factory for SemanticKITTI.

    Groups a segment's datums by scan id and emits one task row per scan,
    holding that scan's point cloud datums and camera image datums.
    """

    SRC_SD_TABLE = SemanticKITTISDTable

    @classmethod
    def build_df_for_segment(cls, spark, segment_uri):
        """Build and persist the task DataFrame for one segment.

        Args:
            spark: Spark session used to read the datums and to create the
                resulting DataFrame.
            segment_uri: URI of the segment whose datums are grouped.

        Returns:
            A persisted DataFrame created with `cls.table_schema()`,
            containing one row per distinct `semantic_kitti.scan_id`.
        """

        def scan_id_of(sd):
            # Grouping key: the scan id recorded in the datum URI's extras.
            return sd.uri.extra['semantic_kitti.scan_id']

        def to_task_row(scan_id_and_sds):
            # Imports are kept local to this mapped function.
            from pyspark import Row
            from oarphpy.spark import RowAdapter

            scan_id, grouped_sds = scan_id_and_sds
            sds = list(grouped_sds)

            # A datum carrying a camera image is always treated as a camera
            # datum; only datums without one are considered as point clouds.
            camera_images = [
                sd for sd in sds if sd.camera_image is not None]
            point_clouds = [
                sd for sd in sds
                if sd.camera_image is None and sd.point_cloud is not None]

            task_row = Row(
                task_id=int(scan_id),
                pc_sds=point_clouds,
                cuboids_sds=[],  # SemanticKITTI has no cuboids
                ci_sds=camera_images)
            return RowAdapter.to_row(task_row)

        datum_rdd = cls.SRC_SD_TABLE.get_segment_datum_rdd(spark, segment_uri)
        task_row_rdd = datum_rdd.groupBy(scan_id_of).map(to_task_row)

        task_df = spark.createDataFrame(
            task_row_rdd, schema=cls.table_schema())
        return task_df.persist()

class SemanticKITTIFusedWorldCloudTable(FusedLidarCloudTableBase):
    """Fused lidar cloud table configured for SemanticKITTI."""

    # SemanticKITTI has no cuboids, so the object-cloud step is skipped.
    HAS_OBJ_CLOUDS = False

    # Factory that produces the per-scan task DataFrame for this table.
    TASK_DF_FACTORY = SemanticKITTILCCDFFactory

class SemanticKITTIOFlowRenderer(OpticalFlowRenderBase):
    """Optical flow renderer bound to the SemanticKITTI fused cloud table."""
    FUSED_LIDAR_SD_TABLE = SemanticKITTIFusedWorldCloudTable

## KITTI-360

In [3]:
from psegs.datasets.kitti_360 import KITTI360SDTable
class KITTI360OurFusedClouds(KITTI360SDTable):
    """KITTI-360 datum table variant with two of the base table's flags off."""

    # Use our own fused clouds instead of the dataset-provided ones.
    INCLUDE_FUSED_CLOUDS = False

    # Fisheye datums are switched off for this table.
    INCLUDE_FISHEYES = False

class KITTI360OurFusedWorldCloudTable(FusedLidarCloudTableBase):
    """Fused lidar cloud table for KITTI-360.

    Source datums are read from `KITTI360OurFusedClouds`.
    """
    SRC_SD_TABLE = KITTI360OurFusedClouds
    
    @classmethod
    def _get_task_lidar_cuboid_rdd(cls, spark, segment_uri):
        """Return an RDD of per-frame tasks for one segment.

        Cuboid and lidar datums of the segment are grouped by a task id of
        the form `<segment_id>.<kitti-360.frame_id>`; each task row carries
        the flattened cuboids and the list of point clouds for that frame.

        Args:
            spark: Spark session used to run the SQL query.
            segment_uri: URI of the segment to build tasks for.

        Returns:
            The RDD underlying the cached `culi_tasks_df` table, with
            columns `task_id`, `cuboids` and `point_clouds`.
        """
        datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
        # `registerTempTable` is deprecated; `createOrReplaceTempView` is the
        # supported equivalent and likewise replaces any existing view.
        datum_df.createOrReplaceTempView('datums')
        # Drop any task table left over from a previous segment so the
        # CACHE TABLE statement below can (re)create it.
        spark.catalog.dropTempView('culi_tasks_df')
        print('Building tasks table for %s ...' % segment_uri.segment_id)
        spark.sql("""
          CACHE TABLE culi_tasks_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
          SELECT 
              CONCAT(uri.segment_id, '.', uri.extra.`kitti-360.frame_id`) AS task_id,
              FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, 
              COLLECT_LIST(point_cloud) AS point_clouds
          FROM datums
          WHERE 
              uri.topic LIKE '%cuboid%' OR uri.topic LIKE '%lidar%'
          GROUP BY task_id
        """)

        # TODO: apply an ego-pose validity check to lidar and camera image
        # datums.  Sketch from an earlier version (presumably comparing two
        # camera images `ci1` and `ci2`):
        #         both_have_ego_pose = (
        #             ci1.extra.get('kitti-360.has-valid-ego-pose') and
        #             ci2.extra.get('kitti-360.has-valid-ego-pose'))

        tasks_df = spark.sql('SELECT * FROM culi_tasks_df')
        print('... done.')
        return tasks_df.rdd


## NuScenes

In [39]:
# !pip3 install nuscenes-devkit==1.1.2
from psegs.datasets.nuscenes import NuscStampedDatumTableBase
from psegs.datasets.nuscenes import NuscStampedDatumTableLabelsAllFrames





class NuscKFOnlyLCCDFFactory(TaskLidarCuboidCameraDFFactory):
    """Task DataFrame factory for NuScenes using keyframe datums only.

    Each NuScenes sample (keyframe) becomes one task holding its lidar,
    cuboid and camera datums.
    """
    
    SRC_SD_TABLE = NuscStampedDatumTableBase
    
    @classmethod
    def build_df_for_segment(cls, spark, segment_uri):
        """Build and persist the keyframe task DataFrame for one segment.

        Args:
            spark: Spark session used to run the SQL query and build the
                task-id lookup table.
            segment_uri: URI of the segment to build tasks for.

        Returns:
            A persisted DataFrame with columns `sample_token`, `pc_sds`,
            `cuboids_sds`, `ci_sds`, `lidar_time` and `task_id`, where
            `task_id` is the 0-based rank of the sample by lidar timestamp.
        """
        datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
        # `registerTempTable` is deprecated; `createOrReplaceTempView` is the
        # supported equivalent and likewise replaces any existing view.
        datum_df.createOrReplaceTempView('datums')
        # This method does not create `nusc_task_df`; the drop only clears a
        # view of that name if some other code left one registered.
        spark.catalog.dropTempView('nusc_task_df')
        print('Building tasks table for %s ...' % segment_uri.segment_id)
        
        # Nusc doesn't have numerical task_ids so we'll have to induce
        # one via lidar timestamp.
        # NB: for Nusc: can group by nuscenes-sample-token FOR KEYFRAMES-ONLY DATA
        task_data_df = spark.sql("""
            SELECT 
              COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
                  FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
              COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
                  FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
              COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
                  FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,
              MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,
              FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token
            FROM datums
            WHERE 
            uri.extra.`nuscenes-is-keyframe` = 'True' AND (
              uri.extra['nuscenes-label-channel'] is NULL OR 
              uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'
            ) AND (
              uri.topic LIKE '%cuboid%' OR
              uri.topic LIKE '%lidar%' OR
              uri.topic LIKE '%camera%'
            )
            GROUP BY uri.extra.`nuscenes-sample-token`
            ORDER BY lidar_time
        """)

        # The query result is ordered by lidar time, so the position of each
        # sample token in the collected list is its numerical task id.
        # (Previously this read an undefined name `df` instead of
        # `task_data_df`, which only worked when a stale notebook global of
        # that name happened to exist.)
        sample_tokens_ordered = [
            r.sample_token
            for r in task_data_df.select('sample_token').collect()
        ]
        task_to_stoken = [
            {'task_id': task_id, 'sample_token': sample_token}
            for task_id, sample_token in enumerate(sample_tokens_ordered)
        ]
        task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)
        task_id_df = spark.createDataFrame(task_id_rdd)

        # Attach the induced task ids to the per-sample task data.
        tasks_df = task_data_df.join(
            task_id_df, on=['sample_token'], how='inner')
        tasks_df = tasks_df.persist()
        print('... done.')
        return tasks_df


class NuscWorldCloudTableBase(FusedLidarCloudTableBase):
    """Base fused-cloud table for NuScenes; filters ego self-returns."""
    SPLITS = ['train_detect', 'train_track']
    
    @classmethod
    def _filter_ego_vehicle(cls, cloud_ego):
        """Drop points that fall inside a fixed box around the ego vehicle.

        Note: NuScenes authors have already corrected clouds for ego motion:
        https://github.com/nutonomy/nuscenes-devkit/issues/481#issuecomment-716250423
        But have not filtered out ego self-returns, which is what this
        method does.

        Args:
            cloud_ego: 2-D numpy array with one point per row whose first
                three columns are x, y, z (assumed to be in meters in the
                NuScenes lidar frame -- TODO confirm against the caller).

        Returns:
            The rows of `cloud_ego` that lie outside the box
            |x| <= 1.5, |y| <= 2.5, |z| <= 1.5.
        """
        x = cloud_ego[:, 0]  # Nusc lidar +x is +right
        y = cloud_ego[:, 1]  # Nusc lidar +y is +forward
        z = cloud_ego[:, 2]  # Nusc lidar +z is +up

        # Each axis must be tested against its own column.  The earlier
        # version compared the y lower bound and both z bounds against the
        # x / y columns, which left z unchecked and y unbounded below.
        is_self_return = (
            (x >= -1.5) & (x <= 1.5) &
            (y >= -2.5) & (y <= 2.5) &
            (z >= -1.5) & (z <= 1.5)
        )
        return cloud_ego[~is_self_return]
    
class NuscKFOnlyFusedWorldCloudTable(NuscWorldCloudTableBase):
    """NuScenes fused cloud table whose tasks come from keyframes only."""
    # Task rows are produced by the keyframes-only factory.
    TASK_DF_FACTORY = NuscKFOnlyLCCDFFactory
    
    
    
#     task_id=int(scan_id),
#                     pc_sds=point_clouds,
#                     cuboids_sds=[], # SemanticKITTI has no cuboids
#                     ci_sds=camera_images
#     @classmethod
#     def _get_task_lidar_cuboid_rdd(cls, spark, segment_uri):
#         datum_df = cls.SRC_SD_TABLE.get_segment_datum_df(spark, segment_uri)
#         datum_df.registerTempTable('datums')
#         spark.catalog.dropTempView('nusc_task_df')
#         print('Building tasks table for %s ...' % segment_uri.segment_id)
        
#         # Nusc doesn't have numerical task_ids so we'll have to induce
#         # one via lidar timestamp.
#         if cls.SRC_SD_TABLE.LABELS_KEYFRAMES_ONLY:
#             # For Nusc: group by nuscenes-sample-token WITH KEYFRAMES
#             spark.sql("""
#               CACHE TABLE nusc_task_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
#               SELECT 
#                   MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS task_id,
#                   COLLECT_LIST(*) FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
#                   COLLECT_LIST(*) FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
#                   COLLECT_LIST(*) FILTER (WHERE uri.topic LIKE '%cam%') AS ci_sds
#               FROM datums
#               WHERE 
#                 uri.extra.`nuscenes-is-keyframe` = 'True' AND (
#                   uri.extra['nuscenes-label-channel'] is NULL OR 
#                   uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%' OR
#                   uri.extra['nuscenes-label-channel'] LIKE '%CAM%'
#                 ) AND (
#                   uri.topic LIKE '%cuboid%' OR
#                   uri.topic LIKE '%lidar%' OR
#                   uri.topic LIKE '%cam%'
#                 )
#               GROUP BY task_id
#             """)
#         else:
#             # For Nusc: group by nuscenes-sample-token WITH ALL FRAMES
#             spark.sql("""
#               CACHE TABLE nusc_task_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
#               SELECT 
#                   CONCAT(uri.segment_id, '.', uri.timestamp) AS task_id,
#                   FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, 
#                   COLLECT_LIST(point_cloud) AS point_clouds
#               FROM datums
#               WHERE 
#                 (
#                   uri.extra['nuscenes-label-channel'] is NULL OR 
#                   uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'
#                 ) AND (
#                   uri.topic LIKE '%cuboid%' OR
#                   uri.topic LIKE '%lidar%'
#                 )
#               GROUP BY task_id
#               HAVING SIZE(cuboids) > 0 AND SIZE(point_clouds) > 0
#             """)
# #             spark.sql("""
# #               CACHE TABLE culi_tasks_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
# #               SELECT 
# #                   CONCAT(uri.segment_id, '.', uri.timestamp) AS task_id,
# #                   FLATTEN(COLLECT_LIST(cuboids)) AS cuboids, 
# #                   COLLECT_LIST(point_cloud) AS point_clouds
# #               FROM datums
# #               WHERE 
# #                 (
# #                   uri.extra['nuscenes-label-channel'] is NULL OR 
# #                   uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'
# #                 ) AND (
# #                   uri.topic LIKE '%cuboid%' OR
# #                   uri.topic LIKE '%lidar%'
# #                 )
# #               GROUP BY task_id
# #               HAVING SIZE(cuboids) > 0 AND SIZE(point_clouds) > 0
# #             """)
        
#         tasks_df = spark.sql('SELECT * FROM nusc_task_df')
#         print('... done.')
#         return tasks_df.rdd

# class NuscFusedWorldCloudKeyframesOnlyTable(NuscFusedWorldCloudTableBase):
#     SRC_SD_TABLE = NuscStampedDatumTableBase

# class NuscFusedWorldCloudAllFramesTable(NuscFusedWorldCloudTableBase):
#     SRC_SD_TABLE = NuscStampedDatumTableLabelsAllFrames
    
class NuscKeyframesOFlowRenderer(OpticalFlowRenderBase):
    """Optical flow renderer bound to the NuScenes keyframes-only fused cloud table."""
    FUSED_LIDAR_SD_TABLE = NuscKFOnlyFusedWorldCloudTable

# class NuscAllFramesOFlowRenderer(OpticalFlowRenderBase):
#     FUSED_LIDAR_SD_TABLE = NuscFusedWorldCloudAllFramesTable

## Start Spark

In [5]:
# Obtain the Spark session used by the cells below.
from psegs.spark import NBSpark
spark = NBSpark.getOrCreate()

2021-02-21 22:55:27,893	oarph 3599341 : Using source root /opt/psegs/psegs 
2021-02-21 22:55:27,894	oarph 3599341 : Using source root /opt/psegs 
2021-02-21 22:55:27,978	oarph 3599341 : Generating egg to /tmp/tmp65b2h99d_oarphpy_eggbuild ...
2021-02-21 22:55:28,061	oarph 3599341 : ... done.  Egg at /tmp/tmp65b2h99d_oarphpy_eggbuild/psegs-0.0.0-py3.8.egg


## Build Fused Lidar Assets

```
docker --context default run -it --name=potree_viewer --rm --net=host -v `pwd`:/shared  jonazpiazu/potree
```

In [6]:
# T = KITTI360OurFusedWorldCloudTable
# rdds = T._create_datum_rdds(spark)
# print([r.count() for r in rdds])

# seg_uris = T.get_all_segment_uris()
# samp = T.get_sample(seg_uris[0], spark=spark)


In [7]:
# print([lc.sensor_name for lc in samp.lidar_clouds][:10])
# c = samp.lidar_clouds[0]#[lc for lc in samp.lidar_clouds if lc.sensor_name == '11002'][0]
# print(c.get_cloud().shape)
# imshow(c.get_bev_debug_image(x_bounds_meters=None, y_bounds_meters=None))
# imshow(c.get_front_rv_debug_image(y_bounds_meters=None, z_bounds_meters=None))

## Compute Candidate Optical Flow Pairs

## Render Optical Flow

In [None]:
# Run the NuScenes keyframes renderer build on a single segment.
R = NuscKeyframesOFlowRenderer
# All segment URIs known to the renderer's fused lidar table.
seg_uris = R.FUSED_LIDAR_SD_TABLE.get_all_segment_uris()
# Restrict the build to the first listed segment only.
R.build(spark=spark, only_segments=[seg_uris[0]])

2021-02-21 23:24:52,058	ps   3599341 : Filtering to only 1 segments


Building tasks table for scene-1013 ...
... done.


2021-02-21 23:25:08,608	ps   3599341 : Filtering to only 1 segments
2021-02-21 23:25:08,609	ps   3599341 : NuscKFOnlyFusedWorldCloudTable building fused clouds ...
2021-02-21 23:25:08,610	ps   3599341 : ... have 1 segments to fuse ...
2021-02-21 23:25:08,611	ps   3599341 : ... working on scene-1013 ...
2021-02-21 23:25:08,612	ps   3599341 : ... skipping scene-1013; world and obj clouds done
2021-02-21 23:25:08,613	ps   3599341 : World Cloud: /opt/psegs/dataroot/fused_world_clouds/naive_cuboid_scrubber/nuscenes-Lkfo+lseg/train_detect/scene-1013/fused_world.ply
2021-02-21 23:25:08,613	ps   3599341 : Obj Clouds: /opt/psegs/dataroot/fused_obj_clouds/naive_cuboid_scrubber/nuscenes-Lkfo+lseg/train_detect/scene-1013
2021-02-21 23:25:08,616	oarph 3599341 : Progress for 
FuseEachSegment [Pid:3599341 Id:139725201085104]
-----------------------  ---------------
Thruput
N thru                   1 (of 1)
N chunks                 1
Total time               0 seconds
Total thru               0 bytes


num tasks 40
running map.. count


In [38]:
# assert False, spark.sql("""select uri.topic t from datums group by t""").show(truncate=False)

# spark.sql("""
#           CACHE TABLE nusc_task_df OPTIONS ( 'storageLevel' 'DISK_ONLY' ) AS
#           SELECT 
#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS task_id,
#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) FILTER (WHERE uri.topic LIKE '%cam%') AS ci_sds
#           FROM datums
#           WHERE 
#             uri.extra.`nuscenes-is-keyframe` = 'True' AND (
#               uri.extra['nuscenes-label-channel'] is NULL OR 
#               uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'
#             ) AND (
#               uri.topic LIKE '%cuboid%' OR
#               uri.topic LIKE '%lidar%' OR
#               uri.topic LIKE '%camera%'
#             )
#           GROUP BY uri.extra.`nuscenes-sample-token`
#         """).show()
# df = spark.sql("""
#           SELECT 
#               COLLECT_LIST(STRUCT(__pyclass__, uri, point_cloud)) 
#                   FILTER (WHERE uri.topic LIKE '%lidar%') AS pc_sds,
#               COLLECT_LIST(STRUCT(__pyclass__, uri, cuboids)) 
#                   FILTER (WHERE uri.topic LIKE '%cuboid%') AS cuboids_sds,
#               COLLECT_LIST(STRUCT(__pyclass__, uri, camera_image)) 
#                   FILTER (WHERE uri.topic LIKE '%camera%') AS ci_sds,
#               MIN(uri.timestamp) FILTER (WHERE uri.topic LIKE '%lidar%') AS lidar_time,
#               FIRST(uri.extra.`nuscenes-sample-token`) AS sample_token
#           FROM datums
#           WHERE 
#             uri.extra.`nuscenes-is-keyframe` = 'True' AND (
#               uri.extra['nuscenes-label-channel'] is NULL OR 
#               uri.extra['nuscenes-label-channel'] LIKE '%LIDAR%'
#             ) AND (
#               uri.topic LIKE '%cuboid%' OR
#               uri.topic LIKE '%lidar%' OR
#               uri.topic LIKE '%camera%'
#             )
#           GROUP BY uri.extra.`nuscenes-sample-token`
#           ORDER BY lidar_time
#         """)
# sample_tokens_ordered = [r.sample_token for r in df.select('sample_token').collect()]
# task_to_stoken = [
#     {'task_id': task_id, 'sample_token': sample_token}
#     for task_id, sample_token in enumerate(sample_tokens_ordered)
# ]
# task_id_rdd = spark.sparkContext.parallelize(task_to_stoken)
# task_id_df = spark.createDataFrame(task_id_rdd)
# df.join(task_id_df, on=['sample_token'], how='inner').orderBy('task_id').show()

+--------------------+--------------------+--------------------+--------------------+-------------------+-------+
|        sample_token|              pc_sds|         cuboids_sds|              ci_sds|         lidar_time|task_id|
+--------------------+--------------------+--------------------+--------------------+-------------------+-------+
|9733299c72954c6fb...|[[psegs.datum.sta...|[[psegs.datum.sta...|[[psegs.datum.sta...|1542193832147652000|      0|
|8928669895174b1ea...|[[psegs.datum.sta...|[[psegs.datum.sta...|[[psegs.datum.sta...|1542193832647546000|      1|
|e5e8907e7dca4f6ea...|[[psegs.datum.sta...|[[psegs.datum.sta...|[[psegs.datum.sta...|1542193833197705000|      2|
|3a1fadfc6bfa45d49...|[[psegs.datum.sta...|[[psegs.datum.sta...|[[psegs.datum.sta...|1542193833698143000|      3|
|8801fd25ac25490db...|[[psegs.datum.sta...|[[psegs.datum.sta...|[[psegs.datum.sta...|1542193834198008000|      4|
|126177e74e854a329...|[[psegs.datum.sta...|[[psegs.datum.sta...|[[psegs.datum.sta...|154