In [None]:
import os
import sys
import glob
import pandas as pd
import numpy as np
from datetime import datetime
from functools import partial
from multiprocessing import Pool
from tqdm import tqdm, tqdm_notebook
from lyft_dataset_sdk.lyftdataset import LyftDataset
from lyft_dataset_sdk.utils.data_classes import LidarPointCloud, Box, Quaternion
from lyft_dataset_sdk.utils.geometry_utils import view_points, transform_matrix

sys.path.insert(0, '/home/mtb/ongoing_analysis/lyft-3d-od')
from config import config as cfg

In [None]:
# Dataset root, taken from the project config module.
input_dir = cfg.input_dir
# Directory that receives the generated train/validation sample lists
# (relative to the notebook's working directory).
output_dir = '../data'

In [None]:
# Open the training split: data_path is <input_dir>/train and json_path is
# the 'data' folder inside it.
train_root = os.path.join(input_dir, 'train')
lyft_dataset = LyftDataset(
    data_path=train_root,
    json_path=os.path.join(train_root, 'data'),
    verbose=True,
)

In [None]:
# Pair every scene record with the timestamp of its first sample so the scenes
# can later be ordered by start time.
scene_records = [
    (
        lyft_dataset.get('sample', scene_rec['first_sample_token'])['timestamp'],
        scene_rec,
    )
    for scene_rec in lyft_dataset.scene
]

In [None]:
scene_entries = []

# Order scenes by the timestamp of their first sample. The sort uses the
# timestamp alone as key: sorting the raw (timestamp, scene) tuples would fall
# back to comparing the scene dicts whenever two scenes share a timestamp, and
# dicts are not orderable (TypeError).
for start_time, scene in sorted(scene_records, key=lambda record: record[0]):
    # ex) token: 473093b48a7cb78d05e36245fd2dbd12d66ded7dab1ecb862945390b8a765c0a
    #     name: host-a007-lidar0-1230485630199365106-1230485655099030186
    #     date: 2019-01-02 17:33:50.301987
    #     host: host-a007
    #     first_sample_token: c7f7de87ec90c8993d4e7d5463208d2aa9f5ecde671960536f39b9a86f939d3c
    token = scene['token']
    name = scene['name']
    # scene_records already carries the first sample's timestamp, so no second
    # dataset lookup is needed; dividing by 1e6 yields the seconds value that
    # utcfromtimestamp expects.
    date = datetime.utcfromtimestamp(start_time / 1e+6)
    # The host id is the first two dash-separated parts of the scene name.
    host = '-'.join(name.split('-')[:2])
    first_sample_token = scene['first_sample_token']
    scene_entries.append((host, name, date, token, first_sample_token))

# One row per scene, in chronological order.
df_scene = pd.DataFrame(scene_entries, columns=['host', 'scene_name', 'date', 'scene_token', 'first_sample_token'])

In [None]:
# Number of scenes recorded per host.
df_host_count = df_scene.groupby('host')['scene_token'].count()
df_host_count

In [None]:
# Hold out every scene recorded by the cars listed below as the validation set;
# all remaining scenes form the training set.
# Splitting by scene, by date or completely at random would be alternatives.
validation_hosts = ['host-a007', 'host-a008', 'host-a009']
is_validation_host = df_scene['host'].isin(validation_hosts)

df_scene_valid = df_scene[is_validation_host]
vi = df_scene_valid.index
df_scene_train = df_scene[~is_validation_host]

print(len(df_scene_train), len(df_scene_valid), "train/validation split scene counts")

In [None]:
# Display the scenes assigned to the training split.
df_scene_train

In [None]:
def prepare_training_data_for_scene(entries, scene_token, first_sample_token):
    """
    Walk one scene's chain of samples and record those whose lidar sweep loads.

    Starting at ``first_sample_token``, follows each sample's ``'next'`` link
    until it is empty. For every sample the ``LIDAR_TOP`` point cloud file is
    loaded as a sanity check; when loading succeeds a
    ``(scene_token, sample_token)`` tuple is appended to ``entries``, otherwise
    the failure is printed and the sample is skipped.

    Args:
        entries: list that is extended in place with the accepted
            ``(scene_token, sample_token)`` tuples.
        scene_token: token stored alongside every accepted sample.
        first_sample_token: token of the sample to start from.

    Returns:
        None. Results are delivered through ``entries``.
    """
    sample_token = first_sample_token
    while sample_token:
        sample = lyft_dataset.get('sample', sample_token)
        lidar_filepath = lyft_dataset.get_sample_data_path(sample['data']['LIDAR_TOP'])

        try:
            # The cloud itself is not needed here; loading only verifies that
            # the file is readable.
            LidarPointCloud.from_file(lidar_filepath)
        except Exception as e:
            # Best effort: report the unreadable sweep and keep going.
            print('Failed to load Lidar Pointcloud for {}: {}:'.format(sample_token, e))
        else:
            entries.append((scene_token, sample_token))

        # Advance exactly once per iteration, whether or not the load succeeded.
        sample_token = sample['next']

In [None]:
# Build the (scene_token, sample_token) list for each split and write it to disk.
# The loop variable is deliberately NOT called `df_scene`: reusing that name
# would overwrite the full scene table, so re-running the split cell afterwards
# would silently operate on the validation scenes only.
os.makedirs(output_dir, exist_ok=True)  # to_csv does not create missing directories

sample_frames = {}
for split_name, df_split in (('train', df_scene_train), ('valid', df_scene_valid)):
    entries = []
    for scene_token, first_sample_token in zip(df_split['scene_token'], df_split['first_sample_token']):
        prepare_training_data_for_scene(entries, scene_token, first_sample_token)

    df_sample = pd.DataFrame(entries, columns=['scene_token', 'sample_token'])
    # NOTE(review): the '.cvs' extension looks like a typo for '.csv'; it is kept
    # unchanged so anything already reading these paths keeps working - confirm
    # and rename together with the readers.
    df_sample.to_csv(os.path.join(output_dir, '{}.cvs'.format(split_name)))
    sample_frames[split_name] = df_sample

# Expose the per-split frames under the names the following cells display.
df_sample_train = sample_frames['train']
df_sample_valid = sample_frames['valid']



In [None]:
# Display the (scene_token, sample_token) rows collected for the training split.
df_sample_train

In [None]:
# Display the (scene_token, sample_token) rows collected for the validation split.
df_sample_valid