In [None]:
import glob
import os
from datetime import datetime, timezone

import numpy as np
import pandas as pd
from lyft_dataset_sdk.lyftdataset import LyftDataset
from lyft_dataset_sdk.utils.data_classes import LidarPointCloud, Box, Quaternion
from lyft_dataset_sdk.utils.geometry_utils import view_points, transform_matrix

In [None]:
# Location of the Lyft 3D object detection dataset. Set the LYFT_DATASET_DIR
# environment variable to point at a different copy; the default is the
# original author's local path.
input_dir = os.environ.get(
    'LYFT_DATASET_DIR',
    '/run/media/hoosiki/WareHouse1/mtb/datasets/lyft-3d-od',
)
# Directory that receives the generated train/validation CSV files.
output_dir = '../data'

In [None]:
# Open the training split: data_path is the 'train' folder under input_dir and
# json_path is the 'data' folder inside it.
train_root = os.path.join(input_dir, 'train')
lyft_dataset = LyftDataset(
    data_path=train_root,
    json_path=os.path.join(train_root, 'data'),
    verbose=True,
)

In [None]:
# Pair every scene with the timestamp of its first sample.
scene_records = []
for scene in lyft_dataset.scene:
    first_sample = lyft_dataset.get('sample', scene['first_sample_token'])
    scene_records.append((first_sample['timestamp'], scene))
scene_records

In [None]:
# Build one metadata row per scene, ordered by the timestamp of its first sample.
scene_entries = []

# Sort on the timestamp only. Sorting the bare (timestamp, scene) tuples would
# fall back to comparing the scene records themselves whenever two timestamps
# are equal, and dicts do not support ordering (TypeError).
for first_timestamp, scene in sorted(scene_records, key=lambda record: record[0]):

    # ex) token: 473093b48a7cb78d05e36245fd2dbd12d66ded7dab1ecb862945390b8a765c0a
    #     name: host-a007-lidar0-1230485630199365106-1230485655099030186
    #     date: 2019-01-02 17:33:50.301987
    #     host: host-a007
    #     first_sample_token: c7f7de87ec90c8993d4e7d5463208d2aa9f5ecde671960536f39b9a86f939d3c

    # scene_records already carries the first sample's timestamp, so reuse it
    # instead of looking the sample up again. Dividing by 1e+6 yields the
    # seconds value that datetime expects.
    start_time = first_timestamp / 1e+6
    token = scene['token']
    name = scene['name']
    # Naive UTC datetime: the same value utcfromtimestamp() produced, without
    # relying on the call that is deprecated since Python 3.12.
    date = datetime.fromtimestamp(start_time, tz=timezone.utc).replace(tzinfo=None)
    # The host id is the first two dash-separated parts of the scene name.
    host = '-'.join(name.split('-')[:2])
    first_sample_token = scene['first_sample_token']
    scene_entries.append((host, name, date, token, first_sample_token))

df_scene = pd.DataFrame(scene_entries, columns=['host', 'scene_name', 'date', 'scene_token', 'first_sample_token'])
df_scene

In [None]:
# Count the scenes recorded by each host.
scenes_by_host = df_scene.groupby('host')
df_host_count = scenes_by_host['scene_token'].count()
df_host_count

In [None]:
# Hold out every scene from a fixed set of hosts (cars) as the validation set.
# Splitting by scene, by date, or fully at random would be alternatives.
validation_hosts = ['host-a007', 'host-a008', 'host-a009']

from_validation_host = df_scene['host'].isin(validation_hosts)
df_valid = df_scene[from_validation_host]

# Every row whose index is not in the validation set goes to training.
vi = df_valid.index
in_validation = df_scene.index.isin(vi)
df_train = df_scene[~in_validation]

print(df_train.shape[0], df_valid.shape[0], "train/validation split scene counts")

In [None]:
# Write both splits as CSV files into output_dir.
# Create the directory first so the export does not fail when it is missing.
os.makedirs(output_dir, exist_ok=True)
df_train.to_csv(os.path.join(output_dir, 'train.csv'))
df_valid.to_csv(os.path.join(output_dir, 'valid.csv'))