-
Notifications
You must be signed in to change notification settings - Fork 617
/
export_kitti.py
362 lines (300 loc) · 16.1 KB
/
export_kitti.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# nuScenes dev-kit.
# Code written by Holger Caesar, 2019.
"""
This script converts nuScenes data to KITTI format and KITTI results to nuScenes.
It is used for compatibility with software that uses KITTI-style annotations.
We do not encourage this, as:
- KITTI has only front-facing cameras, whereas nuScenes has a 360 degree horizontal fov.
- KITTI has no radar data.
- The nuScenes database format is more modular.
- KITTI fields like occluded and truncated cannot be exactly reproduced from nuScenes data.
- KITTI has different categories.
Limitations:
- We don't specify the KITTI imu_to_velo_kitti projection in this code base.
- We map nuScenes categories to nuScenes detection categories, rather than KITTI categories.
- Attributes are not part of KITTI and therefore set to '' in the nuScenes result format.
- Velocities are not part of KITTI and therefore set to 0 in the nuScenes result format.
- This script uses the `train` and `val` splits of nuScenes, whereas standard KITTI has `training` and `testing` splits.
This script includes three main functions:
- nuscenes_gt_to_kitti(): Converts nuScenes GT annotations to KITTI format.
- render_kitti(): Render the annotations of the (generated or real) KITTI dataset.
- kitti_res_to_nuscenes(): Converts a KITTI detection result to the nuScenes detection results format.
To launch these scripts run:
- python export_kitti.py nuscenes_gt_to_kitti --nusc_kitti_dir ~/nusc_kitti
- python export_kitti.py render_kitti --nusc_kitti_dir ~/nusc_kitti --render_2d False
- python export_kitti.py kitti_res_to_nuscenes --nusc_kitti_dir ~/nusc_kitti
Note: The parameter --render_2d specifies whether to draw 2d or 3d boxes.
To work with the original KITTI dataset, use these parameters:
--nusc_kitti_dir /data/sets/kitti --split training
See https://www.nuscenes.org/object-detection for more information on the nuScenes result format.
"""
import json
import os
from typing import List, Dict, Any, Optional

import fire
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from pyquaternion import Quaternion

from nuscenes.eval.detection.utils import category_to_detection_name
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import LidarPointCloud, Box
from nuscenes.utils.geometry_utils import BoxVisibility, transform_matrix
from nuscenes.utils.kitti import KittiDB
from nuscenes.utils.splits import create_splits_logs
class KittiConverter:
    """
    Command line entry point that exports nuScenes samples to a KITTI-style folder layout, renders that layout and
    converts KITTI-style boxes back into a nuScenes detection submission file.
    """

    def __init__(self,
                 nusc_kitti_dir: str = '~/nusc_kitti',
                 cam_name: str = 'CAM_FRONT',
                 lidar_name: str = 'LIDAR_TOP',
                 image_count: int = 10,
                 nusc_version: str = 'v1.0-mini',
                 split: str = 'mini_train'):
        """
        :param nusc_kitti_dir: Where to write the KITTI-style annotations.
        :param cam_name: Name of the camera to export. Note that only one camera is allowed in KITTI.
        :param lidar_name: Name of the lidar sensor.
        :param image_count: Number of images to convert.
        :param nusc_version: nuScenes version to use.
        :param split: Dataset split to use.
        """
        self.nusc_kitti_dir = os.path.expanduser(nusc_kitti_dir)
        self.cam_name = cam_name
        self.lidar_name = lidar_name
        self.image_count = image_count
        self.nusc_version = nusc_version
        self.split = split

        # Create nusc_kitti_dir. exist_ok avoids a race between the existence check and the creation.
        os.makedirs(self.nusc_kitti_dir, exist_ok=True)

        # Select subset of the data to look at.
        self.nusc = NuScenes(version=nusc_version)

    def nuscenes_gt_to_kitti(self) -> None:
        """
        Converts nuScenes GT annotations to KITTI format.
        For each of the first `image_count` samples of the split, this writes the camera image (png), the lidar
        sweep (bin), a calibration file and a label file into the `image_2`, `velodyne`, `calib` and `label_2`
        folders below `<nusc_kitti_dir>/<split>/`. Existing images and label files are not overwritten.
        """
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
        imsize = (1600, 900)

        # Truncated: Set all objects to 0 which means untruncated.
        truncated = 0.0

        # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
        occluded = 0

        # Get assignment of scenes to splits.
        split_logs = create_splits_logs(self.split, self.nusc)

        # Create output folders.
        label_folder = os.path.join(self.nusc_kitti_dir, self.split, 'label_2')
        calib_folder = os.path.join(self.nusc_kitti_dir, self.split, 'calib')
        image_folder = os.path.join(self.nusc_kitti_dir, self.split, 'image_2')
        lidar_folder = os.path.join(self.nusc_kitti_dir, self.split, 'velodyne')
        for folder in [label_folder, calib_folder, image_folder, lidar_folder]:
            os.makedirs(folder, exist_ok=True)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        sample_tokens = sample_tokens[:self.image_count]

        for sample_token in sample_tokens:
            # Get sample data.
            sample = self.nusc.get('sample', sample_token)
            sample_annotation_tokens = sample['anns']
            cam_front_token = sample['data'][self.cam_name]
            lidar_token = sample['data'][self.lidar_name]

            # Retrieve sensor records.
            sd_record_cam = self.nusc.get('sample_data', cam_front_token)
            sd_record_lid = self.nusc.get('sample_data', lidar_token)
            cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
            cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                          inverse=False)
            ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                          inverse=True)
            velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

            # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
            assert (velo_to_cam_trans[1:3] < 0).all()

            # All output files of a sample are named after the sample token.
            filename_cam_full = sd_record_cam['filename']
            filename_lid_full = sd_record_lid['filename']

            # Convert image (jpg to png). The context manager closes the source file handle after saving.
            src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
            dst_im_path = os.path.join(image_folder, sample_token + '.png')
            if not os.path.exists(dst_im_path):
                with Image.open(src_im_path) as im:
                    im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
            dst_lid_path = os.path.join(lidar_folder, sample_token + '.bin')
            assert not dst_lid_path.endswith('.pcd.bin')
            pcl = LidarPointCloud.from_file(src_lid_path)
            pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
            # The point cloud is dumped as raw bytes, so the file must be opened in binary mode.
            with open(dst_lid_path, "wb") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
            kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
            kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
            calib_path = os.path.join(calib_folder, sample_token + '.txt')
            with open(calib_path, "w") as calib_file:
                for key, val in kitti_transforms.items():
                    # One line per transform: the flattened matrix as space separated floats.
                    val_str = ' '.join('%.12e' % v for v in val.flatten())
                    calib_file.write('%s: %s\n' % (key, val_str))

            # Write label file.
            label_path = os.path.join(label_folder, sample_token + '.txt')
            if os.path.exists(label_path):
                print('Skipping existing file: %s' % label_path)
                continue
            print('Writing file: %s' % label_path)
            with open(label_path, "w") as label_file:
                for sample_annotation_token in sample_annotation_tokens:
                    sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

                    # Convert nuScenes category to nuScenes detection challenge category.
                    detection_name = category_to_detection_name(sample_annotation['category_name'])

                    # Skip categories that are not part of the nuScenes detection challenge.
                    # This is checked first so that no box is retrieved for annotations that are dropped anyway.
                    if detection_name is None:
                        continue

                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token, box_vis_level=BoxVisibility.NONE,
                                                                     selected_anntokens=[sample_annotation_token])
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

                    # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                    bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
                    if bbox_2d is None:
                        continue

                    # Set dummy score so we can use this file as result.
                    box_cam_kitti.score = 0

                    # Convert box to output string format.
                    output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
                                                   truncation=truncated, occlusion=occluded)

                    # Write to disk.
                    label_file.write(output + '\n')

    def render_kitti(self, render_2d: bool) -> None:
        """
        Renders the annotations in the KITTI dataset from a lidar and a camera view.
        The images are written to `<nusc_kitti_dir>/render/<token>_<sensor>.png`.
        :param render_2d: Whether to render 2d boxes (only works for camera data).
        """
        if render_2d:
            print('Rendering 2d boxes from KITTI format')
        else:
            print('Rendering 3d boxes projected from 3d KITTI format')

        # Load the KITTI dataset.
        kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split,))

        # Create output folder.
        render_dir = os.path.join(self.nusc_kitti_dir, 'render')
        os.makedirs(render_dir, exist_ok=True)

        # Render each image.
        for token in kitti.tokens[:self.image_count]:
            for sensor in ['lidar', 'camera']:
                out_path = os.path.join(render_dir, '%s_%s.png' % (token, sensor))
                print('Rendering file to disk: %s' % out_path)
                kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d)
                plt.close()  # Close the windows to avoid a warning of too many open windows.

    def kitti_res_to_nuscenes(self, meta: Optional[Dict[str, bool]] = None) -> None:
        """
        Converts a KITTI detection result to the nuScenes detection results format.
        The submission is written to `<nusc_kitti_dir>/submission.json`.
        :param meta: Meta data describing the method used to generate the result. See nuscenes.org/object-detection.
            If None, dummy meta data (lidar only) is used.
        """
        # Dummy meta data, please adjust accordingly.
        if meta is None:
            meta = {
                'use_camera': False,
                'use_lidar': True,
                'use_radar': False,
                'use_map': False,
                'use_external': False,
            }

        # Init.
        results = {}

        # Load the KITTI dataset.
        kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split,))

        # Get assignment of scenes to splits.
        split_logs = create_splits_logs(self.split, self.nusc)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        sample_tokens = sample_tokens[:self.image_count]

        for sample_token in sample_tokens:
            # Get the KITTI boxes we just generated in LIDAR frame.
            kitti_token = '%s_%s' % (self.split, sample_token)
            boxes = kitti.get_boxes(token=kitti_token)

            # Convert KITTI boxes to nuScenes detection challenge result format.
            # Store all results for this image.
            results[sample_token] = [self._box_to_sample_result(sample_token, box) for box in boxes]

        # Store submission file to disk.
        submission = {
            'meta': meta,
            'results': results
        }
        submission_path = os.path.join(self.nusc_kitti_dir, 'submission.json')
        print('Writing submission to: %s' % submission_path)
        with open(submission_path, 'w') as f:
            json.dump(submission, f, indent=2)

    def _box_to_sample_result(self, sample_token: str, box: 'Box', attribute_name: str = '') -> Dict[str, Any]:
        """
        Converts a single box into a result entry of the nuScenes detection results format.
        :param sample_token: Token of the sample the box belongs to.
        :param box: The box to convert. Its center, wlh, orientation, velocity, name and score fields are read.
        :param attribute_name: Value stored as the attribute name of the result.
        :return: A dictionary that holds only plain Python values and can therefore be dumped to json.
        """
        return {
            'sample_token': sample_token,
            'translation': box.center.tolist(),
            'size': box.wlh.tolist(),
            'rotation': box.orientation.q.tolist(),
            'velocity': box.velocity.tolist()[:2],  # Only need vx, vy.
            'detection_name': box.name,
            'detection_score': box.score,
            'attribute_name': attribute_name,
        }

    def _split_to_samples(self, split_logs: List[str]) -> List[str]:
        """
        Convenience function to get the samples in a particular split.
        :param split_logs: A list of the log names in this split.
        :return: The list of sample tokens, in the order of the nuScenes sample table.
        """
        # Build the set once, so that the membership test per sample is O(1).
        split_log_set = set(split_logs)

        samples = []
        for sample in self.nusc.sample:
            scene = self.nusc.get('scene', sample['scene_token'])
            log = self.nusc.get('log', scene['log_token'])
            if log['logfile'] in split_log_set:
                samples.append(sample['token'])
        return samples
# Expose the converter on the command line: python-fire maps the sub-command (e.g. nuscenes_gt_to_kitti) to the
# method of the same name and the --flags to the constructor / method arguments.
if __name__ == '__main__':
    fire.Fire(KittiConverter)