In [3]:
import os
import shutil

from tqdm import tqdm
from sensus import data

Jupyter environment detected. Enabling Open3D WebVisualizer.
[Open3D INFO] WebRTC GUI backend enabled.
[Open3D INFO] WebRTCWindowSystem: HTTP handshake server disabled.
[Open3D INFO] Resetting default logger to print to terminal.


In [4]:
# Fraction of the dataset to keep when subsampling (0.25 keeps 25% of the samples)
p = 0.25
# The fraction must lie strictly between 0 and 1
assert p > 0 and p < 1

In [5]:
# Root of the original DAIR-V2X infrastructure-side dataset (KITTI format),
# resolved relative to the `sensus.data` package directory.
dataset_path = os.path.join(data.__path__[0], 'DAIR-V2X',
    'cooperative-vehicle-infrastructure-kittiformat', 'infrastructure-side')
# Root of the subsampled copy; the folder suffix encodes the kept percentage
# (e.g. `-sub25` for p = 0.25). round() is used instead of int() because
# p * 100 can land just below the intended integer in floating point
# (0.29 * 100 == 28.999999999999996), which int() would truncate to 28.
new_dataset_path = os.path.join(data.__path__[0], 'DAIR-V2X',
    f'cooperative-vehicle-infrastructure-kittiformat-sub{round(p * 100)}',
    'infrastructure-side')

# Top-level folders that make up the dataset layout
dirs_to_copy = ['ImageSets', 'training', 'testing']

In [6]:
# Create the top-level folder layout of the subsampled dataset
# (folders that already exist are left untouched)
for dir_name in dirs_to_copy:
    target_dir = os.path.join(new_dataset_path, dir_name)
    os.makedirs(target_dir, exist_ok=True)

In [7]:
def read_txt_file(file_path):
    """Read a split file and return its entries as a sorted list.

    Every line is stripped of surrounding whitespace. Blank lines are dropped,
    so a stray empty line (e.g. an extra newline at the end of the file) never
    produces an empty entry that would later be treated as a sample name.

    Args:
        file_path: Path to a text file holding one entry per line.

    Returns:
        list[str]: The non-empty, stripped lines in ascending order.
    """
    with open(file_path, 'r') as f:
        # Iterate the handle directly; a distinct loop name avoids shadowing `f`
        entries = [line.strip() for line in f]
    return sorted(entry for entry in entries if entry)

# Split definition files of the original dataset
image_sets_path = os.path.join(dataset_path, 'ImageSets')
train_txt = os.path.join(image_sets_path, 'train.txt')
val_txt = os.path.join(image_sets_path, 'val.txt')
trainval_txt = os.path.join(image_sets_path, 'trainval.txt')
test_txt = os.path.join(image_sets_path, 'test.txt')

# Load the sorted entries of every split (read in train, val, trainval, test order)
train_files, val_files, trainval_files, test_files = map(
    read_txt_file, (train_txt, val_txt, trainval_txt, test_txt))

In [8]:
# Sanity check: trainval.txt must contain exactly the entries of train.txt and val.txt
train_set = set(train_files)
val_set = set(val_files)
# Sorted union, so it can be compared directly against the sorted trainval list
trainval_set = sorted(train_set | val_set)
assert trainval_set == trainval_files

In [9]:
# Draw a random subset of every split, keeping a fraction p of its entries
import random
random.seed(47)

# The seeded generator is consumed in train -> val -> test order; each subset
# is stored in sorted order.
new_train_files = sorted(random.sample(train_files, int(p * len(train_files))))
new_val_files = sorted(random.sample(val_files, int(p * len(val_files))))
new_test_files = sorted(random.sample(test_files, int(p * len(test_files))))

In [10]:
# The subsampled trainval split is the sorted union of the subsampled train and val splits
new_trainval_files = sorted(set(new_train_files) | set(new_val_files))

In [11]:
def save_txt_file(file_path, files):
    """Write every entry of `files` to `file_path`, one entry per line.

    The target file is created or overwritten, and each entry is followed by
    a newline character.
    """
    with open(file_path, 'w') as out:
        out.writelines(entry + '\n' for entry in files)

# Write the subsampled split definitions next to the new dataset
new_image_sets_path = os.path.join(new_dataset_path, 'ImageSets')
for split_txt, split_files in (
    ('train.txt', new_train_files),
    ('val.txt', new_val_files),
    ('trainval.txt', new_trainval_files),
    ('test.txt', new_test_files),
):
    save_txt_file(os.path.join(new_image_sets_path, split_txt), split_files)

In [12]:
# Split folders of the original dataset ...
training_path, testing_path = (
    os.path.join(dataset_path, split) for split in ('training', 'testing'))
# ... and their counterparts inside the subsampled copy
new_training_path, new_testing_path = (
    os.path.join(new_dataset_path, split) for split in ('training', 'testing'))

def copy_files(files, src_dir, dst_dir):
    """Copy the named files from `src_dir` into `dst_dir`, showing a progress bar.

    Files are copied with `shutil.copy` instead of a shell `cp` command, so
    paths containing spaces or shell metacharacters are handled correctly, the
    code does not depend on a POSIX shell, and a failed copy raises instead of
    being silently ignored.

    Args:
        files: Sized iterable of file names, relative to `src_dir`.
        src_dir: Directory that holds the files.
        dst_dir: Directory that receives the copies; it must already exist.

    Raises:
        OSError: If a file cannot be read or written (for example when it is
            missing from `src_dir`).
    """
    # Report only the last path component of each directory; normpath makes
    # this robust to trailing separators and to the platform's separator.
    src_name = os.path.basename(os.path.normpath(src_dir))
    dst_name = os.path.basename(os.path.normpath(dst_dir))
    print(f'Copying {len(files)} files from {src_name} to {dst_name}')
    for file in tqdm(files):
        src_file = os.path.join(src_dir, file)
        dst_file = os.path.join(dst_dir, file)
        shutil.copy(src_file, dst_file)

In [13]:
# Copy the sampled train and val files of every data folder found under `training`
for each_dir in os.listdir(training_path):
    src_dir = os.path.join(training_path, each_dir)
    # Ignore stray files: only sub-directories hold per-sample data
    if not os.path.isdir(src_dir):
        continue
    dst_dir = os.path.join(new_training_path, each_dir)
    os.makedirs(dst_dir, exist_ok=True)

    src_files = sorted(os.listdir(src_dir))
    # An empty folder has no file to infer the extension from; nothing to copy
    if not src_files:
        print(f'Skipping empty directory {each_dir}')
        continue
    # Infer the extension used in this folder from its first file (sorted order)
    ext = os.path.splitext(src_files[0])[1]
    new_train_files_wext = [f + ext for f in new_train_files]
    new_val_files_wext = [f + ext for f in new_val_files]

    copy_files(new_train_files_wext, src_dir, dst_dir)
    copy_files(new_val_files_wext, src_dir, dst_dir)

Copying 2200 files from velodyne to velodyne


  0%|          | 0/2200 [00:00<?, ?it/s]

100%|██████████| 2200/2200 [00:09<00:00, 233.03it/s]


Copying 906 files from velodyne to velodyne


100%|██████████| 906/906 [00:04<00:00, 206.86it/s]


Copying 2200 files from image_2 to image_2


100%|██████████| 2200/2200 [00:10<00:00, 210.30it/s]


Copying 906 files from image_2 to image_2


100%|██████████| 906/906 [00:04<00:00, 194.11it/s]


Copying 2200 files from velodyne_reduced to velodyne_reduced


100%|██████████| 2200/2200 [00:11<00:00, 189.14it/s]


Copying 906 files from velodyne_reduced to velodyne_reduced


100%|██████████| 906/906 [00:04<00:00, 223.91it/s]


Copying 2200 files from calib to calib


100%|██████████| 2200/2200 [00:07<00:00, 305.09it/s]


Copying 906 files from calib to calib


100%|██████████| 906/906 [00:03<00:00, 295.75it/s]


Copying 2200 files from label_2 to label_2


100%|██████████| 2200/2200 [00:07<00:00, 283.44it/s]


Copying 906 files from label_2 to label_2


100%|██████████| 906/906 [00:02<00:00, 310.22it/s]
