# Stationary to KITTI format

# NEED TO ADD IMAGES AS WELL AFTER

In [56]:
import os
import random
import shutil
from pathlib import Path

In [57]:
# If false, two of each data will remain, in the all folder, and in the test/train splits
delete_pre_split_folders = False

In [58]:
DATA_DIRECTORY = '../data/'

In [59]:
# Source directories
STATIONARY_DATA_DIR = '../stationary_data'
SRC_LABELS_DIR = 'labels'
SRC_IMAGE_DIR = 'images'
SRC_UNFILTERED_DIR = 'velodyne_points'
SRC_FILTERED_DIR = 'filtered_points'
SRC_LABEL_FILTERD = 'roi_filtered_points'

In [60]:
# Target directories
KITTI_FORMAT_DIR = '../stationary_kitti_format_data'
UNFILTERED_DIR = 'st_kitti_unfiltered'
FILTERED_DIR = 'st_kitti_filtered'
LABEL_FILTERED_DIR = 'st_kitti_label_filtered'

In [61]:
# Target subdirectories
ALL_DIR = 'all'
IMAGESETS_DIR = 'ImageSets'
TRAINING_DIR = 'training'
TESTING_DIR = 'testing'

In [62]:
# Target subsbudirectories
LABELS_DIR = 'label_2'
IMAGE_DIR = 'image_2'
VELODYNE_DIR = 'velodyne'
CALIB_DIR = 'calib'

In [63]:
DATASET_DIRS = [UNFILTERED_DIR, FILTERED_DIR, LABEL_FILTERED_DIR]

In [64]:
DATASET_SUBDIRS = [LABELS_DIR, IMAGE_DIR, VELODYNE_DIR]

In [65]:
PIPELINE_PAIRS = [
    [SRC_UNFILTERED_DIR, UNFILTERED_DIR],
    [SRC_FILTERED_DIR, FILTERED_DIR],
    [SRC_LABEL_FILTERD, LABEL_FILTERED_DIR]
]

In [66]:
kitti_data_structure = {
    'all': {
        'image_2': {},
        'label_2': {},
        'velodyne': {}
    },
    'ImageSets': {},
    'training': {
        'calib': {},
        'image_2': {},
        'label_2': {},
        'velodyne': {}
    },
    'testing': {
        'calib': {},
        'image_2': {},
        'label_2': {},
        'velodyne': {}
    }
}

In [67]:
def create_subdirectories(parent_path, subdirs):
    for subdir, children in subdirs.items():
        path = os.path.join(parent_path, subdir)
        os.makedirs(path, exist_ok=True)  # Create the directory if it doesn't exist
        create_subdirectories(path, children)  # Recursively create child directories

In [68]:
# Make KITTI file tree, one for unfiltered, filtered, and label filtered
def make_file_trees(dataset_dir_name):
    print('making file trees')
    base_path = os.path.join(DATA_DIRECTORY, KITTI_FORMAT_DIR, dataset_dir_name)
    os.makedirs(base_path, exist_ok=True)
    # Start creating directories from the base path
    create_subdirectories(base_path, kitti_data_structure)

In [69]:
def get_valid_file_ids(dir):
    valid_files = []
    # For each folder
    session_folder = Path(dir)

    # This will iterate through all the folders in the session folder
    for data_folder in session_folder.iterdir(): 
        if data_folder.is_dir():
            this_folder_files = []            
            files = [f for f in os.listdir(data_folder) if f.endswith('.bin') or f.endswith('.txt') or f.endswith('.png')]
            # For each file
            # for filename in files[:3]:
            for filename in files:
                # Get the index
                filename = os.path.basename(filename)
                # Split the filename from the extension ('006428', '.txt')
                file_id, extension = os.path.splitext(filename)
                
                this_folder_files.append(file_id)
            
        # Only keep the files that are in both lists
        if len(valid_files) == 0:
            valid_files = this_folder_files
        else:
            valid_files = [file for file in valid_files if file in this_folder_files]
        
    return valid_files

In [70]:
def copy_select_data(source_file, target_file):
    shutil.copy2(source_file, target_file)

In [71]:
# dir is a session folder
def _copy_files_from_stationary_to_kitti_folders(dir, current_index):
    # List of valid files
    valid_files = get_valid_file_ids(dir)

    # If the valid files list is too short
    if len(valid_files) < 2:
        # Return early
        return

    # I don't think I need this because current index is passed by value
    start_index = current_index

    # TRANSFER POINT CLOUDS
    # Keep track of index through this
    # For each of the valid files
    # For each transfer location pair (from, to)
    for pair in PIPELINE_PAIRS:
        # Create the from and to path bases
        from_base_dir_vel = Path(dir, pair[0])
        to_base_path_vel = Path(KITTI_FORMAT_DIR, pair[1], ALL_DIR, VELODYNE_DIR)

        # For each index being transferred
        for id in valid_files:
            # Make the from file id
            from_file_id = str(id).zfill(10) + '.bin'
            # Add the current index to the file index
            offset_id_index = int(id) + current_index
            # Get to file_id
            to_file_id = str(offset_id_index).zfill(6) + '.bin'

            # TRANSFER POINT CLOUDS
            # Create a from and to path bin file paths
            from_file_path = Path(from_base_dir_vel, from_file_id)
            to_file_path = Path(to_base_path_vel, to_file_id)

            # print(from_file_path)
            # print(to_file_path)
            # print()
            copy_select_data(from_file_path, to_file_path)

        # TRANSFER LABELS
        from_base_dir_lbl = Path(dir, SRC_LABELS_DIR)
        to_base_path_lbl = Path(KITTI_FORMAT_DIR, pair[1], ALL_DIR, LABELS_DIR)
        
        for id in valid_files:
            # Make the from file id
            from_file_id = str(id).zfill(10) + '.txt'
            # Add the current index to the file index
            offset_id_index = int(id) + current_index
            # Get to file_id
            to_file_id = str(offset_id_index).zfill(6) + '.txt'
            
            from_file_path = Path(from_base_dir_lbl, from_file_id)
            to_file_path = Path(to_base_path_lbl, to_file_id)

            # print(from_file_path)
            # print(to_file_path)
            # print()
            copy_select_data(from_file_path, to_file_path)

        # TRANSFER IMAGES
        from_base_dir_img = Path(dir, SRC_IMAGE_DIR)
        to_base_path_img = Path(KITTI_FORMAT_DIR, pair[1], ALL_DIR, IMAGE_DIR)
        
        for id in valid_files:
            # Make the from file id
            from_file_id = str(id).zfill(10) + '.png'
            # Add the current index to the file index
            offset_id_index = int(id) + current_index
            # Get to file_id
            to_file_id = str(offset_id_index).zfill(6) + '.png'
            
            from_file_path = Path(from_base_dir_img, from_file_id)
            to_file_path = Path(to_base_path_img, to_file_id)

            # print(from_file_path)
            # print(to_file_path)
            # print()
            copy_select_data(from_file_path, to_file_path)
    
    # Return the number of files processed for the offset count
    return len(valid_files)

In [72]:
def copy_files_from_stationary_to_kitti_folders():
    p = Path(STATIONARY_DATA_DIR)

    current_index = 0
    
    # For each sequence (folder) in the stationary data
    for dir in p.iterdir(): 
        if dir.is_dir():
            print('copying from: ' + str(dir))
            new_offset = _copy_files_from_stationary_to_kitti_folders(dir, current_index)
            if new_offset:
                current_index = current_index + new_offset

In [73]:
def save_list_to_file(list_of_files, file_name):
    with open(file_name, 'w') as f:
        for item in list_of_files:
            # Split the file name at the dot and take the first part
            # numeric_part = item.split('.')[0]
            f.write("%s\n" % item)

In [74]:
def make_test_train_split_txts():
    print('making train test splits')
    # Use the filtered data to get indices
    template_path = Path(KITTI_FORMAT_DIR, FILTERED_DIR, ALL_DIR, VELODYNE_DIR)
    file_list = [f for f in os.listdir(template_path) if f.endswith('.bin')]
    
    # Get the list of IDs from the file list
    file_ids = [os.path.splitext(filename)[0] for filename in file_list]
    
    # Shuffle the list to ensure randomness
    random.shuffle(file_ids)

    # Define split proportions
    train_split = 0.75  # 75% of the data
    val_split = 0.1  # 10% of the data
    test_split = 0.15  # 15% of the data

    # Calculate split sizes
    total_files = len(file_ids)
    train_size = int(total_files * train_split)
    val_size = int(total_files * val_split)

    # Split the dataset
    train_files = file_ids[:train_size]
    val_files = file_ids[train_size:train_size + val_size]
    test_files = file_ids[train_size + val_size:]

    # Combine train and val for trainval
    trainval_files = train_files + val_files

    # Save for each dataset
    for pair in PIPELINE_PAIRS:
        save_dir = Path(KITTI_FORMAT_DIR, pair[1], IMAGESETS_DIR)
    
        # Save to files
        save_list_to_file(train_files, os.path.join(save_dir, 'train.txt'))
        save_list_to_file(val_files, os.path.join(save_dir, 'val.txt'))
        save_list_to_file(test_files, os.path.join(save_dir, 'test.txt'))
        save_list_to_file(trainval_files, os.path.join(save_dir, 'trainval.txt'))

In [75]:
def copy_files_using_txt(dataset, test_or_train, file_list_path):
    # With the txt file
    # Copy files with a matching index from the all to the target dataset dir
    # Copy over velodyne files, labels, and images
    for subdir in DATASET_SUBDIRS:
        # Files to copy
        with open(file_list_path, 'r') as file:
            file_ids = {os.path.splitext(line.strip())[0] for line in file}

        # Make 'from' directory path
        from_dir = Path(KITTI_FORMAT_DIR, dataset, ALL_DIR, subdir)
        # Make 'to' directory path
        to_dir = Path(KITTI_FORMAT_DIR, dataset, test_or_train, subdir)

        # Get file extension
        file_extension = next(from_dir.iterdir()).suffix
        
        # For each file id in the .txt file
        for file_id in file_ids:
            # Verify that the corresponding file is in the from path
            from_file = Path(from_dir, file_id + file_extension)
            # Make 'from' and 'to' file paths
            if from_file.exists():
                # Copy the file over
                to_file = Path(to_dir, file_id + file_extension)
                copy_select_data(from_file, to_file)

    # Copy over calibration files
    # Make the calibration source path
    calib_source = Path(DATA_DIRECTORY, CALIB_DIR, '000000.txt')
    # For each file_id in the list
    with open(file_list_path, 'r') as file:
        file_ids = {os.path.splitext(line.strip())[0] for line in file}

        for file_id in file_ids:
            # Make a 'to' .txt file
            to_file = Path(KITTI_FORMAT_DIR, dataset, test_or_train, CALIB_DIR, file_id + '.txt')
            # Copy the file over
            copy_select_data(calib_source, to_file)


In [76]:
def copy_to_test_train():
    # For each dataset, copy from all to train and test
    for dataset in DATASET_DIRS:
        print('transferring train and test files for: ' + dataset)
        # Copy over the train files
        # train_dir = Path(KITTI_FORMAT_DIR, dataset, TRAINING_DIR)
        train_file_list_path = Path(KITTI_FORMAT_DIR, dataset, IMAGESETS_DIR, 'trainval.txt')
        copy_files_using_txt(dataset, TRAINING_DIR, train_file_list_path)
        
        # Copy over the test files
        # test_dir = Path(KITTI_FORMAT_DIR, dataset, TESTING_DIR)
        test_file_list_path = Path(KITTI_FORMAT_DIR, dataset, IMAGESETS_DIR, 'test.txt')
        copy_files_using_txt(dataset, TESTING_DIR, test_file_list_path)

In [77]:
def convert_to_kitti_format():   
    # Make KITTI file tree, one for unfiltered, filtered, and label filtered
    for dataset_dir in DATASET_DIRS:
        make_file_trees(dataset_dir)
    
    copy_files_from_stationary_to_kitti_folders()

    make_test_train_split_txts()

    copy_to_test_train()

    if delete_pre_split_folders:
        # This is not implemented yet
        delete_duplicate_data()

In [78]:
convert_to_kitti_format()

making file trees
making file trees
making file trees
copying from: ..\stationary_data\2011_09_26_drive_0017_sync_0_to_113
copying from: ..\stationary_data\2011_09_26_drive_0018_sync_0_to_178
copying from: ..\stationary_data\2011_09_26_drive_0051_sync_210_to_210
copying from: ..\stationary_data\2011_09_26_drive_0051_sync_224_to_360
copying from: ..\stationary_data\2011_09_26_drive_0060_sync_0_to_77
copying from: ..\stationary_data\2011_09_26_drive_0009_sync_404_to_404
copying from: ..\stationary_data\2011_09_26_drive_0009_sync_422_to_446
copying from: ..\stationary_data\2011_09_26_drive_0011_sync_167_to_232
copying from: ..\stationary_data\2011_09_26_drive_0059_sync_239_to_245
copying from: ..\stationary_data\2011_09_26_drive_0079_sync_93_to_99
copying from: ..\stationary_data\2011_09_26_drive_0084_sync_0_to_47
copying from: ..\stationary_data\2011_09_26_drive_0086_sync_696_to_696
copying from: ..\stationary_data\2011_09_26_drive_0086_sync_700_to_700
copying from: ..\stationary_data\20