# Dataset Relocation Code 

In [None]:
import os
import shutil
import re
from concurrent.futures import ThreadPoolExecutor

In [1]:
def should_copy_file(file_name, max_index=5):
    """Determine if a file should be copied based on its name.

    ``meta.pkl`` is always copied. Any other name is copied only when the
    text between its last underscore and the following dot (or the end of
    the name) is a decimal number no greater than ``max_index``.

    Args:
        file_name: Base name of the file, without any directory part.
        max_index: Largest numeric suffix that is still copied (default 5).

    Returns:
        True if the file should be copied, False otherwise.
    """
    if file_name == "meta.pkl":
        return True

    _, underscore, tail = file_name.rpartition('_')
    if not underscore:
        # No underscore at all, so there is no numeric suffix to inspect.
        return False

    suffix = tail.split('.')[0]
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() rejects with ValueError.
    if not suffix.isdecimal():
        return False
    return int(suffix) <= max_index

def copy_file(source, destination):
    """Copy source to destination with shutil.copy2, unless destination already exists."""
    if os.path.exists(destination):
        # Something is already at the destination path; leave it untouched.
        return
    shutil.copy2(source, destination)

def copy_directory(source_directory, destination_directory):
    """Copy selected files from source directory to destination directory in parallel.

    The directory tree under ``source_directory`` is recreated under
    ``destination_directory`` (including directories that end up empty), and
    every file accepted by ``should_copy_file`` is copied with ``copy_file``
    on a thread pool.

    Args:
        source_directory: Root of the tree to read from.
        destination_directory: Root of the tree to write to; created if missing.

    Raises:
        Exception: The first error raised by any copy task (for example an
            ``OSError`` from ``shutil.copy2``) is re-raised once all tasks
            have been submitted. The remaining tasks still run to completion
            before the error propagates.
    """
    os.makedirs(destination_directory, exist_ok=True)
    pending = []
    with ThreadPoolExecutor() as executor:
        for root, dirs, files in os.walk(source_directory):
            relative_root = os.path.relpath(root, source_directory)
            if relative_root == os.curdir:
                destination_root = destination_directory
            else:
                destination_root = os.path.join(destination_directory, relative_root)
            # One makedirs per directory instead of one per copied file.
            os.makedirs(destination_root, exist_ok=True)

            for name in files:
                if should_copy_file(name):
                    pending.append(
                        executor.submit(
                            copy_file,
                            os.path.join(root, name),
                            os.path.join(destination_root, name),
                        )
                    )

            # Mirror every sub-directory, even ones os.walk will not descend
            # into (e.g. symlinked directories) or that contain no copied files.
            for name in dirs:
                os.makedirs(os.path.join(destination_root, name), exist_ok=True)

        # Without this, an exception inside a worker is stored on its future
        # and silently dropped, so a failed copy would go unnoticed.
        for future in pending:
            future.result()




In [None]:
# Source dataset root and the destination root that receives the filtered copy.
source_directory = '/home/nfs/inf6/data/datasets/Carla_Moritz/SyncAngel3'
destination_directory = '/home/user/asifa0/Project/CARLABase'

# Recreate the directory tree and copy only the files accepted by should_copy_file
# ("meta.pkl" and files whose numeric suffix is at most 5); existing files are skipped.
copy_directory(source_directory, destination_directory)

# Code to delete any extra files with frame numbers greater than 5

In [4]:
def is_number_greater_than_005(file_name):
    """
    Tell whether the number embedded in a file name exceeds 005.

    The number is the run of digits that sits between an underscore and the
    final extension (e.g. "frame_012.png" -> 12). Names that do not follow
    this pattern are reported as False.
    """
    found = re.search(r"_(\d+)\.\w+$", file_name)
    return found is not None and int(found.group(1)) > 5

def delete_files_in_seq_directories(root_directory):
    """
    Walk the tree rooted at root_directory and, inside every directory whose
    name begins with "seq_", remove each file whose numeric suffix is greater
    than 005. The path of every removed file is printed.
    """
    for current_dir, _subdirs, file_names in os.walk(root_directory):
        # Only "seq_*" directories are cleaned; all others are merely traversed
        if not os.path.basename(current_dir).startswith("seq_"):
            continue
        for file_name in file_names:
            if not is_number_greater_than_005(file_name):
                continue
            file_path = os.path.join(current_dir, file_name)
            os.remove(file_path)
            print(f"Deleted: {file_path}")


In [None]:
# Root directory of the dataset to clean; every directory beneath it whose
# name starts with "seq_" is scanned.
root_directory = '/home/user/asifa0/Project/CARLABase'

# Permanently remove (os.remove) files whose numeric suffix is greater than 5
# and print the path of each deleted file.
delete_files_in_seq_directories(root_directory)