In [3]:
import os
import shutil
import csv

def move_files_based_on_csv(csv_file, images_dir, labels_dir, splits=('train', 'val', 'test')):
    """Move image and label files into the split folder named by the CSV.

    For every CSV row, the image ``<File>.png`` under ``images_dir`` and the
    label ``<File>.txt`` under ``labels_dir`` are expected to live in the
    sub-folder named by the row's ``Split`` value. A file that is missing from
    that folder is searched for in the other split folders and moved over.

    Args:
        csv_file: Path to a CSV file with a header row containing at least the
            columns ``Split`` and ``File`` (``File`` is used without extension).
        images_dir: Directory holding one sub-folder of ``.png`` files per split.
        labels_dir: Directory holding one sub-folder of ``.txt`` files per split.
        splits: Names of the split sub-folders to search when a file is not
            where the CSV says it should be.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        OSError: If the CSV cannot be opened or a move fails.
        KeyError: If a row lacks the ``Split`` or ``File`` column.
    """

    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for file objects passed to a reader.
    with open(csv_file, mode='r', newline='') as file:
        reader = csv.DictReader(file)
        file_list = list(reader)

    def move_file(file_name, current_split, base_dir, file_ext):
        """Move one file into ``current_split`` if it sits in another split."""
        target_name = f'{file_name}{file_ext}'
        current_path = os.path.join(base_dir, current_split, target_name)

        # Already in the right place: nothing to do.
        if os.path.exists(current_path):
            return

        print(f"{file_name}{file_ext} not found in {current_split}, searching other splits...")
        for split in splits:
            if split == current_split:  # Already checked above
                continue
            other_path = os.path.join(base_dir, split, target_name)
            if os.path.exists(other_path):
                print(f"Found {file_name}{file_ext} in {split}, moving to {current_split}...")
                # The destination split folder may not exist yet; without this
                # shutil.move fails with FileNotFoundError.
                os.makedirs(os.path.dirname(current_path), exist_ok=True)
                shutil.move(other_path, current_path)
                return

        # Make the miss visible instead of silently continuing.
        print(f"Warning: {target_name} not found in any split under {base_dir}")

    # Check and move image and label files
    for row in file_list:
        split = row['Split']
        file_name = row['File']

        # Move image files (.png)
        move_file(file_name, split, images_dir, '.png')

        # Move label files (.txt)
        move_file(file_name, split, labels_dir, '.txt')

if __name__ == '__main__':
    # Inputs for this run; replace these with the actual locations on disk.
    csv_file = 'file_list.csv'
    images_dir = '02_baseline_code_and_model//Compete_COCO//images'
    labels_dir = '02_baseline_code_and_model//Compete_COCO//labels'

    # Relocate image and label files according to the CSV.
    move_files_based_on_csv(
        csv_file=csv_file,
        images_dir=images_dir,
        labels_dir=labels_dir,
    )


EarlyNight_001740.png not found in train, searching other splits...
Found EarlyNight_001740.png in val, moving to train...
EarlyNight_001740.txt not found in train, searching other splits...
Found EarlyNight_001740.txt in val, moving to train...
ETRI_000680.png not found in val, searching other splits...
Found ETRI_000680.png in train, moving to val...
ETRI_000680.txt not found in val, searching other splits...
Found ETRI_000680.txt in train, moving to val...
