In [None]:
import os
import random
import shutil

def copy_random_files(osm_ids, source_root, target_folder, file_name_pattern, file_extension, num_files=5):
    """
    Copies a random sample of files matching a name pattern and extension from each OSM ID folder.

    For every OSM ID, the folder ``<source_root>/<osm_id>`` is scanned (non-recursively)
    for regular files whose name contains ``file_name_pattern`` and ends with
    ``file_extension``. If at least ``num_files`` such files exist, ``num_files`` of them
    are chosen at random and copied into ``target_folder``. Folders that are missing or
    hold too few matching files are reported on stdout and skipped.

    All copies land directly in ``target_folder`` under their original file name, so
    files with the same name coming from different OSM ID folders overwrite each other.

    Parameters:
    - osm_ids (list): List of OSM IDs to process.
    - source_root (str): Path to the root directory containing OSM ID folders.
    - target_folder (str): Path to the target directory for copied files (created if missing).
    - file_name_pattern (str): Substring that file names must contain.
    - file_extension (str): Suffix that file names must end with.
    - num_files (int): Number of files to sample from each folder. Defaults to 5.

    Raises:
    - ValueError: If num_files is negative.
    """
    if num_files < 0:
        raise ValueError(f"num_files must be non-negative, got {num_files}.")

    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(target_folder, exist_ok=True)

    for osm_id in osm_ids:
        osm_folder = os.path.join(source_root, str(osm_id))
        # isdir (not exists): a plain file at this path would make os.listdir raise.
        if not os.path.isdir(osm_folder):
            print(f"Folder for OSM ID {osm_id} not found. Skipping...")
            continue

        # Keep only regular files: a matching sub-directory cannot be copied with
        # shutil.copy. Sorting makes the candidate order independent of os.listdir,
        # so seeding `random` yields a repeatable sample.
        matching_files = sorted(
            f for f in os.listdir(osm_folder)
            if f.endswith(file_extension)
            and file_name_pattern in f
            and os.path.isfile(os.path.join(osm_folder, f))
        )

        if len(matching_files) < num_files:
            print(f"Not enough matching files in folder {osm_folder}. Found {len(matching_files)}.")
            continue

        # Randomly select the requested number of files and copy them to the target folder
        for file in random.sample(matching_files, num_files):
            src_file_path = os.path.join(osm_folder, file)
            dest_file_path = os.path.join(target_folder, file)
            shutil.copy(src_file_path, dest_file_path)
            print(f"Copied {src_file_path} to {dest_file_path}")

In [None]:


# Example usage: adjust the values below to your own data layout, then run the cell.

# OSM IDs whose folders (named after the ID, under source_root) are sampled.
osm_ids = [
    398021,
    271110,
    2315704,
    1933745,
    8896976,
    913110,
    536780,
    185567,
    2697338,
    125106,
    79604,
]

# Root directory holding one sub-folder per OSM ID.
source_root = "../data/clean_data/solar/"
# Directory that receives the copied files.
target_folder = "../data/clean_data/solar/validation_sample/"
# Substring a file name must contain to be a candidate.
file_name_pattern = "_rgb"
# Suffix a file name must end with to be a candidate.
file_extension = ".tif"

copy_random_files(
    osm_ids=osm_ids,
    source_root=source_root,
    target_folder=target_folder,
    file_name_pattern=file_name_pattern,
    file_extension=file_extension,
)