In [8]:
import os
import random
import shutil

def copy_random_files(osm_ids, source_root, target_folder, file_name_pattern, file_extension,
                      num_files=5, seed=None):
    """
    Copies a random sample of matching files from each OSM ID folder into one target folder.

    The target folder is emptied first (or created if it does not exist). For every OSM ID,
    the folder `<source_root>/<osm_id>` is scanned (non-recursively) for regular files whose
    name contains `file_name_pattern` and ends with `file_extension`. OSM IDs whose folder is
    missing, or which hold fewer than `num_files` matching files, are skipped with a message.

    Parameters:
    - osm_ids (list): List of OSM IDs to process.
    - source_root (str): Path to the root directory containing OSM ID folders.
    - target_folder (str): Path to the target directory for copied files.
      WARNING: everything already inside this directory is deleted.
    - file_name_pattern (str): Pattern in file names to look for.
    - file_extension (str): Extension of the files to copy.
    - num_files (int): Number of files to sample per OSM ID folder (default 5).
    - seed (int | None): Seed for a reproducible sample. When None, the module-level
      `random` generator is used, so a prior `random.seed(...)` call is still honoured.

    Returns:
    - None. Progress is reported with print().

    Raises:
    - ValueError: If num_files is negative (checked before anything is deleted).
    """
    # Validate before touching the filesystem so a bad argument never wipes the target.
    if num_files < 0:
        raise ValueError(f"num_files must be non-negative, got {num_files}")

    # A private generator makes seeded runs reproducible without altering global random state.
    rng = random if seed is None else random.Random(seed)

    # Clear the target folder
    if os.path.exists(target_folder):
        for entry in os.listdir(target_folder):
            entry_path = os.path.join(target_folder, entry)
            # Symlinks are unlinked, not followed: shutil.rmtree raises on a symlink to a directory.
            if os.path.islink(entry_path) or os.path.isfile(entry_path):
                os.remove(entry_path)
            elif os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
        print(f"Cleared all files in target folder: {target_folder}")
    else:
        os.makedirs(target_folder)
        print(f"Created target folder: {target_folder}")

    for osm_id in osm_ids:
        osm_folder = os.path.join(source_root, str(osm_id))
        # isdir (not exists) so a stray regular file with the OSM ID's name is skipped too.
        if not os.path.isdir(osm_folder):
            print(f"Folder for OSM ID {osm_id} not found. Skipping...")
            continue

        # Find all regular files matching the pattern and extension. Sub-directories with a
        # matching name are excluded (shutil.copy cannot copy them), and the list is sorted
        # because os.listdir order is arbitrary, which would defeat seeding.
        matching_files = sorted(
            f for f in os.listdir(osm_folder)
            if f.endswith(file_extension)
            and file_name_pattern in f
            and os.path.isfile(os.path.join(osm_folder, f))
        )

        if len(matching_files) < num_files:
            print(f"Not enough matching files in folder {osm_folder}. Found {len(matching_files)}.")
            continue

        # Randomly select the requested number of files (without replacement)
        selected_files = rng.sample(matching_files, num_files)
        print(selected_files)

        # Copy selected files to target folder
        for file in selected_files:
            src_file_path = os.path.join(osm_folder, file)
            dest_file_path = os.path.join(target_folder, file)
            shutil.copy(src_file_path, dest_file_path)
            print(f"Copied {src_file_path} to {dest_file_path}")

In [16]:


# Example usage: sample files for each OSM ID below into a single validation folder.
# NOTE: copy_random_files empties target_folder on every run before copying.
osm_ids = [
    398021, 271110, 2315704, 1933745, 8896976, 913110,
    536780, 185567, 2697338, 1251066, 79604,
]  # OSM IDs to sample; each is looked up as a sub-folder of source_root
source_root = "../data/clean_data/solar/"  # root directory holding the OSM ID folders
target_folder = "../data/clean_data/solar/validation_sample/"  # destination for the copies
file_name_pattern = "_rgb"  # substring a file name must contain
file_extension = ".tif"  # suffix a file name must end with

copy_random_files(
    osm_ids=osm_ids,
    source_root=source_root,
    target_folder=target_folder,
    file_name_pattern=file_name_pattern,
    file_extension=file_extension,
)

Cleared all files in target folder: ../data/clean_data/solar/validation_sample/
['398021_p_935_2020_06_26_rgb.tif', '398021_p_200_2020_06_26_rgb.tif', '398021_p_283_2020_06_26_rgb.tif', '398021_p_209_2020_06_26_rgb.tif', '398021_p_785_2020_06_26_rgb.tif']
Copied ../data/clean_data/solar/398021/398021_p_935_2020_06_26_rgb.tif to ../data/clean_data/solar/validation_sample/398021_p_935_2020_06_26_rgb.tif
Copied ../data/clean_data/solar/398021/398021_p_200_2020_06_26_rgb.tif to ../data/clean_data/solar/validation_sample/398021_p_200_2020_06_26_rgb.tif
Copied ../data/clean_data/solar/398021/398021_p_283_2020_06_26_rgb.tif to ../data/clean_data/solar/validation_sample/398021_p_283_2020_06_26_rgb.tif
Copied ../data/clean_data/solar/398021/398021_p_209_2020_06_26_rgb.tif to ../data/clean_data/solar/validation_sample/398021_p_209_2020_06_26_rgb.tif
Copied ../data/clean_data/solar/398021/398021_p_785_2020_06_26_rgb.tif to ../data/clean_data/solar/validation_sample/398021_p_785_2020_06_26_rgb.tif