In [36]:
import os, shutil, tempfile, json, random
from PIL import Image

random.seed(0)

In [2]:
F_DATA = "./data"

F_TRAIN = F_DATA+"/train"
F_TRAIN_IMAGES = F_TRAIN+"/images"
F_TRAIN_IMAGES_POS = F_TRAIN_IMAGES+"/positive"
F_TRAIN_IMAGES_NEG = F_TRAIN_IMAGES+"/negative"
F_TRAIN_MASKS = F_TRAIN+"/masks"
F_TRAIN_MASKS_POS = F_TRAIN_MASKS+"/positive"
F_TRAIN_MASKS_NEG = F_TRAIN_MASKS+"/negative"
F_TRAIN_LABELS = F_TRAIN+"/labels"

F_VAL = F_DATA+"/val"
F_VAL_IMAGES = F_VAL+"/images"
F_VAL_IMAGES_POS = F_VAL_IMAGES+"/positive"
F_VAL_IMAGES_NEG = F_VAL_IMAGES+"/negative"
F_VAL_MASKS = F_VAL+"/masks"
F_VAL_MASKS_POS = F_VAL_MASKS+"/positive"
F_VAL_MASKS_NEG = F_VAL_MASKS+"/negative"
F_VAL_LABELS = F_VAL+"/labels"

F_TEST = F_DATA+"/test"
F_TEST_IMAGES = F_TEST+"/images"
F_TEST_IMAGES_POS = F_TEST_IMAGES+"/positive"
F_TEST_IMAGES_NEG = F_TEST_IMAGES+"/negative"
F_TEST_MASKS = F_TEST+"/masks"
F_TEST_MASKS_POS = F_TEST_MASKS+"/positive"
F_TEST_MASKS_NEG = F_TEST_MASKS+"/negative"

#### functions

In [3]:
def flatten_directory(main_folder_path):
    """
    Moves all files from inner folders directly into the main folder.
    Deletes any inner folders that become empty.
    
    Args:
        main_folder_path (str): Path to the main folder
    """
    # Make sure the path exists and is a directory
    if not os.path.isdir(main_folder_path):
        raise ValueError(f"The path {main_folder_path} is not a valid directory")
    
    # Get a list of all items in the main folder
    items = os.listdir(main_folder_path)
    
    # Process each item in the main folder
    for item in items:
        item_path = os.path.join(main_folder_path, item)
        
        # If the item is a directory, process its contents
        if os.path.isdir(item_path):
            # Get all files in the inner folder
            inner_items = os.listdir(item_path)
            
            # Move each file to the main folder
            for inner_item in inner_items:
                inner_item_path = os.path.join(item_path, inner_item)
                
                # Only move files, not directories
                if os.path.isfile(inner_item_path):
                    # Create destination path
                    destination_path = os.path.join(main_folder_path, inner_item)
                    
                    # Handle name conflicts
                    if os.path.exists(destination_path):
                        base, extension = os.path.splitext(inner_item)
                        counter = 1
                        while os.path.exists(destination_path):
                            new_name = f"{base}_{counter}{extension}"
                            destination_path = os.path.join(main_folder_path, new_name)
                            counter += 1
                    
                    # Move the file
                    shutil.move(inner_item_path, destination_path)
            
            # Check if the inner folder is now empty
            if len(os.listdir(item_path)) == 0:
                # Remove the empty directory
                os.rmdir(item_path)
            else:
                print(f"Note: Directory {item} still contains subdirectories and was not removed")
    
    print(f"Successfully flattened directory: {main_folder_path}")

In [4]:
def check_matching_files(folder_paths_list, folder2_path):
    """
    Checks multiple folders against folder2 and warns about files that don't have 
    a name match (ignoring extensions) in folder2.
    
    Args:
        folder_paths_list (list): List of paths to check against folder2
        folder2_path (str): Path to the reference folder
    
    Returns:
        dict: Dictionary mapping each input folder to a list of its files without matching names in folder2
    """
    # Validate folder2 path
    if not os.path.isdir(folder2_path):
        raise ValueError(f"The reference path {folder2_path} is not a valid directory")
    
    # Get list of files in folder2 (excluding directories)
    files2 = [f for f in os.listdir(folder2_path) if os.path.isfile(os.path.join(folder2_path, f))]
    
    # Extract base filenames without extensions from folder2
    basenames2 = set()
    for file in files2:
        basename = os.path.splitext(file)[0]
        basenames2.add(basename)
    
    # Process each folder in the input list
    results = {}
    total_missing = 0
    
    for folder_path in folder_paths_list:
        # Validate the current folder path
        if not os.path.isdir(folder_path):
            print(f"WARNING: The path {folder_path} is not a valid directory, skipping")
            continue
        
        # Get list of files in the current folder (excluding directories)
        files = [f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))]
        
        # Check files against basenames in folder2
        missing_matches = []
        for file in files:
            basename = os.path.splitext(file)[0]
            if basename not in basenames2:
                missing_matches.append(file)
                print(f"WARNING: '{file}' in {folder_path} has no name match in {folder2_path}")
        
        # Store results for this folder
        results[folder_path] = missing_matches
        total_missing += len(missing_matches)
        
        # Print summary for this folder
        if not missing_matches:
            print(f"All files in {folder_path} have matching names in {folder2_path}")
        else:
            print(f"Found {len(missing_matches)} files without matching names in {folder_path}")
    
    # Print overall summary
    print(f"\nSummary: Found a total of {total_missing} files without matching names across {len(folder_paths_list)} folders")
    
    return results

In [5]:
def move_matching_files(folder1_path, folder2_path, target_folder_path):
    """
    For each file in folder1 that has a name match (ignoring extensions) in folder2,
    move that matching file from folder2 to the target folder.
    
    Args:
        folder1_path (str): Path to the first folder (reference folder)
        folder2_path (str): Path to the second folder (source of files to move)
        target_folder_path (str): Path to the target folder (destination for moved files)
    
    Returns:
        list: List of files that were moved
    """
    # Validate paths
    if not os.path.isdir(folder1_path):
        raise ValueError(f"The path {folder1_path} is not a valid directory")
    if not os.path.isdir(folder2_path):
        raise ValueError(f"The path {folder2_path} is not a valid directory")
    
    # Create target folder if it doesn't exist
    if not os.path.exists(target_folder_path):
        os.makedirs(target_folder_path)
        print(f"Created target directory: {target_folder_path}")
    elif not os.path.isdir(target_folder_path):
        raise ValueError(f"The target path {target_folder_path} is not a directory")
    
    # Get list of files in each folder (excluding directories)
    files1 = [f for f in os.listdir(folder1_path) if os.path.isfile(os.path.join(folder1_path, f))]
    files2 = [f for f in os.listdir(folder2_path) if os.path.isfile(os.path.join(folder2_path, f))]
    
    # Extract base filenames from folder1
    basenames1 = set()
    for file in files1:
        basename = os.path.splitext(file)[0]
        basenames1.add(basename)
    
    # Check files in folder2 against basenames in folder1 and move matching files
    moved_files = []
    for file in files2:
        basename = os.path.splitext(file)[0]
        if basename in basenames1:
            source_path = os.path.join(folder2_path, file)
            target_path = os.path.join(target_folder_path, file)
            
            # Handle name conflicts
            if os.path.exists(target_path):
                base, extension = os.path.splitext(file)
                counter = 1
                while os.path.exists(target_path):
                    new_name = f"{base}_{counter}{extension}"
                    target_path = os.path.join(target_folder_path, new_name)
                    counter += 1
            
            # Move the file
            shutil.move(source_path, target_path)
            moved_files.append(file)
    
    if not moved_files:
        print("No matching files found to move")
    else:
        print(f"Successfully moved {len(moved_files)} files to {target_folder_path}")

In [6]:
def move_unmatching_files(folder1, folder2, target_folder):
    """
    Moves files from 'folder2' to 'target_folder' if their names (ignoring extensions) 
    do not exist in 'folder1'.
    
    Parameters:
    folder1 (str): Path to the first folder (reference folder).
    folder2 (str): Path to the second folder (source folder).
    target_folder (str): Path to the target folder where unique files will be moved.
    
    Prints:
    Total number of files moved.
    """
    # Ensure target folder exists
    os.makedirs(target_folder, exist_ok=True)
    
    # Get filenames without extensions in folder1
    folder1_files = {os.path.splitext(f)[0] for f in os.listdir(folder1) if os.path.isfile(os.path.join(folder1, f))}
    
    moved_count = 0
    
    # Iterate through files in folder2
    for file in os.listdir(folder2):
        file_path = os.path.join(folder2, file)
        if os.path.isfile(file_path):
            file_name, _ = os.path.splitext(file)
            
            # Move file if its name (ignoring extension) is not in folder1
            if file_name not in folder1_files:
                shutil.move(file_path, os.path.join(target_folder, file))
                moved_count += 1
    
    print(f"Total files moved: {moved_count}")

In [7]:
def organize_files_to_folders(folder1_path, folder2_path):
    """
    For each file in folder2, find which inner folder in folder1 has a file with
    the same name (ignoring extension) and move the file to a matching inner folder 
    in folder2 (creating it if necessary).
    
    Args:
        folder1_path (str): Path to the reference folder with inner folders
        folder2_path (str): Path to the target folder with files to organize
    
    Returns:
        dict: Dictionary mapping each file to its destination folder
    """
    # Validate paths
    if not os.path.isdir(folder1_path):
        raise ValueError(f"The reference path {folder1_path} is not a valid directory")
    if not os.path.isdir(folder2_path):
        raise ValueError(f"The target path {folder2_path} is not a valid directory")
    
    # Get list of files directly in folder2 (excluding directories)
    folder2_files = [f for f in os.listdir(folder2_path) if os.path.isfile(os.path.join(folder2_path, f))]
    
    # If no files to process, exit early
    if not folder2_files:
        print(f"No files found directly in {folder2_path} to organize")
        return {}
    
    # Create a mapping of basenames to inner folders from folder1
    basename_to_folder = {}
    
    # Get all inner folders in folder1
    inner_folders = [f for f in os.listdir(folder1_path) if os.path.isdir(os.path.join(folder1_path, f))]
    
    # Process each inner folder in folder1
    for inner_folder in inner_folders:
        inner_folder_path = os.path.join(folder1_path, inner_folder)
        
        # Get all files in this inner folder
        inner_files = [f for f in os.listdir(inner_folder_path) if os.path.isfile(os.path.join(inner_folder_path, f))]
        
        # Map each file's basename to this inner folder
        for file in inner_files:
            basename = os.path.splitext(file)[0]
            basename_to_folder[basename] = inner_folder
    
    # Track the movements for the return value
    file_movements = {}
    
    # Process each file in folder2
    for file in folder2_files:
        basename = os.path.splitext(file)[0]
        
        # Check if this file belongs to an inner folder
        if basename in basename_to_folder:
            inner_folder = basename_to_folder[basename]
            
            # Create the inner folder in folder2 if it doesn't exist
            inner_folder_path_in_folder2 = os.path.join(folder2_path, inner_folder)
            if not os.path.exists(inner_folder_path_in_folder2):
                os.makedirs(inner_folder_path_in_folder2)
                print(f"Created folder: {inner_folder_path_in_folder2}")
            
            # Move the file to the appropriate inner folder
            source_path = os.path.join(folder2_path, file)
            destination_path = os.path.join(inner_folder_path_in_folder2, file)
            
            # Handle name conflicts
            if os.path.exists(destination_path):
                base, extension = os.path.splitext(file)
                counter = 1
                while os.path.exists(destination_path):
                    new_name = f"{base}_{counter}{extension}"
                    destination_path = os.path.join(inner_folder_path_in_folder2, new_name)
                    counter += 1
            
            # Move the file
            shutil.move(source_path, destination_path)
            file_movements[file] = inner_folder
            print(f"Moved: '{file}' to folder '{inner_folder}'")
        else:
            print(f"No matching folder found for: '{file}'")
    
    # Print summary
    print(f"\nSummary: Organized {len(file_movements)} out of {len(folder2_files)} files into inner folders")
    
    return file_movements

In [None]:
def create_empty_masks(folder1_path, folder2_path, target_extension):
    """
    Efficiently creates black images matching the dimensions of images in folder1 and saves them to folder2.
    Clears all contents of folder2 before starting.
    Optimizes by creating one template black image per unique resolution.
    
    Args:
        folder1_path (str): Path to the source folder with original images
        folder2_path (str): Path to the destination folder for black images
        target_extension (str): The extension to use for the new images (e.g., 'jpg', 'png')
    
    Returns:
        dict: Dictionary with stats about the process
    """
    # Make sure the target extension starts with a dot
    if not target_extension.startswith('.'):
        target_extension = '.' + target_extension
    
    # Validate paths
    if not os.path.isdir(folder1_path):
        raise ValueError(f"The source path {folder1_path} is not a valid directory")
    
    # Clear and recreate destination folder
    if os.path.exists(folder2_path):
        if os.path.isdir(folder2_path):
            print(f"Clearing existing directory: {folder2_path}")
            # Remove all contents of the directory
            for item in os.listdir(folder2_path):
                item_path = os.path.join(folder2_path, item)
                if os.path.isfile(item_path):
                    os.unlink(item_path)
                elif os.path.isdir(item_path):
                    shutil.rmtree(item_path)
        else:
            # It exists but is not a directory, remove it and create a directory
            os.remove(folder2_path)
            os.makedirs(folder2_path)
            print(f"Removed file and created directory: {folder2_path}")
    else:
        # Create new directory
        os.makedirs(folder2_path)
        print(f"Created new directory: {folder2_path}")
    
    # Common image extensions
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.webp'}
    
    # Step 1: Analyze the images and find unique resolutions
    unique_resolutions = {}  # Maps (width, height) to list of files with that resolution
    skipped_files = []
    
    print("Analyzing image resolutions...")
    for file in os.listdir(folder1_path):
        file_path = os.path.join(folder1_path, file)
        
        # Skip directories and non-image files
        if os.path.isdir(file_path):
            continue
            
        # Check if file is an image by extension
        _, extension = os.path.splitext(file)
        if extension.lower() not in image_extensions:
            continue
            
        try:
            # Open the image and get its resolution
            with Image.open(file_path) as img:
                resolution = img.size  # (width, height)
                
                # Add to our resolution mapping
                if resolution not in unique_resolutions:
                    unique_resolutions[resolution] = []
                unique_resolutions[resolution].append(file)
                
        except Exception as e:
            print(f"Error analyzing {file}: {str(e)}")
            skipped_files.append(file)
    
    print(f"Found {len(unique_resolutions)} unique resolutions across {sum(len(files) for files in unique_resolutions.values())} images")
    
    # Step 2: Create one black template image for each unique resolution
    temp_black_images = {}  # Maps resolution to temp file path
    with tempfile.TemporaryDirectory() as temp_dir:
        print("Creating template black images...")
        for resolution in unique_resolutions:
            width, height = resolution
            # Create black image
            black_img = Image.new('RGB', resolution, color=(0, 0, 0))
            
            # Save to temporary file
            temp_path = os.path.join(temp_dir, f"black_{width}x{height}{target_extension}")
            black_img.save(temp_path)
            temp_black_images[resolution] = temp_path
            print(f"Created template black image: {width}x{height}")
        
        # Step 3: Use the templates to create the final images
        print("Generating output images...")
        created_count = 0
        
        for resolution, files in unique_resolutions.items():
            temp_image_path = temp_black_images[resolution]
            width, height = resolution
            
            for file in files:
                # Get base name without extension
                base_name = os.path.splitext(file)[0]
                
                # Create new filename with target extension
                new_filename = base_name + target_extension
                new_file_path = os.path.join(folder2_path, new_filename)
                
                # Copy the temporary black image
                with open(temp_image_path, 'rb') as src_file:
                    with open(new_file_path, 'wb') as dst_file:
                        dst_file.write(src_file.read())
                
                created_count += 1
                if created_count % 50 == 0:
                    print(f"Progress: {created_count} images created")
    
    # Final summary
    stats = {
        "total_processed": sum(len(files) for files in unique_resolutions.values()),
        "unique_resolutions": len(unique_resolutions),
        "created_images": created_count,
        "skipped_files": skipped_files
    }
    
    print(f"\nSummary: Created {created_count} black images in {folder2_path}")
    print(f"Unique resolutions: {len(unique_resolutions)}")
    
    return stats

In [9]:
def concatenate_text_files(folder_path, output_file):
    """Concatenates all text files in the given folder without extra empty lines."""
    with open(output_file, 'w', encoding='utf-8') as outfile:
        first_file = True  # To track the first file and avoid leading newline
        for filename in sorted(os.listdir(folder_path)):  
            file_path = os.path.join(folder_path, filename)
            if os.path.isfile(file_path) and filename.endswith('.txt'):
                with open(file_path, 'r', encoding='utf-8') as infile:
                    content = infile.read().strip()  # Remove leading/trailing newlines
                    if content:
                        if not first_file:
                            outfile.write('\n')  # Add newline only between files
                        outfile.write(content)
                        first_file = False

In [10]:
def save_boxes_to_json(input_txt, output_json):
    """Parses a text file and saves image names with box coordinates to a JSON file."""
    data = {}

    with open(input_txt, 'r', encoding='utf-8') as infile:
        for line in infile:
            parts = line.strip().split()
            if len(parts) != 2:
                continue  # Skip invalid lines

            filename, box_str = parts
            box_values = box_str.split(',')[:4]  # Extract only the first 4 values

            try:
                box = list(map(int, box_values))  # Convert to integers
                data[filename] = box  # Store in dictionary
            except ValueError:
                continue  # Skip invalid number formats

    with open(output_json, 'w', encoding='utf-8') as outfile:
        json.dump(data, outfile, indent=4)

In [11]:
def move_first_n_images(input_folder: str, output_folder: str, n: int):
    """
    Moves the first N images from input_folder to output_folder and returns their names.
    
    :param input_folder: Path to the source folder containing images.
    :param output_folder: Path to the destination folder.
    :param n: Number of images to move.
    :return: List of moved image filenames.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    images = [f for f in os.listdir(input_folder) if f.lower().endswith(('png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'webp'))]
    images.sort()
    
    moved_images = []
    for image in random.sample(images, n):
        src_path = os.path.join(input_folder, image)
        dest_path = os.path.join(output_folder, image)
        shutil.move(src_path, dest_path)
        moved_images.append(image)
    
    return moved_images

In [12]:
def move_random_n_images(input_folder: str, output_folder: str, n: int):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    images = [f for f in os.listdir(input_folder) if f.lower().endswith(('png', 'jpg', 'jpeg', 'gif', 'bmp', 'tiff', 'webp'))]
    
    moved_images = []
    for image in images[:n]:
        src_path = os.path.join(input_folder, image)
        dest_path = os.path.join(output_folder, image)
        shutil.move(src_path, dest_path)
        moved_images.append(image)
    
    return moved_images

In [13]:
def move_images_by_name(input_folder: str, output_folder: str, image_names: list):
    """
    Moves images from input_folder to output_folder based on given image names (ignoring extensions).
    
    :param input_folder: Path to the source folder containing images.
    :param output_folder: Path to the destination folder.
    :param image_names: List of image names (with extensions) to match.
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    base_names = {os.path.splitext(name)[0] for name in image_names}
    
    moved_images = []
    for file in os.listdir(input_folder):
        file_base, file_ext = os.path.splitext(file)
        if file_base in base_names:
            src_path = os.path.join(input_folder, file)
            dest_path = os.path.join(output_folder, file)
            shutil.move(src_path, dest_path)
            moved_images.append(file)
    
    return moved_images

In [14]:
def remove_uncommon_files(folder1, folder2):
    def get_filenames_without_ext(folder):
        return {os.path.splitext(f)[0] for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))}

    common_files = get_filenames_without_ext(folder1) & get_filenames_without_ext(folder2)

    for folder in [folder1, folder2]:
        for file in os.listdir(folder):
            file_path = os.path.join(folder, file)
            name, ext = os.path.splitext(file)
            if os.path.isfile(file_path) and name not in common_files:
                os.remove(file_path)
                print(f"Removed: {file_path}")

In [15]:
def move_even_indexed_files(source_folder, target_folder):
    # Ensure target folder exists
    os.makedirs(target_folder, exist_ok=True)
    
    # Get sorted list of files (ignoring directories)
    files = sorted([f for f in os.listdir(source_folder) if os.path.isfile(os.path.join(source_folder, f))])

    # Select files at even indices (starting from 1)
    even_indexed_files = [files[i] for i in range(1, len(files), 2)]

    # Move selected files to the target folder
    for file in even_indexed_files:
        src_path = os.path.join(source_folder, file)
        dest_path = os.path.join(target_folder, file)
        shutil.move(src_path, dest_path)

In [27]:
def rename_files_with_leading_zeros(folder_path, n):
    files = sorted(os.listdir(folder_path))  # Sort to maintain order
    
    for index, filename in enumerate(files, start=1):
        file_ext = os.path.splitext(filename)[1]  # Get file extension
        new_name = str(index).zfill(n) + file_ext  # Add leading zeros
        old_path = os.path.join(folder_path, filename)
        new_path = os.path.join(folder_path, new_name)
        
        if old_path != new_path:
            os.rename(old_path, new_path)

In [None]:
def add_prefix_to_files(folder_path, prefix):
    files = os.listdir(folder_path)
    
    for filename in files:
        old_path = os.path.join(folder_path, filename)
        new_name = prefix + filename
        new_path = os.path.join(folder_path, new_name)
        
        if old_path != new_path:
            os.rename(old_path, new_path)

In [34]:
def remove_colors_from_masks(folder_path):
    files = os.listdir(folder_path)
    
    for filename in files:
        file_ext = os.path.splitext(filename)[1].lower()
        if file_ext in ['.jpg', '.jpeg', '.png', '.bmp', '.gif']:
            image_path = os.path.join(folder_path, filename)
            image = Image.open(image_path).convert('L')  # Convert to grayscale
            
            # Convert every non-black pixel to white
            threshold = 1  # Anything above 0 (pure black) turns white
            image = image.point(lambda p: 255 if p > threshold else 0)
            
            image.save(image_path)  # Overwrite the original image

#### apply

In [20]:
flatten_directory(F_TRAIN_IMAGES_POS)
flatten_directory(F_TRAIN_MASKS_POS)

flatten_directory(F_TRAIN_IMAGES_NEG)

Successfully flattened directory: ./data/train/images/positive
Successfully flattened directory: ./data/train/masks/positive
Successfully flattened directory: ./data/train/images/negative


In [19]:
remove_uncommon_files(F_TRAIN_MASKS_POS, F_TRAIN_IMAGES_POS)

In [None]:
rename_files_with_leading_zeros(F_DATA+"/CVC-ColonDB/images", 3)
rename_files_with_leading_zeros(F_DATA+"/CVC-ColonDB/masks", 3)

In [31]:
add_prefix_to_files(F_DATA+"/CVC-ColonDB/images", "cvc_colodb_")
add_prefix_to_files(F_DATA+"/CVC-ColonDB/masks", "cvc_colodb_")

In [28]:
add_prefix_to_files(F_DATA+"/BKAI-IGH/images", "bkai_igh_")
add_prefix_to_files(F_DATA+"/BKAI-IGH/masks", "bkai_igh_")

In [35]:
remove_colors_from_masks(F_DATA+"/BKAI-IGH/masks")

In [None]:
check_matching_files([DATA_FOLDER+"/masks/TrainDataset"], DATA_FOLDER+"/positive")

Found 2 files without matching names


['.DS_Store', '.DS_Store_1']

In [9]:
move_matching_files(DATA_FOLDER+"/masks/TrainDataset", DATA_FOLDER+"/positive", DATA_FOLDER+"/train")

Moved: 'case_M_20181003094031_0U62363100354631_1_001_002-1_a13_ayy_image0045.jpg' from ./data/sun-seg/positive to ./data/sun-seg/train
Moved: 'case_M_20181017100226_0U62367101735926_1_005_001-1_a1_ayy_image0014.jpg' from ./data/sun-seg/positive to ./data/sun-seg/train
Moved: 'case_M_20190111094523_0U62372011160922_1_005_001-1_a2_ayy_image0009.jpg' from ./data/sun-seg/positive to ./data/sun-seg/train
Moved: 'case_M_20181212144657_0U62366121262556_1_001_002-1_a2_ayy_image0062.jpg' from ./data/sun-seg/positive to ./data/sun-seg/train
Moved: 'case_M_20181015101337_0U62363101576536_1_005_001-1_a20_ayy_image0001.jpg' from ./data/sun-seg/positive to ./data/sun-seg/train
Moved: 'case_M_20190117105747_0U62368011712547_1_004_001-1_a13_ayy_image0167.jpg' from ./data/sun-seg/positive to ./data/sun-seg/train
Moved: 'case_M_20181004130627_0U62365100475026_1_002_001-1_a5_ayy_image0123.jpg' from ./data/sun-seg/positive to ./data/sun-seg/train
Moved: 'case_M_20181211093229_0U62367121175028_1_004_001-1_

['case_M_20181003094031_0U62363100354631_1_001_002-1_a13_ayy_image0045.jpg',
 'case_M_20181017100226_0U62367101735926_1_005_001-1_a1_ayy_image0014.jpg',
 'case_M_20190111094523_0U62372011160922_1_005_001-1_a2_ayy_image0009.jpg',
 'case_M_20181212144657_0U62366121262556_1_001_002-1_a2_ayy_image0062.jpg',
 'case_M_20181015101337_0U62363101576536_1_005_001-1_a20_ayy_image0001.jpg',
 'case_M_20190117105747_0U62368011712547_1_004_001-1_a13_ayy_image0167.jpg',
 'case_M_20181004130627_0U62365100475026_1_002_001-1_a5_ayy_image0123.jpg',
 'case_M_20181211093229_0U62367121175028_1_004_001-1_a4_ayy_image0024.jpg',
 'case_M_20190110094509_0U62363011095308_1_001_001-1_a5_ayy_image0050.jpg',
 'case_M_20190117095926_0U62372011750026_1_002_001-1_a1_ayy_image0006.jpg',
 'case_M_20181213131150_0U62365121395349_1_002_002-1_a25_ayy_image0048.jpg',
 'case_M_20181203093846_0U62367120345345_1_005_001-1_a10_ayy_image0003.jpg',
 'case_M_20181109094641_0U62372110931241_1_005_001-1_a4_ayy_image0090.jpg',
 'case_

In [21]:
test_folders = [DATA_FOLDER+"/masks/"+folder for folder in os.listdir(DATA_FOLDER+"/masks")]
for folder in test_folders:
    flatten_directory(folder)

Successfully flattened directory: ./data/sun-seg/masks/EasyUnseen
Successfully flattened directory: ./data/sun-seg/masks/HardUnseen
Successfully flattened directory: ./data/sun-seg/masks/EasySeen
Successfully flattened directory: ./data/sun-seg/masks/HardSeen


In [22]:
check_matching_files(test_folders, DATA_FOLDER+"/testImages")

Found 1 files without matching names in ./data/sun-seg/masks/EasyUnseen
Found 1 files without matching names in ./data/sun-seg/masks/HardUnseen
Found 1 files without matching names in ./data/sun-seg/masks/EasySeen
Found 2 files without matching names in ./data/sun-seg/masks/HardSeen

Summary: Found a total of 5 files without matching names across 4 folders


{'./data/sun-seg/masks/EasyUnseen': ['.DS_Store'],
 './data/sun-seg/masks/HardUnseen': ['.DS_Store'],
 './data/sun-seg/masks/EasySeen': ['.DS_Store'],
 './data/sun-seg/masks/HardSeen': ['.DS_Store', '.DS_Store_1']}

In [24]:
organize_files_to_folders(DATA_FOLDER+"/masks", DATA_FOLDER+"/testImages")

Created folder: ./data/sun-seg/testImages/HardUnseen
Moved: 'case_M_20181031103335_0U62368103151535_1_006_001-1_a2_ayy_image0003.jpg' to folder 'HardUnseen'
Created folder: ./data/sun-seg/testImages/EasySeen
Moved: 'case_M_20181213131150_0U62365121395349_1_002_002-1_a26_ayy_image0122.jpg' to folder 'EasySeen'
Moved: 'case_M_20181213131150_0U62365121395349_1_002_002-1_a24_ayy_image0233.jpg' to folder 'EasySeen'
Moved: 'case_M_20190107094528_0U62367010726528_1_002_002-1_a5_ayy_image0058.jpg' to folder 'HardUnseen'
Created folder: ./data/sun-seg/testImages/EasyUnseen
Moved: 'case_M_20181119111911_0U62363111914011_1_005_001-1_a16_ayy_image0263.jpg' to folder 'EasyUnseen'
Created folder: ./data/sun-seg/testImages/HardSeen
Moved: 'case_M_20190111094523_0U62372011160922_1_004_002-1_a15_ayy_image0117.jpg' to folder 'HardSeen'
Moved: 'case_M_20181211093229_0U62367121175028_1_002_002-1_a3_ayy_image0072.jpg' to folder 'EasyUnseen'
Moved: 'case_M_20181218101803_0U62372121864002_1_001_001-1_a3_ayy_

{'case_M_20181031103335_0U62368103151535_1_006_001-1_a2_ayy_image0003.jpg': 'HardUnseen',
 'case_M_20181213131150_0U62365121395349_1_002_002-1_a26_ayy_image0122.jpg': 'EasySeen',
 'case_M_20181213131150_0U62365121395349_1_002_002-1_a24_ayy_image0233.jpg': 'EasySeen',
 'case_M_20190107094528_0U62367010726528_1_002_002-1_a5_ayy_image0058.jpg': 'HardUnseen',
 'case_M_20181119111911_0U62363111914011_1_005_001-1_a16_ayy_image0263.jpg': 'EasyUnseen',
 'case_M_20190111094523_0U62372011160922_1_004_002-1_a15_ayy_image0117.jpg': 'HardSeen',
 'case_M_20181211093229_0U62367121175028_1_002_002-1_a3_ayy_image0072.jpg': 'EasyUnseen',
 'case_M_20181218101803_0U62372121864002_1_001_001-1_a3_ayy_image0009.jpg': 'HardUnseen',
 'case_M_20181029094800_0U62368102981259_1_005_001-1_a17_ayy_image0013.jpg': 'HardUnseen',
 'case_M_20181213131150_0U62365121395349_1_002_002-1_a26_ayy_image0225.jpg': 'EasySeen',
 'case_M_20181119111911_0U62363111914011_1_005_001-1_a12_ayy_image0208.jpg': 'EasyUnseen',
 'case_M_20

In [None]:
create_empty_masks(DATA_FOLDER+"/train/images/negative", DATA_FOLDER+"/train/masks/negative", ".png")

Clearing existing directory: ./data/sun-seg/train/masks/negative
Analyzing image resolutions...
Found 1 unique resolutions across 20034 images
Creating template black images...
Created template black image: 1240x1080
Generating output images...
Progress: 50 images created
Progress: 100 images created
Progress: 150 images created
Progress: 200 images created
Progress: 250 images created
Progress: 300 images created
Progress: 350 images created
Progress: 400 images created
Progress: 450 images created
Progress: 500 images created
Progress: 550 images created
Progress: 600 images created
Progress: 650 images created
Progress: 700 images created
Progress: 750 images created
Progress: 800 images created
Progress: 850 images created
Progress: 900 images created
Progress: 950 images created
Progress: 1000 images created
Progress: 1050 images created
Progress: 1100 images created
Progress: 1150 images created
Progress: 1200 images created
Progress: 1250 images created
Progress: 1300 images cre

{'total_processed': 20034,
 'unique_resolutions': 1,
 'created_images': 20034,
 'skipped_files': []}

In [7]:
concatenate_text_files("./data/sun-seg-local/zip/annotations", "./data/sun-seg-local/zip/annotations.txt")

In [11]:
save_boxes_to_json("./data/sun-seg-local/zip/annotations.txt", "./data/sun-seg-local/zip/annotations.json")

In [10]:
moved = move_first_n_images(DATA_FOLDER+"/test/images/positive/unseen/hard", DATA_FOLDER+"/train/images/positive", 5640)
moved

['case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0001.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0002.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0003.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0004.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0005.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0006.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0007.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0008.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0009.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0010.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0011.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0012.jpg',
 'case_M_20181017100226_0U62367101735926_1_007_001-1_a12_ayy_image0013.jpg',

In [11]:
move_images_by_name(DATA_FOLDER+"/test/masks/positive/unseen/hard", DATA_FOLDER+"/train/masks/positive", moved)

['case_M_20181018093806_0U62363101890605_1_006_001-1_a3_ayy_image0030.png',
 'case_M_20181120115731_0U62368112064030_1_001_001-1_a6_ayy_image0063.png',
 'case_M_20181019101517_0U62367101935917_1_003_001-1_a15_ayy_image0047.png',
 'case_M_20181117100025_0U62363111779624_1_007_003-1_a8_ayy_image0354.png',
 'case_M_20181019101517_0U62367101935917_1_003_001-1_a11_ayy_image0123.png',
 'case_M_20181117100025_0U62363111779624_1_007_003-1_a8_ayy_image0461.png',
 'case_M_20181018093806_0U62363101890605_1_007_002-1_a7_ayy_image0010.png',
 'case_M_20181029094800_0U62368102981259_1_005_001-1_a17_ayy_image0014.png',
 'case_M_20181031103335_0U62368103151535_1_006_001-1_a3_ayy_image0004.png',
 'case_M_20181029094800_0U62368102981259_1_005_001-1_a15_ayy_image0013.png',
 'case_M_20181119111911_0U62363111914011_1_005_001-1_a8_ayy_image0034.png',
 'case_M_20181214095357_0U62368121473456_1_006_003-1_a18_ayy_image0075.png',
 'case_M_20181025095048_0U62372102595347_1_004_001-1_a8_ayy_image0256.png',
 'case_

In [21]:
moved = move_random_n_images(DATA_FOLDER+"/train/images/positive", DATA_FOLDER+"/test/images/positive", 1500)
move_images_by_name(DATA_FOLDER+"/train/masks/positive", DATA_FOLDER+"/test/masks/positive", moved)

['case_M_20190131094104_0U62367013193703_1_003_001-1_a9_ayy_image0213.png',
 'case_M_20190115123306_0U62368011540606_1_002_001-1_a3_ayy_image0081.png',
 'case_M_20181119111911_0U62363111914011_1_005_001-1_a8_ayy_image0034.png',
 'case_M_20181025095048_0U62372102595347_1_004_001-1_a17_ayy_image0028.png',
 'case_M_20181015101337_0U62363101576536_1_005_002-1_a9_ayy_image0012.png',
 'case_M_20181022094701_0U62368102271800_1_005_001-1_a1_ayy_image0069.png',
 'case_M_20181105095958_0U62368110576557_1_003_002-1_a7_ayy_image0067.png',
 'case_M_20181018093806_0U62363101890605_1_007_001-1_a1_ayy_image0123.png',
 'case_M_20181029094800_0U62368102981259_1_005_001-1_a18_ayy_image0084.png',
 'case_M_20181025095048_0U62372102595347_1_004_001-1_a15_ayy_image0240.png',
 'case_M_20181106093315_0U62372110682814_1_007_001-1_a3_ayy_image0027.png',
 'case_M_20181226125224_0U62363122631224_1_003_001-1_a8_ayy_image0123.png',
 'case_M_20181018093806_0U62363101890605_1_007_002-1_a3_ayy_image0043.png',
 'case_M_

In [22]:
moved = move_random_n_images(DATA_FOLDER+"/train/images/negative", DATA_FOLDER+"/test/images/negative", 500)
move_images_by_name(DATA_FOLDER+"/train/masks/negative", DATA_FOLDER+"/test/masks/negative", moved)

['case_M_20181109094641_0U62372110931241_1_003_001-1_Negative_ayy_image009387.png',
 'case_M_20181109094641_0U62372110931241_1_003_001-1_Negative_ayy_image005089.png',
 'case_M_20181024134247_0U62368102492146_1_002_001-1_Negative_ayy_image008050.png',
 'case_M_20181109094641_0U62372110931241_1_003_001-1_Negative_ayy_image009131.png',
 'case_M_20181024134247_0U62368102492146_1_002_001-1_Negative_ayy_image006170.png',
 'case_M_20181024134247_0U62368102492146_1_002_001-1_Negative_ayy_image004989.png',
 'case_M_20181024134247_0U62368102492146_1_002_001-1_Negative_ayy_image010557.png',
 'case_M_20181024134247_0U62368102492146_1_002_001-1_Negative_2_ayy_image000032.png',
 'case_M_20181109094641_0U62372110931241_1_003_001-1_Negative_ayy_image008473.png',
 'case_M_20181024134247_0U62368102492146_1_002_001-1_Negative_ayy_image001853.png',
 'case_M_20181109094641_0U62372110931241_1_003_001-1_Negative_ayy_image003720.png',
 'case_M_20181109094641_0U62372110931241_1_003_001-1_Negative_ayy_image003

In [None]:
moved = move_first_n_images(DATA_FOLDER+"/test/images/positive", DATA_FOLDER+"/train/images/positive", 1500)
move_images_by_name(DATA_FOLDER+"/test/masks/positive", DATA_FOLDER+"/train/masks/positive", moved)

ValueError: Sample larger than population or is negative

In [15]:
move_unmatching_files(DATA_FOLDER+"/train/images/negative", DATA_FOLDER+"/test/images/negative/seen", DATA_FOLDER+"/test/images/negative/unseen")
move_unmatching_files(DATA_FOLDER+"/train/images/positive", DATA_FOLDER+"/test/images/positive/seen/easy", DATA_FOLDER+"/test/images/positive/unseen/easy")
move_unmatching_files(DATA_FOLDER+"/train/images/positive", DATA_FOLDER+"/test/images/positive/seen/hard", DATA_FOLDER+"/test/images/positive/unseen/hard")

Total files moved: 0
Total files moved: 0
Total files moved: 0


In [5]:
remove_uncommon_files(F_TRAIN_IMAGES_NEG, F_TRAIN_MASKS_NEG)

In [8]:
move_even_indexed_files(F_VAL_IMAGES_POS, F_VAL_MASKS_POS)

In [9]:
move_even_indexed_files(F_VAL_IMAGES_NEG, F_VAL_MASKS_NEG)