In [None]:
import sys, os
# Resolve the parent of the first sys.path entry once, make it the working
# directory, and add its ``src`` sub-folder to the module search path.
_parent_dir = os.path.dirname(sys.path[0])
os.chdir(_parent_dir)
sys.path.append(os.path.join(_parent_dir, 'src'))

In [None]:
import os
import shutil

def get_image_size(filepath):
    """Return the on-disk size of ``filepath`` in bytes.

    This is the file size, not the pixel dimensions of the image.
    """
    file_stats = os.stat(filepath)
    return file_stats.st_size

def filter_and_copy_images(source_dir, mask_dir, dest_dir, top_n=40):
    """Copy the largest files of ``source_dir`` together with their masks.

    The regular files directly inside ``source_dir`` are ranked by on-disk
    size (largest first) and the first ``top_n`` are kept as candidates.
    A candidate is copied only if its mask exists in ``mask_dir``; otherwise
    a warning is printed and it is skipped, so fewer than ``top_n`` pairs may
    end up in the destination.

    Args:
        source_dir: Directory containing the image files.
        mask_dir: Directory containing the masks. The mask filename is the
            image filename with every ".jpg" substring replaced by ".png".
        dest_dir: Output root; images go to ``dest_dir/images`` and masks to
            ``dest_dir/masks`` (both created if missing).
        top_n: Maximum number of candidate images to consider.

    Returns:
        None. A summary with the number of pairs actually copied is printed.
    """
    # Create destination directories
    images_dest_dir = os.path.join(dest_dir, 'images')
    masks_dest_dir = os.path.join(dest_dir, 'masks')
    os.makedirs(images_dest_dir, exist_ok=True)
    os.makedirs(masks_dest_dir, exist_ok=True)

    # Collect regular files only (sub-directories are ignored), largest first.
    candidates = [
        path
        for path in (os.path.join(source_dir, name) for name in os.listdir(source_dir))
        if os.path.isfile(path)
    ]
    candidates.sort(key=os.path.getsize, reverse=True)

    copied = 0
    for image_path in candidates[:top_n]:
        image_filename = os.path.basename(image_path)

        # Corresponding mask file (same name, ".jpg" swapped for ".png")
        mask_filename = image_filename.replace(".jpg", ".png")
        mask_path = os.path.join(mask_dir, mask_filename)

        if not os.path.exists(mask_path):
            print(f"Warning: Mask for {image_filename} not found.")
            continue

        # Copy image and mask to new directories
        shutil.copy(image_path, os.path.join(images_dest_dir, image_filename))
        shutil.copy(mask_path, os.path.join(masks_dest_dir, mask_filename))
        copied += 1

    # Report the pairs that were really copied, not the number of candidates.
    print(f"Copied {copied} images and their masks to {dest_dir}")

# Run the selection step: pick the largest training images that have a
# ground-truth mask and stage them in a temporary "curated_temp" folder.
source_directory = 'data2/external/training/images'
mask_directory = 'data2/external/training/groundtruth'
destination_directory = 'data2/external/curated_temp'

filter_and_copy_images(
    source_dir=source_directory,
    mask_dir=mask_directory,
    dest_dir=destination_directory,
)

In [None]:
import os
import shutil
from PIL import Image

# NOTE(review): same definition as the helper in an earlier cell of this
# notebook; consider defining it once.
def get_image_size(filepath):
    """Return the on-disk size of ``filepath`` in bytes (not pixel dimensions)."""
    size_in_bytes = os.stat(filepath).st_size
    return size_in_bytes

def split_image(image_path, dest_dir, image_name, splits_x, splits_y, file_extension):
    """Cut an image into a ``splits_x`` x ``splits_y`` grid and save each tile.

    Tiles are written to ``dest_dir`` as
    ``{image_name}_{column}_{row}.{extension}`` with 1-based column/row
    indices. Tile size is computed with integer division, so when the image
    width or height is not a multiple of the split count the leftover pixels
    on the right/bottom edge are not included in any tile.

    Args:
        image_path: Path of the image to open.
        dest_dir: Existing directory that receives the tiles.
        image_name: Base name used for the tile files.
        splits_x: Number of tiles along the width (must be >= 1).
        splits_y: Number of tiles along the height (must be >= 1).
        file_extension: Extension for the tile files, with or without a
            leading dot ("png" and ".png" are equivalent).

    Raises:
        ValueError: If ``splits_x`` or ``splits_y`` is smaller than 1.
    """
    if splits_x < 1 or splits_y < 1:
        raise ValueError(
            f"splits_x and splits_y must be >= 1, got {splits_x} and {splits_y}"
        )

    # os.path.splitext() returns extensions with their dot; drop it so the
    # f-string below does not produce names such as "img_1_1..jpg".
    extension = file_extension.lstrip(".")

    # Open the image file
    with Image.open(image_path) as img:
        width, height = img.size
        sub_width, sub_height = width // splits_x, height // splits_y

        # Split and save the subimages
        for i in range(splits_x):
            for j in range(splits_y):
                # (left, upper, right, lower) box of tile (i, j)
                box = (
                    i * sub_width,
                    j * sub_height,
                    (i + 1) * sub_width,
                    (j + 1) * sub_height,
                )
                tile_name = f"{image_name}_{i+1}_{j+1}.{extension}"
                img.crop(box).save(os.path.join(dest_dir, tile_name))

def filter_and_copy_images(source_dir, mask_dir, dest_dir, splits_x, splits_y, top_n=200):
    """Split the largest images of ``source_dir`` and their masks into tiles.

    The regular files directly inside ``source_dir`` are ranked by on-disk
    size (largest first) and the first ``top_n`` are kept as candidates.
    Each candidate whose mask exists in ``mask_dir`` is cut into a
    ``splits_x`` x ``splits_y`` grid with ``split_image``; the mask is cut
    the same way. Candidates without a mask are skipped with a warning.

    Args:
        source_dir: Directory containing the image files.
        mask_dir: Directory containing the masks. The mask filename is the
            image filename with every ".jpg" substring replaced by ".png".
        dest_dir: Output root; image tiles go to ``dest_dir/images`` and mask
            tiles to ``dest_dir/masks`` (both created if missing).
        splits_x: Number of tiles along the width.
        splits_y: Number of tiles along the height.
        top_n: Maximum number of candidate images to consider.

    Returns:
        None. A summary with the number of pairs actually processed is printed.
    """
    # Create destination directories
    images_dest_dir = os.path.join(dest_dir, 'images')
    masks_dest_dir = os.path.join(dest_dir, 'masks')
    os.makedirs(images_dest_dir, exist_ok=True)
    os.makedirs(masks_dest_dir, exist_ok=True)

    # Collect regular files only (sub-directories are ignored), largest first.
    candidates = [
        path
        for path in (os.path.join(source_dir, name) for name in os.listdir(source_dir))
        if os.path.isfile(path)
    ]
    candidates.sort(key=os.path.getsize, reverse=True)

    processed = 0
    for image_path in candidates[:top_n]:
        image_filename = os.path.basename(image_path)
        image_name, file_extension = os.path.splitext(image_filename)

        # Corresponding mask file
        mask_path = os.path.join(mask_dir, image_filename.replace(".jpg", ".png"))

        if not os.path.exists(mask_path):
            print(f"Warning: Mask for {image_filename} not found.")
            continue

        # split_image() inserts the "." itself, so pass extensions without
        # the leading dot that os.path.splitext() includes; otherwise the
        # tiles would be named like "img_1_1..jpg".
        split_image(image_path, images_dest_dir, image_name, splits_x, splits_y,
                    file_extension.lstrip("."))
        split_image(mask_path, masks_dest_dir, image_name, splits_x, splits_y, "png")
        processed += 1

    # Report the pairs that were really processed, not the number of candidates.
    print(f"Copied and split {processed} images and their masks to {dest_dir}")

# Run the tiling step on the staged images/masks and write the tiles to the
# final "curated" folder.
source_directory = 'data2/external/curated_temp/images'
mask_directory = 'data2/external/curated_temp/masks'
destination_directory = 'data2/external/curated'
splits_x = 2  # tiles per image along the width
splits_y = 2  # tiles per image along the height

filter_and_copy_images(
    source_dir=source_directory,
    mask_dir=mask_directory,
    dest_dir=destination_directory,
    splits_x=splits_x,
    splits_y=splits_y,
)
# Remove the staging directory. It contains the source and mask folders used
# above, so it has to be recreated before this cell can be run again.
shutil.rmtree('data2/external/curated_temp')