This notebook resizes images from 1440x1616 to 512x512, a size frequently used for U-Net training. The aspect ratio is preserved by padding, and the results are saved as TIFF files.

In [1]:
from PIL import Image
import numpy as np
from tqdm import tqdm
import os
import glob

Resize each image and its mask in a loop, then save the results

In [4]:
import os
import glob
from PIL import Image
import numpy as np

def resize_with_padding(img, target_size=(512, 512), color=(0, 0, 0)):
    """Scale an image to fit inside ``target_size`` and centre it on a padded canvas.

    The aspect ratio is preserved: the image is scaled by the smaller of the
    two per-axis factors, then pasted in the middle of a new canvas of exactly
    ``target_size``.

    Args:
        img: PIL image to resize. Images whose mode is ``'RGB'`` are resampled
            with LANCZOS onto an RGB canvas; every other mode is treated as a
            mask, resampled with NEAREST (so label values are not blended) and
            pasted onto an ``'L'`` canvas filled with 0.
        target_size: ``(width, height)`` of the output canvas.
        color: Padding colour for RGB images. Ignored for masks.

    Returns:
        A new PIL image of size ``target_size``.

    Raises:
        ZeroDivisionError: If ``img`` has a zero width or height.
    """
    width, height = img.size
    target_width, target_height = target_size

    # The smaller factor guarantees that both axes fit inside the target.
    ratio = min(target_width / width, target_height / height)

    # round() instead of int(): float error can leave the limiting axis a hair
    # below the target (e.g. 511.99999999999994), which truncation would turn
    # into a one-pixel-short image. The clamp keeps each axis within
    # [1, target] so extreme aspect ratios never request a zero-sized resize.
    new_size = (
        max(1, min(target_width, round(width * ratio))),
        max(1, min(target_height, round(height * ratio))),
    )

    if img.mode == 'RGB':
        resized_img = img.resize(new_size, Image.Resampling.LANCZOS)
        new_img = Image.new("RGB", target_size, color)
    else:
        # Masks: NEAREST keeps the original label values intact.
        resized_img = img.resize(new_size, Image.Resampling.NEAREST)
        new_img = Image.new("L", target_size, 0)

    # Centre the resized image; any odd leftover pixel goes to the right/bottom.
    paste_position = (
        (target_width - new_size[0]) // 2,
        (target_height - new_size[1]) // 2,
    )
    new_img.paste(resized_img, paste_position)

    return new_img

def process_dataset(base_dir="C:\\Users\\tania\\PythonProjects\\keylife-ng-pv\\ml_project_2\\data\\u-net-data\\", target_size=(512, 512)):
    """Resize every training image and its mask, saving both as LZW-compressed TIFFs.

    Reads ``*.jpg`` files from ``<base_dir>/training`` and, for each one, the
    mask ``<base_dir>/training-labels/<name>_mask.png``. Both are padded to
    ``target_size`` with ``resize_with_padding`` and written to
    ``<base_dir>/512_tiff/training`` and ``<base_dir>/512_tiff/training-labels``.

    Images without a mask are skipped with a warning. An error on one file is
    reported and does not stop the remaining files from being processed.

    Args:
        base_dir: Dataset root containing the ``training`` and
            ``training-labels`` folders.
        target_size: ``(width, height)`` of the output images and masks.

    Returns:
        None. Progress is reported through ``print``.
    """
    # Define directories
    image_dir = os.path.join(base_dir, "training")
    mask_dir = os.path.join(base_dir, "training-labels")

    output_root = os.path.join(base_dir, "512_tiff")
    output_image_dir = os.path.join(output_root, "training")
    output_mask_dir = os.path.join(output_root, "training-labels")

    # Create output directories if they don't exist
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    # Escape the directory so glob metacharacters in the path ("[", "*", "?")
    # are taken literally; sort so the processing order is the same on every OS.
    image_files = sorted(glob.glob(os.path.join(glob.escape(image_dir), "*.jpg")))

    if not image_files:
        # Nothing was written, so do not claim the outputs were saved.
        print(f"Warning: No .jpg images found in {image_dir}.")
        return

    # Process each image and its corresponding mask
    for image_path in image_files:
        try:
            # Get the base filename without extension
            base_filename = os.path.splitext(os.path.basename(image_path))[0]

            # Construct mask path
            mask_path = os.path.join(mask_dir, f"{base_filename}_mask.png")

            # Skip if mask doesn't exist
            if not os.path.exists(mask_path):
                print(f"Warning: Mask not found for {base_filename}. Skipping.")
                continue

            # The context managers close the source files even if decoding or
            # resizing fails; the resized copies are independent in-memory images.
            with Image.open(image_path) as image:
                if image.mode != 'RGB':
                    image = image.convert('RGB')
                resized_image = resize_with_padding(image, target_size)

            with Image.open(mask_path) as mask:
                if mask.mode != 'L':
                    mask = mask.convert('L')
                resized_mask = resize_with_padding(mask, target_size)

            # Save as TIFF
            output_image_path = os.path.join(output_image_dir, f"{base_filename}.tiff")
            output_mask_path = os.path.join(output_mask_dir, f"{base_filename}_mask.tiff")

            resized_image.save(output_image_path, format='TIFF', compression='tiff_lzw')
            resized_mask.save(output_mask_path, format='TIFF', compression='tiff_lzw')

            print(f"Processed {base_filename}")

        except Exception as e:
            # Deliberate best-effort: report the failure and move on to the next file.
            print(f"Error processing {os.path.basename(image_path)}: {e}")

    print(f"Processing complete. Resized images and masks saved to {output_root}")


In [5]:
# Run the resize-and-pad pass over the default dataset directory with a 512x512 target size.
process_dataset(target_size=(512, 512))

Processed 20250527212624
Processed 20250527212634
Processed 20250527212639
Processed 20250527212643
Processed 20250527212651
Processed 20250527212654
Processed 20250527212658
Processed 20250527212726
Processed 20250527212731
Processed 20250527212735
Processed 20250527212749
Processed 20250527212753
Processed 20250527212756
Processed 20250527212800
Processed 20250527212803
Processed 20250527212807
Processed 20250527212814
Processed 20250527212915
Processed 20250527212943
Processed 20250527212947
Processed 20250527212950
Processed 20250527212953
Processed 20250527212957
Processed 20250527213001
Processed 20250527213004
Processed 20250527213008
Processed 20250527213017
Processed 20250527213020
Processed 20250527213024
Processed 20250527213027
Processed 20250527213048
Processed 20250527213056
Processed 20250527213105
Processed 20250527213108
Processed 20250527213112
Processed 20250527213119
Processed 20250527213123
Processed 20250527213127
Processed 20250527213130
Processed 20250527213134
