In [36]:
import os
from typing import List, Tuple

import numpy as np
from scipy.ndimage import binary_fill_holes
from skimage import io, color, measure, morphology
from skimage.filters import threshold_otsu

In [37]:
def get_padding(image, size) -> Tuple[Tuple[int, int], Tuple[int, int]]:
    """Compute ``np.pad`` widths that centre ``image`` on a ``size`` canvas.

    Args:
        image: Object exposing ``.shape``; only its first two axes are used.
        size: Target extent ``(axis0, axis1)`` for those two axes.

    Returns:
        ``((before_0, after_0), (before_1, after_1))``, directly usable as the
        ``pad_width`` argument of ``np.pad`` for a 2-D array. When the padding
        needed along an axis is odd, the extra element goes on the "before"
        side.

    Raises:
        ValueError: If ``image`` is larger than ``size`` along either axis.
            Negative pad widths cannot be applied by ``np.pad``, so the
            problem is reported here with the offending shapes.
    """
    pad_widths = []
    for axis in (0, 1):
        total = int(size[axis] - image.shape[axis])
        if total < 0:
            raise ValueError(
                f"image shape {tuple(image.shape[:2])} exceeds target size "
                f"{tuple(size[:2])} along axis {axis}; cannot pad"
            )
        # Floor half goes after; the remainder (one larger when odd) goes before.
        after = total // 2
        pad_widths.append((total - after, after))

    return tuple(pad_widths)

In [38]:
def process_and_crop_image(image_path, output_folder, margin=10, max_black=10, pad_image_to_size=(256, 256)):
    """Crop an image to its largest bright region, pad it, and save it.

    Pipeline: load the image, convert multi-channel input to 8-bit greyscale,
    Otsu-threshold it, drop small objects, fill holes, pick the connected
    region with the largest area, crop to that region's bounding box expanded
    by ``margin``, zero out near-black pixels, pad to ``pad_image_to_size``
    and write the result as ``uint8`` under ``output_folder`` using the
    input file's basename.

    Args:
        image_path: Path of the image to read.
        output_folder: Directory the processed image is written to.
        margin: Number of pixels added on every side of the bounding box
            (clamped to the image borders).
        max_black: Pixels with a value below this (0-255 scale) are set to 0.
        pad_image_to_size: Target ``(rows, cols)`` passed to ``get_padding``.

    Returns:
        The path of the saved image, or ``None`` when no region was found or
        when any step raised. The error is printed rather than propagated;
        this includes the case where the crop is larger than
        ``pad_image_to_size`` and therefore cannot be padded.
    """
    try:
        image = io.imread(image_path)

        if image.ndim > 2:
            # rgb2gray yields floats in [0, 1]. Rescale to 0-255 so that the
            # max_black threshold and the final uint8 cast work on the same
            # scale as single-channel inputs; otherwise every pixel would
            # compare below max_black and the output would be all black.
            gray = color.rgb2gray(image)
            image = np.clip(np.rint(gray * 255), 0, 255).astype(np.uint8)

        # Foreground mask: brighter than the Otsu threshold, cleaned of small
        # specks, with interior holes filled.
        foreground = image > threshold_otsu(image)
        foreground = morphology.remove_small_objects(foreground, min_size=150)
        foreground = binary_fill_holes(foreground)

        regions = measure.regionprops(measure.label(foreground))
        if not regions:
            return None

        largest = max(regions, key=lambda r: r.area)

        # bbox is (min_row, min_col, max_row, max_col) with exclusive maxima.
        min_row, min_col, max_row, max_col = largest.bbox
        n_rows, n_cols = image.shape
        row_start = max(0, min_row - margin)
        col_start = max(0, min_col - margin)
        # Apply the margin on the bottom/right edges as well, not only top/left.
        row_stop = min(n_rows, max_row + margin)
        col_stop = min(n_cols, max_col + margin)

        cropped = image[row_start:row_stop, col_start:col_stop]
        cropped = np.where(cropped < max_black, 0, cropped)
        cropped = np.pad(cropped, get_padding(cropped, pad_image_to_size), 'constant')

        cropped_image_path = os.path.join(output_folder, os.path.basename(image_path))
        io.imsave(cropped_image_path, cropped.astype(np.uint8))
        return cropped_image_path
    except Exception as e:
        # Best-effort batch processing: report the failure and move on.
        print(f"Error processing {image_path}: {e}")
        return None

In [39]:
# Batch driver: run process_and_crop_image on every PNG/JPEG in input_folder.
# Both paths are relative to the notebook's working directory.
output_folder = '../data/cropped/'
input_folder = '../data/rsna_breast_cancer'

processed_images_paths = []
# exist_ok=True keeps this cell re-runnable when the folder already exists.
os.makedirs(output_folder, exist_ok=True)
# Number of files with a matching image extension seen so far.
im_cnt = 0
for filename in os.listdir(input_folder):
    # Extension check is case-insensitive; other files are ignored.
    if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
        im_cnt += 1
        image_path = os.path.join(input_folder, filename)
        # Result is the saved path, or None when the image was skipped/failed.
        # NOTE(review): in the visible part of this cell the result is never
        # appended to processed_images_paths — confirm the loop body continues.
        processed_image_path = process_and_crop_image(image_path, output_folder)