In [5]:
import shutil
shutil.make_archive('DISCARDED IMAGES', 'zip', '/kaggle/working/DISCARDED')

'/kaggle/working/DISCARDED IMAGES.zip'

In [16]:
import shutil
shutil.make_archive('SAVED IMAGES', 'zip', '/kaggle/working/SAVE')

'/kaggle/working/SAVED IMAGES.zip'

In [14]:
rm -rf /kaggle/working/*

In [15]:
import os
import cv2
import numpy as np
import time

def process_contour(image, cont, image_name, save_path, patch_size=256, step_size=256, contour_index=0, **kwargs):
    start_x, start_y, w, h = cv2.boundingRect(cont)

    stop_y = start_y + h
    stop_x = start_x + w

    step_size_x = step_size
    step_size_y = step_size

    x_range = np.arange(start_x, stop_x, step=step_size_x)
    y_range = np.arange(start_y, stop_y, step=step_size_y)
    x_coords, y_coords = np.meshgrid(x_range, y_range, indexing='ij')
    coord_candidates = np.array([x_coords.flatten(), y_coords.flatten()]).transpose()

    patch_count = 0
    discard_count = 0
    for coord in coord_candidates:
        x, y = coord
        patch = image[y:y+patch_size, x:x+patch_size]
        gray_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
        variance = np.var(gray_patch)
        thresh = 20
        if patch.shape[0] == patch_size and patch.shape[1] == patch_size:
            if not is_mostly_white_or_black(patch) and variance >= thresh:
                patch_filename = f"{image_name}_{patch_count}.png"
                patch_path = os.path.join(save_path, patch_filename)
                cv2.imwrite(patch_path, patch)
                patch_count += 1

    #print(f"Extracted {patch_count} patches from contour {contour_index}")

def is_mostly_white_or_black(patch, white_threshold=240, black_threshold=20, white_ratio=0.99, black_ratio=0.20, texture_threshold=20, noise_threshold=5, pink_purple_min_ratio=0.02):
    gray_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY)
    white_pixels = np.sum(gray_patch > white_threshold)
    black_pixels = np.sum(gray_patch < black_threshold)
    total_pixels = gray_patch.size
    white_pixel_ratio = white_pixels / total_pixels
    black_pixel_ratio = black_pixels / total_pixels

    variance = np.var(gray_patch)

    edges = cv2.Canny(gray_patch, 50, 150)
    noise_pixels = np.sum(edges > 0)

    hsv_patch = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)
    pink_mask = cv2.inRange(hsv_patch, np.array([130, 50, 50]), np.array([180, 255, 255]))
    purple_mask = cv2.inRange(hsv_patch, np.array([120, 50, 50]), np.array([150, 255, 255]))
    pink_purple_pixels = np.sum(pink_mask > 0) + np.sum(purple_mask > 0)
    pink_purple_ratio = pink_purple_pixels / total_pixels

    return (white_pixel_ratio > white_ratio or black_pixel_ratio > black_ratio or noise_pixels > noise_threshold or pink_purple_ratio < pink_purple_min_ratio) and variance < texture_threshold

def process_contours(image, image_name, save_path, patch_size=256, step_size=256, **kwargs):
    image_folder_path = os.path.join(save_path, image_name)
    os.makedirs(image_folder_path, exist_ok=True)

    print(f"Creating patches for: {image_name}")
    elapsed = time.time()
    
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    contours, _ = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    
    n_contours = len(contours)
    #print(f"Total number of contours to process: {n_contours}")
    
    for idx, cont in enumerate(contours):
        #print(f'Processing contour {idx + 1}/{n_contours}')
        process_contour(image, cont, image_name, image_folder_path, patch_size, step_size, idx, **kwargs)
    
    print(f"Time elapsed: {time.time() - elapsed:.2f} seconds")

def patching(image_path, save_path, **kwargs):
    try:
        start_time = time.time()

        image = cv2.imread(image_path)
        if image is None:
            print(f"Error reading {image_path}: Image is None")
            return

        image_name = os.path.splitext(os.path.basename(image_path))[0]
        process_contours(image, image_name, save_path, **kwargs)

        patch_time_elapsed = time.time() - start_time
        print(f"Patching completed for {image_name} in {patch_time_elapsed:.2f} seconds")
    except Exception as e:
        print(f"An error occurred while processing {image_path}: {e}")

def process_folder(input_folder, save_path, **kwargs):
    for filename in os.listdir(input_folder):
        if filename.endswith(".png"):
            image_path = os.path.join(input_folder, filename)
            patching(image_path, save_path, **kwargs)
            
input_folder = "/kaggle/input/small-dataset/check"
save_path = "/kaggle/working/SAVE"
process_folder(input_folder, save_path, patch_size=512, step_size=512)


Creating patches for: TCGA-4N-A93T_nonMSIH1
Time elapsed: 7.28 seconds
Patching completed for TCGA-4N-A93T_nonMSIH1 in 9.67 seconds
Creating patches for: TCGA-AA-3715_MSIH13
Time elapsed: 11.44 seconds
Patching completed for TCGA-AA-3715_MSIH13 in 15.01 seconds
Creating patches for: TCGA-3L-AA1B_nonMSIH0
Time elapsed: 8.11 seconds
Patching completed for TCGA-3L-AA1B_nonMSIH0 in 10.68 seconds
