In [6]:
'''
fundus images preprocess

del_black_or_white  delete borders of fundus images

detect_xyr  using HoughCircles detect circle, if not detected
  suppose the center of the image is the center of the circle.


my_crop_xyr  crop the image based on the detected circle

after cropping, add some black margin areas,
   so that image augmentation (random rotate/crop) will not remove meaningful edge regions

'''

import cv2
import numpy as np
import os
# from imgaug import augmenters as iaa


# Extra margin kept around the detected content, expressed as a fraction of
# the shorter image side.
DEL_PADDING_RATIO = 0.02  # used by del_black_or_white
CROP_PADDING_RATIO = 0.02  # used by my_crop_xyr

# del_black_or_white: a column/row counts as image content when the mean of
# its values lies strictly between these two bounds; darker lines are treated
# as black border, brighter ones as white border.
# (The "THRETHOLD" spelling is kept because other code refers to these names.)
THRETHOLD_LOW = 7
THRETHOLD_HIGH = 180

# HoughCircles: minimum / maximum radius searched by detect_xyr, expressed as
# a fraction of the shorter image side.
# (The "REDIUS" spelling is kept because other code refers to these names.)
MIN_REDIUS_RATIO = 0.33
MAX_REDIUS_RATIO = 0.6

def _content_span(line_sums, values_per_line, padding):
    """Return the (start, stop) slice bounds of the content along one axis.

    line_sums holds one summed value per column (or row); values_per_line is
    the number of values that went into each sum.  A line is content when its
    sum lies strictly between THRETHOLD_LOW and THRETHOLD_HIGH times
    values_per_line.  `stop` is exclusive.  When no line qualifies the whole
    axis is kept.
    """
    limit = len(line_sums)
    is_content = (line_sums > THRETHOLD_LOW * values_per_line) & \
                 (line_sums < THRETHOLD_HIGH * values_per_line)
    hits = np.flatnonzero(is_content)
    if hits.size == 0:
        return 0, limit

    start = max(0, int(hits[0]) - padding)
    # +1 turns the last content index into an exclusive slice end, so the
    # margin on this side matches the one on the start side.
    stop = min(limit, int(hits[-1]) + 1 + padding)
    return start, stop


def del_black_or_white(img1):
    """Crop away near-black and near-white borders of an image.

    Columns and rows are scanned from each side; the crop starts at the first
    and ends at the last line whose mean value lies strictly between
    THRETHOLD_LOW and THRETHOLD_HIGH.  A margin of DEL_PADDING_RATIO times the
    shorter image side is kept around that region, clamped to the image.

    :param img1: image array indexed as (height, width[, channels]).
    :return: the cropped view of the image.  A 2-D input is returned with a
        trailing channel axis of length 1 (same as before this rewrite).
    """
    if img1.ndim == 2:
        img1 = np.expand_dims(img1, axis=-1)

    # cv2 / numpy order is (height, width, channels)
    height, width, channels = img1.shape[:3]
    padding = int(min(width, height) * DEL_PADDING_RATIO)

    # One sum per column / per row, computed once instead of twice per line.
    column_sums = img1.sum(axis=(0, 2))
    row_sums = img1.sum(axis=(1, 2))

    left, right = _content_span(column_sums, height * channels, padding)
    bottom, top = _content_span(row_sums, width * channels, padding)

    return img1[bottom:top, left:right, :]

def detect_xyr(img_source):
    """Estimate the centre and radius of the circular region in an image.

    cv2.HoughCircles is run on the grayscale image with a radius range of
    MIN_REDIUS_RATIO..MAX_REDIUS_RATIO times the shorter image side.  Only the
    first returned circle is considered, and it is accepted only when its
    centre lies between 2/5 and 3/5 of both the width and the height.

    When no circle is accepted, the image centre is used and the radius is
    estimated from the middle row: half the number of pixels whose channel sum
    exceeds 1/12 of that row's mean.

    :param img_source: file name (str) or an image array with a channel axis
        (it is passed to cv2.COLOR_BGR2GRAY).
    :return: tuple (found_circle, x, y, r); x is along the width, y along the
        height.
    :raises Exception: if a file name is given and the image cannot be read.
    """
    if isinstance(img_source, str):
        try:
            img = cv2.imread(img_source)
        except Exception as err:
            # e.g. "Corrupt JPEG data1: 19 extraneous bytes before marker 0xc4"
            raise Exception("image file not found:" + img_source) from err
        if img is None:
            raise Exception("image file error:" + img_source)
    else:
        img = img_source

    height, width = img.shape[:2]
    shorter_side = min(width, height)

    min_radius = round(shorter_side * MIN_REDIUS_RATIO)
    max_radius = round(shorter_side * MAX_REDIUS_RATIO)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, 1, minDist=450, param1=120, param2=32,
                               minRadius=min_radius,
                               maxRadius=max_radius)

    if circles is not None:
        circles = np.round(circles[0, :]).astype("int")
        # The original condition was `len(circles == 1)`, which is truthy for
        # any non-empty result; that effective behaviour is kept but spelled
        # out: use the first circle whenever at least one was returned.
        if len(circles) > 0:
            # x width, y height
            x1, y1, r1 = (int(v) for v in circles[0])
            centred_x = (2 / 5 * width) < x1 < (3 / 5 * width)
            centred_y = (2 / 5 * height) < y1 < (3 / 5 * height)
            if centred_x and centred_y:
                return (True, x1, y1, r1)

    # No usable circle: suppose the centre of the image is the centre of the circle.
    x = width // 2
    y = height // 2

    # Get the radius according to the distribution of pixels on the middle line.
    middle_row = img[height // 2, :, :].sum(1)
    r = int((middle_row > middle_row.mean() / 12).sum() / 2)

    return (False, x, y, r)

def my_crop_xyr(img_source, x, y, r, crop_size=None):
    """Crop an image to the square around a circle, plus a small margin.

    The margin is CROP_PADDING_RATIO times the shorter image side.  If the
    padded circle does not fit into the shorter image side, that axis is left
    uncropped and only the longer axis is cropped.

    :param img_source: file name (str) or image array indexed (height, width, ...).
    :param x: circle centre along the width.
    :param y: circle centre along the height.
    :param r: circle radius.
    :param crop_size: if given, the crop is resized to (crop_size, crop_size).
    :return: the cropped (and optionally resized) image.
    :raises Exception: if the image cannot be read or is None.
    """
    if isinstance(img_source, str):
        # img_source is a file name
        try:
            image1 = cv2.imread(img_source)
        except Exception as err:
            # e.g. "Corrupt JPEG data1: 19 extraneous bytes before marker 0xc4"
            raise Exception("image file not found:" + img_source) from err
    else:
        image1 = img_source

    if image1 is None:
        # str() keeps this message working when a non-string None was passed.
        raise Exception("image file error:" + str(img_source))

    image_height, image_width = image1.shape[:2]

    img_padding = int(min(image_width, image_height) * CROP_PADDING_RATIO)
    half_side = r + img_padding

    # Slice ends are exclusive, so clamp to the full width/height; clamping to
    # `size - 1` would drop the last column/row.
    image_left = int(max(0, x - half_side))
    image_right = int(min(x + half_side, image_width))
    image_bottom = int(max(0, y - half_side))
    image_top = int(min(y + half_side, image_height))

    if min(image_width, image_height) >= 2 * half_side:
        # The image is larger than the padded circle: crop both axes.
        image1 = image1[image_bottom:image_top, image_left:image_right]
    elif image_width >= image_height:
        # Landscape image shorter than the circle: keep the full height.
        image1 = image1[:, image_left:image_right]
    else:
        # Portrait image narrower than the circle: keep the full width.
        image1 = image1[image_bottom:image_top, :]

    if crop_size is not None:
        image1 = cv2.resize(image1, (crop_size, crop_size))

    return image1

def add_black_margin(img_source, add_black_pixel_ratio = 0.05):
    """Surround an image with a black (zero-valued) border on all four sides.

    The border width is add_black_pixel_ratio times the shorter image side,
    truncated to an integer.  The result keeps the dtype and channel layout of
    the input; grayscale (2-D) and multi-channel images are both accepted.

    :param img_source: file name (str) or image array indexed (height, width, ...).
    :param add_black_pixel_ratio: border width as a fraction of the shorter side.
    :return: a new array that is 2 * border larger in height and in width.
    :raises Exception: if the image cannot be read or is None.
    """
    if isinstance(img_source, str):
        # img_source is a file name
        try:
            image1 = cv2.imread(img_source)
        except Exception as err:
            # e.g. "Corrupt JPEG data1: 19 extraneous bytes before marker 0xc4"
            raise Exception("image file not found:" + img_source) from err
    else:
        image1 = img_source

    if image1 is None:
        # str() keeps this message working when a non-string None was passed.
        raise Exception("image file error:" + str(img_source))

    height, width = image1.shape[:2]

    add_black_pixel = int(min(height, width) * add_black_pixel_ratio)

    # Pad only the two spatial axes; any trailing (channel) axes are untouched.
    # np.pad keeps the input dtype, whereas concatenating np.zeros blocks
    # silently promoted uint8 images to float64.
    pad_width = [(add_black_pixel, add_black_pixel)] * 2 + [(0, 0)] * (image1.ndim - 2)

    return np.pad(image1, pad_width, mode='constant', constant_values=0)



def my_preprocess(img_source, crop_size, train_or_valid='train', img_file_dest=None):
    """Run the full preprocessing pipeline on one image.

    Steps: strip black/white borders (del_black_or_white), downscale so the
    shorter side is at most 2 * crop_size, locate the circle (detect_xyr),
    crop around it (my_crop_xyr) and resize to (crop_size, crop_size).
    For train_or_valid == 'train' a black margin is added before the final
    resize so that later augmentation (random rotate/crop) does not cut off
    useful areas.

    :param img_source: file name (str) or image array.
    :param crop_size: side length of the square output image.
    :param train_or_valid: 'train' adds the black margin; any other value
        crops and resizes without it.
    :param img_file_dest: optional output file; its directory is created if
        it does not exist.
    :return: the processed image, or None when the shorter side is under
        100 pixels after border removal.
    :raises Exception: if the image cannot be read or is None.
    """
    if isinstance(img_source, str):
        try:
            image1 = cv2.imread(img_source)
        except Exception as err:
            raise Exception("image file not found:" + img_source) from err
    else:
        image1 = img_source

    if image1 is None:  # file does not exist or other errors
        # str() keeps this message working when a non-string None was passed.
        raise Exception("image file error:" + str(img_source))

    image1 = del_black_or_white(image1)

    min_width_height = min(image1.shape[0], image1.shape[1])

    if min_width_height < 100:  # image too small
        return None

    # image too big, resize before running the Hough transform
    image_size_before_hough = crop_size * 2
    if min_width_height > image_size_before_hough:
        crop_ratio = image_size_before_hough / min_width_height
        # fx, fy: scaling factors along the x axis and the y axis
        image1 = cv2.resize(image1, None, fx=crop_ratio, fy=crop_ratio)

    (_found_circle, x, y, r) = detect_xyr(image1)

    if train_or_valid == 'train':
        image1 = my_crop_xyr(image1, x, y, r)
        # add some black margin, so that image augmentation (random rotate/crop)
        # does not delete useful areas
        image1 = add_black_margin(image1, add_black_pixel_ratio=0.07)
        image1 = cv2.resize(image1, (crop_size, crop_size))
    else:
        image1 = my_crop_xyr(image1, x, y, r, crop_size)

    # if img_file_dest is specified, save to file
    if img_file_dest is not None:
        dest_dir = os.path.dirname(img_file_dest)
        # dirname is '' for a bare file name, and os.makedirs('') raises.
        if dest_dir:
            os.makedirs(dest_dir, exist_ok=True)

        cv2.imwrite(img_file_dest, image1)

    return image1

# simple demo code
if __name__ == '__main__':
    img_file = 'data/Training Images/Wet/53_left.jpg'
    if not os.path.exists(img_file):
        print('file not exists!')
    else:
        img_processed = my_preprocess(img_file, crop_size=384)
        if img_processed is None:
            # my_preprocess returns None when the image is too small after
            # border removal; cv2.imwrite would fail on None.
            print('image too small, skipped!')
        else:
            cv2.imwrite('tmp_preprocess.jpg', img_processed)
            print('OK')

OK


In [14]:
 # Python script to create train/test/valid
import os
import shutil
import random
folders = ['Dry', 'Wet', 'Normal']

# Split ratios (the same for every class, so defined once outside the loop).
# The validation set receives whatever remains after train and test.
train_ratio = 0.8
valid_ratio = 0.1
test_ratio = 0.1

for folder in folders:
    original_folder = f"data/Training Images/{folder}"

    # Destination paths for the train, test, and validation sets
    train_folder = f"dataset/train/{folder}"
    test_folder = f"dataset/test/{folder}"
    valid_folder = f"dataset/valid/{folder}"

    # Create directories if they don't exist.  A separate loop variable is
    # used so the outer `folder` (the class name) is not overwritten.
    for split_folder in (train_folder, test_folder, valid_folder):
        os.makedirs(split_folder, exist_ok=True)

    # Get the list of images; sub-directories are skipped because
    # shutil.copy only copies regular files.
    images = [name for name in os.listdir(original_folder)
              if os.path.isfile(os.path.join(original_folder, name))]
    random.shuffle(images)

    # Calculate the number of images for each set
    num_images = len(images)
    num_train = int(train_ratio * num_images)
    num_test = int(test_ratio * num_images)
    num_valid = num_images - num_train - num_test

    # Copy images to the respective folders: first num_train go to train,
    # the next num_test to test, the rest to valid.
    for i, image in enumerate(images):
        src_path = os.path.join(original_folder, image)
        if i < num_train:
            dst_path = os.path.join(train_folder, image)
        elif i < num_train + num_test:
            dst_path = os.path.join(test_folder, image)
        else:
            dst_path = os.path.join(valid_folder, image)
        shutil.copy(src_path, dst_path)

In [15]:
import os

# Function to process images in a directory and its subdirectories
def process_images_in_directory(directory_path):
    """Preprocess every image under directory_path, overwriting it in place.

    The tree is walked recursively.  Files whose name ends with .png, .jpg,
    .jpeg, .bmp or .gif (case-insensitive) are passed to my_preprocess with a
    crop size of 384 and the result is written back to the same path.  Images
    for which my_preprocess returns None are left untouched and reported.

    :param directory_path: root directory to walk.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    for root, _dirs, files in os.walk(directory_path):
        for file in files:
            # Only handle image files (more formats can be added above)
            if not file.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(root, file)

            resized_image = my_preprocess(image_path, 384)
            if resized_image is None:
                # my_preprocess returns None for images that are too small;
                # cv2.imwrite would fail on None and abort the whole batch.
                print(f"Image skipped (too small) {image_path}")
                continue

            # Save the processed image, overwriting the original file
            cv2.imwrite(image_path, resized_image)
            print(f"Image preprocessed {image_path}")

# Root directory whose images are preprocessed
directory_path = "dataset"

# NOTE: the output size is not configured here; process_images_in_directory
# passes 384 to my_preprocess.

# Walk the directory and its subdirectories, overwriting each image file with
# its preprocessed version
process_images_in_directory(directory_path)

Image preprocessed dataset\test\Dry\1085_left.jpg
Image preprocessed dataset\test\Dry\1154_right.jpg
Image preprocessed dataset\test\Dry\1202_right.jpg
Image preprocessed dataset\test\Dry\126_left.jpg
Image preprocessed dataset\test\Dry\1801_right.jpg
Image preprocessed dataset\test\Dry\1835_right.jpg
Image preprocessed dataset\test\Dry\1847_right.jpg
Image preprocessed dataset\test\Dry\1859_right.jpg
Image preprocessed dataset\test\Dry\1862_right.jpg
Image preprocessed dataset\test\Dry\1877_left.jpg
Image preprocessed dataset\test\Dry\1877_right.jpg
Image preprocessed dataset\test\Dry\1881_left.jpg
Image preprocessed dataset\test\Dry\1892_left.jpg
Image preprocessed dataset\test\Dry\230_right.jpg
Image preprocessed dataset\test\Dry\247_left.jpg
Image preprocessed dataset\test\Dry\410_left.jpg
Image preprocessed dataset\test\Dry\547_left.jpg
Image preprocessed dataset\test\Dry\564_left.jpg
Image preprocessed dataset\test\Dry\565_left.jpg
Image preprocessed dataset\test\Dry\815_left.jpg