# Generating Retinal dataset

Because retinal images contain repetitive patterns, we split each image into many small crops (each still showing visible vessels). This gives a larger training set and fewer details for the neural network to learn per training sample.

The images will be cut into 128 x 128 crops. There will be a stride of 32 pixels (one quarter of the crop size) between consecutive crops, both vertically and horizontally, so neighbouring crops overlap. This ensures that we obtain a large amount of data from the limited images we have.

In [1]:
# Mount Google Drive
# The dataset paths used later in this notebook all live under /content/drive,
# so the Drive must be mounted before any other cell is run.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Import
import os
import numpy as np
from PIL import Image

In [3]:
# Define paths
# Both folders sit side by side in the same project directory. Build them from a
# single root so the Drive prefix is spelled identically for input and output.
retinal_images_root = '/content/drive/MyDrive/ML_projects/VesselProject/RetinalImages'

# Input: full-size pre-processed images and masks (<split>/Images, <split>/Masks).
base_path = os.path.join(retinal_images_root, 'Processed')
# Output: the cropped images and masks, mirroring the input folder layout.
processed_cropped_path = os.path.join(retinal_images_root, 'Processed_cropped')

The following section is for all our functions:

In [4]:
# A function to apply the cropping operation with chosen parameters

def crop_and_save(image_path, mask_path, save_image_dir, save_mask_dir, crop_height=48, crop_width=48, division=3):
    """Cut an image and its mask into overlapping crops and save each crop as a PNG.

    The crop window slides over the image row by row. The step between two
    consecutive windows is ``crop_height // division`` vertically and
    ``crop_width // division`` horizontally, so a larger ``division`` means more
    overlap. Only windows that fit entirely inside the image are kept; an image
    smaller than the crop size produces no files.

    Crops are written as ``<original stem>_crop<k>.png`` where ``k`` starts at 1
    and increases in row-major order. The same ``k`` and the same window are used
    for the image and for the mask, so the saved files stay paired.

    Args:
        image_path: Path of the source image.
        mask_path: Path of the mask belonging to that image.
        save_image_dir: Directory that receives the image crops (created if missing).
        save_mask_dir: Directory that receives the mask crops (created if missing).
        crop_height: Height of every crop, in pixels.
        crop_width: Width of every crop, in pixels.
        division: Divisor applied to the crop size to obtain the stride.

    Raises:
        ValueError: If ``division`` is smaller than 1, if the resulting stride
            is smaller than 1 pixel, or if image and mask differ in height/width.
    """
    # Validate before touching any file: a stride of 0 would never advance the
    # window and the cropping loop would not terminate.
    if division < 1:
        raise ValueError(f"division must be at least 1, got {division}")
    stride_y = crop_height // division
    stride_x = crop_width // division
    if stride_y < 1 or stride_x < 1:
        raise ValueError(
            f"crop size {crop_height}x{crop_width} with division={division} "
            "gives a stride smaller than 1 pixel"
        )

    # Read both files into arrays and release the file handles right away.
    with Image.open(image_path) as image, Image.open(mask_path) as mask:
        image_np = np.array(image)
        mask_np = np.array(mask)

    # Axis 0 is treated as height and axis 1 as width.
    image_height, image_width = image_np.shape[:2]

    # The same window is applied to both arrays, so they must cover the same area.
    if mask_np.shape[:2] != (image_height, image_width):
        raise ValueError(
            f"image {image_path} has size {(image_height, image_width)} "
            f"but mask {mask_path} has size {mask_np.shape[:2]}"
        )

    os.makedirs(save_image_dir, exist_ok=True)
    os.makedirs(save_mask_dir, exist_ok=True)

    # File-name stems do not change between crops; compute them once.
    image_stem = os.path.splitext(os.path.basename(image_path))[0]
    mask_stem = os.path.splitext(os.path.basename(mask_path))[0]

    crop_number = 0
    # The "+ 1" keeps the last window whose bottom/right edge touches the border.
    for upper in range(0, image_height - crop_height + 1, stride_y):
        lower = upper + crop_height
        for left in range(0, image_width - crop_width + 1, stride_x):
            right = left + crop_width
            crop_number += 1

            # Crop the image and mask with the identical window
            image_crop = image_np[upper:lower, left:right]
            mask_crop = mask_np[upper:lower, left:right]

            # Save the crops
            image_name = image_stem + f'_crop{crop_number}.png'
            mask_name = mask_stem + f'_crop{crop_number}.png'

            Image.fromarray(image_crop).save(os.path.join(save_image_dir, image_name), format='PNG')
            Image.fromarray(mask_crop).save(os.path.join(save_mask_dir, mask_name), format='PNG')

In [5]:
# A function to save all the cropped images in the chosen folder

def process_folder(folder_type, crop_height=128, crop_width=128, division=4):
    """Crop every image/mask pair of one dataset split and save the crops.

    Images are read from ``<base_path>/<folder_type>/Images`` and masks from
    ``<base_path>/<folder_type>/Masks``. Crops are written to the same two
    sub-folders under ``processed_cropped_path``. Images and masks are paired
    by position after sorting both directory listings by file name.

    Args:
        folder_type: Name of the split sub-folder, e.g. ``'Train'`` or ``'Test'``.
        crop_height: Height of every crop, in pixels.
        crop_width: Width of every crop, in pixels.
        division: Divisor applied to the crop size to obtain the stride
            between consecutive crops.

    Raises:
        ValueError: If the image and mask folders hold a different number of
            entries, because positional pairing would then be unreliable.
    """
    images_path = os.path.join(base_path, folder_type, 'Images')
    masks_path = os.path.join(base_path, folder_type, 'Masks')

    save_images_path = os.path.join(processed_cropped_path, folder_type, 'Images')
    save_masks_path = os.path.join(processed_cropped_path, folder_type, 'Masks')

    image_files = sorted(os.listdir(images_path))
    mask_files = sorted(os.listdir(masks_path))

    # zip() would silently drop the surplus entries and could pair an image with
    # the wrong mask, so refuse to continue when the listings do not line up.
    if len(image_files) != len(mask_files):
        raise ValueError(
            f"{images_path} contains {len(image_files)} entries but "
            f"{masks_path} contains {len(mask_files)}"
        )

    # Make sure the destination folders exist before the first crop is written.
    os.makedirs(save_images_path, exist_ok=True)
    os.makedirs(save_masks_path, exist_ok=True)

    for image_file, mask_file in zip(image_files, mask_files):
        image_path = os.path.join(images_path, image_file)
        mask_path = os.path.join(masks_path, mask_file)
        crop_and_save(
            image_path,
            mask_path,
            save_images_path,
            save_masks_path,
            crop_height=crop_height,
            crop_width=crop_width,
            division=division,
        )


For processing and understanding data:


In [6]:
# Process the test folder
# Crops every image/mask pair found under the 'Test' split and writes the crops
# to the matching 'Test' sub-folders of the cropped dataset.
process_folder('Test')

print("Processing complete.")

Processing complete.


In [7]:
# Count files in "Processed" and "Processed_cropped" for test set
test_processed_images_path = os.path.join(base_path, 'Test', 'Images')
test_processed_masks_path = os.path.join(base_path, 'Test', 'Masks')
test_processed_cropped_images_path = os.path.join(processed_cropped_path, 'Test', 'Images')
test_processed_cropped_masks_path = os.path.join(processed_cropped_path, 'Test', 'Masks')

test_processed_images_count = len(os.listdir(test_processed_images_path))
test_processed_masks_count = len(os.listdir(test_processed_masks_path))
test_processed_cropped_images_count = len(os.listdir(test_processed_cropped_images_path))
# Count the cropped masks as well, so a missing or surplus mask crop is noticed.
test_processed_cropped_masks_count = len(os.listdir(test_processed_cropped_masks_path))

# Each image crop is saved together with one mask crop, so both folders must
# contain the same number of files.
assert test_processed_cropped_images_count == test_processed_cropped_masks_count, (
    f"Test split: {test_processed_cropped_images_count} cropped images but "
    f"{test_processed_cropped_masks_count} cropped masks"
)

In [8]:
# Print results
# One (label, value) row per line of the report for the test split.
test_report_rows = (
    ("- Processed images:", test_processed_images_count),
    ("- Processed masks:", test_processed_masks_count),
    ("- Processed cropped images:", test_processed_cropped_images_count),
)

print("Test set:")
for report_label, report_value in test_report_rows:
    print(report_label, report_value)

Test set:
- Processed images: 20
- Processed masks: 20
- Processed cropped images: 3380


In [9]:
# Process the train folder
# Crops every image/mask pair found under the 'Train' split and writes the crops
# to the matching 'Train' sub-folders of the cropped dataset.
process_folder('Train')

print("Processing complete.")

Processing complete.


In [10]:
# Count files in "Processed" and "Processed_cropped" for train set

train_processed_images_path = os.path.join(base_path, 'Train', 'Images')
train_processed_masks_path = os.path.join(base_path, 'Train', 'Masks')
train_processed_cropped_images_path = os.path.join(processed_cropped_path, 'Train', 'Images')
train_processed_cropped_masks_path = os.path.join(processed_cropped_path, 'Train', 'Masks')

train_processed_images_count = len(os.listdir(train_processed_images_path))
train_processed_masks_count = len(os.listdir(train_processed_masks_path))
train_processed_cropped_images_count = len(os.listdir(train_processed_cropped_images_path))
# Count the cropped masks as well, so a missing or surplus mask crop is noticed.
train_processed_cropped_masks_count = len(os.listdir(train_processed_cropped_masks_path))

# Each image crop is saved together with one mask crop, so both folders must
# contain the same number of files.
assert train_processed_cropped_images_count == train_processed_cropped_masks_count, (
    f"Train split: {train_processed_cropped_images_count} cropped images but "
    f"{train_processed_cropped_masks_count} cropped masks"
)

In [11]:
# Print results
# One (label, value) row per line of the report for the train split.
train_report_rows = (
    ("- Processed images:", train_processed_images_count),
    ("- Processed masks:", train_processed_masks_count),
    ("- Processed cropped images:", train_processed_cropped_images_count),
)

print("Train set:")
for report_label, report_value in train_report_rows:
    print(report_label, report_value)

Train set:
- Processed images: 146
- Processed masks: 146
- Processed cropped images: 24674
