In [1]:
# ref: https://github.com/cauchyturing/kaggle_diabetic_RAM

from google.colab import drive
import os

# Mount Google Drive first so the repository folder below is reachable.
drive.mount('/content/drive')

# Parameters
REPOSITORY_PATH = '/content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabetic_RAM-master'

# Work from the repository root and echo the resulting working directory.
os.chdir(REPOSITORY_PATH)
print("CWD:", os.getcwd())

Mounted at /content/drive
CWD: /content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabetic_RAM-master


In [9]:
# Dataset folder inside the repository (the trailing slash is kept because
# later cells build sub-paths by plain string concatenation).
DATASET_PATH = "{}/APTOS2019/".format(REPOSITORY_PATH)
# Number of worker processes handed to the multiprocessing Pool.
N_PROC = 4

# Convert.py (Preprocessing)

In [7]:
from __future__ import division, print_function
import os
from multiprocessing.pool import Pool

import click
import numpy as np
from PIL import Image, ImageFilter

In [14]:
"""Resize and crop images to a square and save them, under the same file name, in a separate output directory."""

def convert(fname, crop_size):
    """Crop an image to its foreground (or a plain square) and resize it.

    For images clearly wider than tall (``w > 1.2 * h``) the brightest values
    found in narrow strips at the left and right edges of a blurred copy are
    treated as background; the bounding box of everything brighter than that
    background by more than 10 is used as the crop region. If no such box
    exists, or the box is smaller than 80% of the image height in either
    direction, the crop falls back to ``square_bbox``.

    Parameters
    ----------
    fname : str
        Path of the image file to open.
    crop_size : int
        Edge length, in pixels, of the returned square image.

    Returns
    -------
    PIL.Image.Image
        The cropped region of the original image resized to
        ``crop_size`` x ``crop_size``.
    """
    img = Image.open(fname)

    # Analyse an RGB view of the image so that inputs in other modes
    # (grayscale, palette, ...) still yield an (h, w, channels) array and can
    # be blurred. The crop itself is taken from the original image, so RGB
    # inputs behave exactly as before.
    probe = img if img.mode == 'RGB' else img.convert('RGB')
    ba = np.array(probe.filter(ImageFilter.BLUR))
    h, w, _ = ba.shape

    bbox = None
    # ``w >= 32`` keeps the left strip (``w // 32`` columns) from being empty,
    # which would make ``.max()`` raise on a zero-size array.
    if w > 1.2 * h and w >= 32:
        left_max = ba[:, : w // 32, :].max(axis=(0, 1)).astype(int)
        # Unary minus binds tighter than ``//``: this is ``(-w) // 32``, so the
        # right strip is one column wider than the left one whenever ``w`` is
        # not a multiple of 32. Kept as-is to preserve existing results.
        right_max = ba[:, - w // 32:, :].max(axis=(0, 1)).astype(int)
        max_bg = np.maximum(left_max, right_max)

        foreground = (ba > max_bg + 10).astype(np.uint8)
        bbox = Image.fromarray(foreground).getbbox()

        if bbox is None:
            print('bbox none for {} (???)'.format(fname))
        else:
            left, upper, right, lower = bbox
            # if we selected less than 80% of the original
            # height, just crop the square
            if right - left < 0.8 * h or lower - upper < 0.8 * h:
                print('bbox too small for {}'.format(fname))
                bbox = None

    if bbox is None:
        bbox = square_bbox(img)

    cropped = img.crop(bbox)
    resized = cropped.resize((crop_size, crop_size))
    return resized


def square_bbox(img):
    """Return the centred square crop box of an image.

    The square's edge is ``min(width, height)``, so the box is always exactly
    square: landscape images lose equal margins left and right, portrait
    images lose equal margins top and bottom (when the difference is odd the
    extra pixel is dropped from the right/bottom side).

    Parameters
    ----------
    img : object with a ``size`` attribute
        ``img.size`` must be a ``(width, height)`` pair.

    Returns
    -------
    tuple of int
        ``(left, upper, right, lower)``.
    """
    w, h = img.size
    side = min(w, h)
    left = (w - side) // 2
    upper = (h - side) // 2
    return (left, upper, left + side, upper + side)


def convert_square(fname, crop_size):
    """Open ``fname``, crop the box given by ``square_bbox`` and resize it.

    Returns the cropped image resized to ``crop_size`` x ``crop_size``.
    """
    source = Image.open(fname)
    target_size = [crop_size, crop_size]
    return source.crop(square_bbox(source)).resize(target_size)


def get_convert_fname(fname, directory, convert_directory):
    """Map a source file path to its path in the output directory.

    When ``fname`` starts with ``directory`` only that leading prefix is
    swapped for ``convert_directory``; later occurrences of the same text
    (for example in the file name itself) are left alone. If ``fname`` does
    not start with ``directory`` the previous behaviour is kept and every
    occurrence of ``directory`` in ``fname`` is replaced.

    Parameters
    ----------
    fname : str
        Path of the source file.
    directory : str
        Source directory the file was found under.
    convert_directory : str
        Directory the converted file should be written to.

    Returns
    -------
    str
        Path of the converted file.
    """
    if fname.startswith(directory):
        return convert_directory + fname[len(directory):]
    return fname.replace(directory, convert_directory)


def process(args):
    """Convert one image and save it unless the output file already exists.

    ``args`` is a pair ``(fun, (directory, convert_directory, fname,
    crop_size, extension))``; ``fun(fname, crop_size)`` produces the image
    that is saved. ``extension`` is unpacked but not used here.
    """
    fun, (directory, convert_directory, fname, crop_size, extension) = args
    target = get_convert_fname(fname, directory, convert_directory)
    # Skip work that a previous run has already written to disk.
    if os.path.exists(target):
        return
    save(fun(fname, crop_size), target)
    

def save(img, fname):
    """Write ``img`` to ``fname`` via ``img.save``, requesting ``quality=97``."""
    save_options = {"quality": 97}
    img.save(fname, **save_options)


def crop_resize(directory, convert_directory, crop_size, extension):
    """Convert every PNG found under ``directory`` into ``convert_directory``.

    Files are discovered recursively with ``os.walk``, sorted, and handed in
    batches of 10 to a ``multiprocessing`` pool of ``N_PROC`` workers, each of
    which runs ``process`` with ``convert`` as the conversion function.

    Parameters
    ----------
    directory : str
        Directory that is searched for files whose name ends in ``'png'``.
    convert_directory : str
        Output directory. It is created if missing; sub-directories below it
        are not created here.
    crop_size : int
        Edge length passed on to the conversion function.
    extension : str
        Forwarded to ``process`` inside each task tuple. The file filter
        itself is hard-coded to ``'png'`` and does not use this value.

    Raises
    ------
    OSError
        If ``convert_directory`` cannot be created and does not already exist.
    """
    try:
        os.mkdir(convert_directory)
    except OSError:
        # An already existing output directory is fine; any other failure
        # (missing parent, no permission, ...) must not be silently hidden.
        if not os.path.isdir(convert_directory):
            raise

    filenames = sorted(os.path.join(dp, f)
                       for dp, dn, fn in os.walk(directory)
                       for f in fn if f.endswith('png'))

    print("Resizing images in {} to {}, this takes a while."
          "".format(directory, convert_directory))

    args = [(convert, (directory, convert_directory, f, crop_size, extension))
            for f in filenames]

    # process in batches, sometimes weird things happen with Pool on my machine
    batchsize = 10
    # Ceiling division: no trailing empty batch when the number of files is a
    # multiple of the batch size, and no batch at all when nothing was found.
    batches = (len(args) + batchsize - 1) // batchsize
    pool = Pool(N_PROC)
    try:
        for i in range(batches):
            print("batch {:>2} / {}".format(i + 1, batches))
            pool.map(process, args[i * batchsize: (i + 1) * batchsize])
        pool.close()
    except BaseException:
        # Also covers KeyboardInterrupt: stop the workers instead of leaving
        # them behind, then let the error propagate.
        pool.terminate()
        raise
    finally:
        # Wait for the worker processes to exit (valid after close/terminate).
        pool.join()

    print('done')


In [15]:
# Resize the APTOS training images to 128x128 into a sibling directory.
crop_resize(
    directory=DATASET_PATH + "train_images",
    convert_directory=DATASET_PATH + "train_images_resized",
    crop_size=128,
    extension="png",
)

Resizing images in /content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabetic_RAM-master/APTOS2019/train_images to /content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabetic_RAM-master/APTOS2019/train_images_resized, this takes a while.
batch  1 / 367
bbox too small for /content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabetic_RAM-master/APTOS2019/train_images/0024cdab0c1e.png
bbox too small for /content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabetic_RAM-master/APTOS2019/train_images/00b74780d31d.png
bbox too small for /content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabetic_RAM-master/APTOS2019/train_images/00cb6555d108.png
batch  2 / 367
batch  3 / 367
batch  4 / 367
bbox too small for /content/drive/My Drive/University Of Stirling/Dissertation/Online Implementations/kaggle_diabeti