In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm

In [None]:
class PILtoCV2(object):
    """Callable transform: RGB image -> BGR numpy array (OpenCV channel order)."""

    def __call__(self, image):
        """
        Convert ``image`` to a numpy array and swap its channels from RGB to BGR.

        image: anything ``np.array`` can turn into an RGB pixel array
               (presumably a PIL image, going by the class name).
        Returns the array produced by ``cv2.cvtColor``.
        """
        rgb_pixels = np.array(image)
        return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)


class CV2toPIL(object):
    """Callable transform: BGR numpy array (OpenCV channel order) -> image object."""

    # NOTE(review): `Image` (presumably PIL.Image) is not imported in the
    # visible import cell; confirm it is in scope before using this class.

    def __call__(self, image):
        """
        Swap the channels of ``image`` from BGR to RGB and wrap the result
        with ``Image.fromarray``.
        """
        return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))


class ImageResize(object):
    """
    Callable transform that resizes an image to a fixed (height, width)
    using bilinear resampling.

    Author's rationale: PIL's resize performs better than pytorch
    https://blog.zuru.tech/machine-learning/2021/08/09/the-dangers-behind-image-resizing
    """

    # NOTE(review): `Image` (presumably PIL.Image) is not imported in the
    # visible import cell; confirm it is in scope before using this class.

    def __init__(self, new_h, new_w):
        """Store the target height ``new_h`` and width ``new_w``."""
        self.new_h = new_h
        self.new_w = new_w

    def __call__(self, image):
        """
        Return ``image.resize(...)`` at the configured size.

        The size is passed as (width, height), i.e. in the opposite order to
        the constructor arguments.
        """
        target_size = (self.new_w, self.new_h)
        return image.resize(target_size, resample=Image.BILINEAR)

class HairRemoval(object):
    """
    Hair removal code
    https://github.com/ThiruRJST/Melanoma_Classification

    Detects thin dark structures with a black-hat morphological filter and
    inpaints over them.

    Instances are callable (``HairRemoval()(image)``) like the other
    transforms in this notebook; ``process`` is kept as an alias.

    Args:
        kernel_size (int): side length of the square structuring element.
        threshold (int): black-hat response above which a pixel is masked.
        inpaint_radius (int): neighbourhood radius passed to ``cv2.inpaint``.
    """

    def __init__(self, kernel_size=17, threshold=10, inpaint_radius=1):
        self.kernel_size = kernel_size
        self.threshold = threshold
        self.inpaint_radius = inpaint_radius

    def __call__(self, image):
        """
        Return a copy of ``image`` with the detected hair pixels inpainted.

        image: BGR image as loaded by ``cv2.imread`` (it is converted with
               ``COLOR_BGR2GRAY`` and passed to ``cv2.inpaint``).
        """
        # convert image to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # cross-shaped kernel for morphologyEx (cv2.MORPH_CROSS == 1)
        kernel = cv2.getStructuringElement(
            cv2.MORPH_CROSS, (self.kernel_size, self.kernel_size)
        )

        # black-hat highlights dark details smaller than the kernel
        blackhat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel)

        # binarise the black-hat response into the inpainting mask
        _, hair_mask = cv2.threshold(
            blackhat, self.threshold, 255, cv2.THRESH_BINARY
        )

        # fill the masked pixels from their surroundings
        return cv2.inpaint(
            image, hair_mask, self.inpaint_radius, cv2.INPAINT_TELEA
        )

    # Backward-compatible name for callers that used the original method.
    process = __call__


class ShadesOfGrey(object):
    """
    Shades-of-Grey colour constancy.
    Code from https://github.com/ThiruRJST/Melanoma_Classification

    Args:
        power (int): the degree of norm, 6 is used in reference paper
        gamma (float): the value of gamma correction, 2.2 is used in reference
            paper; pass None to skip the gamma step
    """

    def __init__(self, power=6, gamma=2.2):
        self.power = power
        self.gamma = gamma

    def __call__(self, image):
        """
        Return the colour-balanced image, cast back to the input dtype.

        image (numpy array): the original image with format of (h, w, c)

        Channels whose estimated illuminant is zero (an all-black channel or
        an entirely black image) are left unscaled instead of producing
        NaN/inf values.
        """
        image_dtype = image.dtype

        if self.gamma is not None:
            # gamma-correct through a 256-entry lookup table
            image = image.astype('uint8')
            look_up_table = np.zeros((256, 1), dtype='uint8')
            for i in range(256):
                look_up_table[i][0] = 255 * pow(i / 255, 1 / self.gamma)
            image = cv2.LUT(image, look_up_table)

        image = image.astype('float32')

        # per-channel Minkowski mean (p-norm) as the illuminant estimate
        image_power = np.power(image, self.power)
        illuminant = np.power(np.mean(image_power, (0, 1)), 1 / self.power)
        illuminant_norm = np.sqrt(np.sum(np.power(illuminant, 2.0)))

        # A zero illuminant gives 0/0 or 1/0 here; silence the warnings and
        # replace the resulting non-finite gains just below.
        with np.errstate(divide='ignore', invalid='ignore'):
            illuminant = illuminant / illuminant_norm
            gains = 1 / (illuminant * np.sqrt(3))
        gains = np.where(np.isfinite(gains), gains, 1.0)

        image = np.multiply(image, gains)

        # Andrew Anikin suggestion: clip before casting back so values do not wrap
        image = np.clip(image, a_min=0, a_max=255)

        return image.astype(image_dtype)
    
class CropBlackCircle(object):
    """
    Crop an image to the bounding box of its Otsu-thresholded foreground.

    https://stackoverflow.com/questions/61986407/crop-x-ray-image-to-remove-black-background
    """

    def __call__(self, image):
        """
        Return the cropped view of ``image``.

        image: BGR image (it is converted with ``COLOR_BGR2GRAY``).
        If thresholding finds no foreground pixel, ``image`` is returned
        unchanged.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Otsu picks the threshold itself; the 0 passed here is ignored
        thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        hh, ww = thresh.shape

        # make the bottom 3 rows black so a full-width white strip there does
        # not stretch the bounding box
        thresh[hh-3:hh, 0:ww] = 0

        # get bounds of white pixels
        rows, cols = np.where(thresh == 255)
        if rows.size == 0:
            # nothing classified as foreground: there is no box to crop to
            return image

        xmin, xmax = np.min(cols), np.max(cols)
        ymin, ymax = np.min(rows), np.max(rows)

        # Slice ends are exclusive: +1 keeps the last foreground column.
        # The row end stays at ymax + 3 (row ymax plus two rows below it) to
        # add back rows hidden by the blanking step above.
        return image[ymin:ymax + 3, xmin:xmax + 1]

In [None]:
# version 1: includes hair removal, circle crop-out, and colour consistency

def version_1(image):
    """
    Run the version-1 preprocessing chain on a BGR image and return the result.

    Steps, in order: HairRemoval -> CropBlackCircle -> ShadesOfGrey.
    """
    # HairRemoval exposes its work through `process`; calling the instance
    # directly fails when the class defines no __call__.
    image = HairRemoval().process(image)
    image = CropBlackCircle()(image)
    image = ShadesOfGrey()(image)
    return image

# version 2: includes circle crop-out and colour consistency

def version_2(image):
    """
    Run the version-2 preprocessing chain on a BGR image and return the result.

    Steps, in order: CropBlackCircle -> ShadesOfGrey (no hair removal).
    """
    for transform_cls in (CropBlackCircle, ShadesOfGrey):
        image = transform_cls()(image)
    return image

In [None]:

def preprocess_directory(data_dir, transform):
    """
    Apply ``transform`` to every readable image under ``data_dir``.

    The directory tree is walked recursively and each file is overwritten in
    place with the transformed image.

    Args:
        data_dir (str): root directory to walk.
        transform (callable): takes the BGR array from ``cv2.imread`` and
            returns the array to write back.
    """
    for root, _dirs, files in os.walk(data_dir, topdown=True):
        for name in files:
            image_path = os.path.join(root, name)
            # Load image (pass the variable, not the literal string "image_path")
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread returns None instead of raising when a file
                # cannot be decoded (e.g. a non-image file in the tree)
                print(f"Skipping unreadable file: {image_path}")
                continue
            # Change image and write it back to image_path
            cv2.imwrite(image_path, transform(img))


# NOTE(review): both passes below target the same directory and overwrite it
# in place, so version_2 runs on files already processed by version_1.
# Presumably each pass should point at its own copy of the data - confirm
# and adjust the paths before running.
DATA_DIR = "/home/jake/Desktop/data"

preprocess_directory(DATA_DIR, version_1)

# same as above but for version_2 preprocessing
preprocess_directory(DATA_DIR, version_2)
    

In [None]:
# copying directory and its contents
#
# rsync flags: -a archive mode (recursive, preserves attributes), -v verbose,
# -K keep-dirlinks, -h human-readable sizes, --progress per-file progress,
# --ignore-existing skip files that already exist at the destination.
# The source path has no trailing slash, so the ISIC_2019_Split_val directory
# itself is created inside each destination (see the file list in the output).
# NOTE(review): these are Google Drive paths, while the preprocessing cell
# walks /home/jake/Desktop/data - confirm which copy each preprocessing
# version is meant to run on.

!rsync -avKh --progress --ignore-existing "/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Split_val" "/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v1_prepro"
!rsync -avKh --progress --ignore-existing "/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Split_val" "/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro"

sending incremental file list
ISIC_2019_Split_val/
ISIC_2019_Split_val/test/
ISIC_2019_Split_val/test/AK/
ISIC_2019_Split_val/test/AK/ISIC_0025825.jpg
        294.40K 100%   49.90MB/s    0:00:00 (xfr#1, ir-chk=1157/1170)
ISIC_2019_Split_val/test/AK/ISIC_0026149.jpg
        231.51K 100%  866.23kB/s    0:00:00 (xfr#2, ir-chk=1156/1170)
ISIC_2019_Split_val/test/AK/ISIC_0026525.jpg
        273.77K 100%  576.20kB/s    0:00:00 (xfr#3, ir-chk=1155/1170)
ISIC_2019_Split_val/test/AK/ISIC_0026626.jpg
        290.93K 100%  364.25kB/s    0:00:00 (xfr#4, ir-chk=1154/1170)
ISIC_2019_Split_val/test/AK/ISIC_0028076.jpg
        256.83K 100%  254.11kB/s    0:00:00 (xfr#5, ir-chk=1153/1170)
ISIC_2019_Split_val/test/AK/ISIC_0028190.jpg
        252.12K 100%  195.10kB/s    0:00:01 (xfr#6, ir-chk=1152/1170)
ISIC_2019_Split_val/test/AK/ISIC_0028370.jpg
        276.79K 100%  615.71kB/s    0:00:00 (xfr#7, ir-chk=1151/1170)
ISIC_2019_Split_val/test/AK/ISIC_0028517.jpg
        291.59K 100%  447.73kB/s    0:00:00 