# Image pre-processing
This file contains several classes for image pre-processing, including:
- HairRemoval(): hair removal.
- CropBlackCircle(): cropping strategy to cut large black boundary.
- ShadesOfGrey(): colour adaptation.
- ImageResize(): using the PIL package resize with the bilinear option.

All code has been referenced where appropriate.

Further, the code used to apply the final chosen pre-processing stages (hair removal and the cropping strategy) to all images is included.

NB: the resizing step was instead carried out using the Dataloader class prior to model training.

In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm

Collecting ipdb
  Downloading ipdb-0.13.9.tar.gz (16 kB)
Collecting ipython>=7.17.0
  Downloading ipython-7.33.0-py3-none-any.whl (793 kB)
[K     |████████████████████████████████| 793 kB 6.3 MB/s 
[?25hCollecting toml>=0.10.2
  Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0
  Downloading prompt_toolkit-3.0.29-py3-none-any.whl (381 kB)
[K     |████████████████████████████████| 381 kB 51.7 MB/s 
Building wheels for collected packages: ipdb
  Building wheel for ipdb (setup.py) ... [?25l[?25hdone
  Created wheel for ipdb: filename=ipdb-0.13.9-py3-none-any.whl size=11648 sha256=823201db0caf18feea45dca04a242a4e813cca927f0f6a6b196cf82cc2072192
  Stored in directory: /root/.cache/pip/wheels/65/cd/cc/aaf92acae337a28fdd2aa4d632196a59745c8c39f76eaeed01
Successfully built ipdb
Installing collected packages: prompt-toolkit, toml, ipython, ipdb
  Attempting uninstall: prompt-toolkit
    Found existing installation: prompt-toolkit 1.

In [None]:
class PILtoCV2(object):
    """
    Callable that turns an RGB image into a BGR array for use with OpenCV.

    The input is passed through ``np.array`` first, so anything NumPy can
    convert to an array is accepted (presumably a PIL image in RGB mode,
    judging by the class name -- confirm against callers).
    """

    def __call__(self, image):
        # Materialise the pixels as an array, then swap the channel order
        # from RGB to OpenCV's BGR convention.
        rgb_pixels = np.array(image)
        return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)


class CV2toPIL(object):
    """
    Callable that turns a BGR OpenCV array into an ``Image`` object.

    NOTE(review): ``Image`` is not imported in the visible part of this file;
    it is presumably ``PIL.Image`` -- confirm it is imported before use.
    """

    def __call__(self, image):
        # Swap BGR -> RGB first, then wrap the array with Image.fromarray.
        return Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))


class ImageResize(object):
    """
    Resize an image to a fixed height and width with bilinear resampling.

    PIL's resize performs better than pytorch
    https://blog.zuru.tech/machine-learning/2021/08/09/the-dangers-behind-image-resizing

    NOTE(review): ``Image`` is not imported in the visible part of this file;
    it is presumably ``PIL.Image`` -- confirm it is imported before use.
    """

    def __init__(self, new_h, new_w):
        # Target size, stored as separate height and width.
        self.new_w = new_w
        self.new_h = new_h

    def __call__(self, image):
        # The size tuple handed to ``resize`` is ordered (width, height).
        target_size = (self.new_w, self.new_h)
        return image.resize(target_size, resample=Image.BILINEAR)

class HairRemoval(object):
    """
    Hair removal code
    https://github.com/ThiruRJST/Melanoma_Classification

    Dark thin structures are located with a black-hat morphological filter,
    thresholded into a binary mask and then inpainted over.

    kernel_size (int): side length of the square, cross-shaped structuring
        element used by the black-hat filter (default 17)
    threshold (int): black-hat response above which a pixel is put in the
        inpainting mask (default 10)
    inpaint_radius (int): neighbourhood radius passed to cv2.inpaint
        (default 1)
    """

    def __init__(self, kernel_size=17, threshold=10, inpaint_radius=1):
        self.kernel_size = kernel_size
        self.threshold = threshold
        self.inpaint_radius = inpaint_radius

    def process(self, image):
        """
        Return a copy of `image` with the masked (hair) pixels inpainted.

        image: 3-channel array; it is converted with COLOR_BGR2GRAY, so BGR
            channel order is expected.
        """
        # convert image to grayscale
        gray_scale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # kernel for morphologyEx (cv2.MORPH_CROSS is the named form of the
        # shape code 1 used previously)
        kernel = cv2.getStructuringElement(
            cv2.MORPH_CROSS, (self.kernel_size, self.kernel_size)
        )

        # apply MORPH_BLACKHAT to the grayscale image
        blackhat = cv2.morphologyEx(gray_scale, cv2.MORPH_BLACKHAT, kernel)

        # binarise the black-hat response into the inpainting mask
        _, hair_mask = cv2.threshold(
            blackhat, self.threshold, 255, cv2.THRESH_BINARY
        )

        # inpaint the original image wherever the mask is set
        return cv2.inpaint(
            image, hair_mask, self.inpaint_radius, cv2.INPAINT_TELEA
        )


class ShadesOfGrey(object):
    """
    Shades-of-Grey colour constancy.

    Code from https://github.com/ThiruRJST/Melanoma_Classification
    image (numpy array): the original image with format of (h, w, c)
    power (int): the degree of norm, 6 is used in reference paper
    gamma (float): the value of gamma correction, 2.2 is used in reference
        paper; pass None to skip the gamma step
    """

    def __init__(self, power=6, gamma=2.2):
        self.power = power
        self.gamma = gamma

    def _gamma_lut(self):
        """Build the (256, 1) uint8 look-up table for the gamma correction."""
        # Values are truncated (not rounded) when cast to uint8.
        table = np.array(
            [255 * pow(level / 255, 1 / self.gamma) for level in range(256)]
        )
        return table.astype('uint8').reshape(256, 1)

    def process(self, image):
        """
        Return the colour-corrected image, cast back to the input dtype.

        Values are clipped to [0, 255] before the final cast. When gamma is
        set, the input is first cast to uint8 for the look-up table.
        """
        image_dtype = image.dtype

        if self.gamma is not None:
            image = image.astype('uint8')
            image = cv2.LUT(image, self._gamma_lut())

        image = image.astype('float32')
        image_power = np.power(image, self.power)
        # Per-channel Minkowski p-norm estimate of the illuminant.
        rgb_vec = np.power(np.mean(image_power, (0, 1)), 1 / self.power)
        rgb_norm = np.sqrt(np.sum(np.power(rgb_vec, 2.0)))

        if not rgb_norm > 0:
            # All-black (or empty) image: no illuminant can be estimated and
            # dividing by the zero norm would fill the output with NaN.
            return np.clip(image, a_min=0, a_max=255).astype(image_dtype)

        scaled = (rgb_vec / rgb_norm) * np.sqrt(3)
        # A channel that is zero everywhere would give an infinite gain (and
        # 0 * inf = NaN pixels); give it a gain of 0 so it simply stays zero.
        gain = np.divide(
            1.0, scaled, out=np.zeros_like(scaled), where=scaled != 0
        )
        image = np.multiply(image, gain)

        # Andrew Anikin suggestion
        image = np.clip(image, a_min=0, a_max=255)

        return image.astype(image_dtype)
    
class CropBlackCircle(object):
    """
    Crop an image to the bounding box of its bright (foreground) region.

    https://stackoverflow.com/questions/61986407/crop-x-ray-image-to-remove-black-background
    """

    def process(self, image):
        """
        Return `image` cropped to the bounds of the Otsu-thresholded foreground.

        image: 3-channel array; it is converted with COLOR_BGR2GRAY, so BGR
            channel order is expected.

        If no pixel lies above the threshold (e.g. an all-black image) the
        image is returned unchanged instead of raising.
        """
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Otsu picks the threshold automatically; foreground becomes 255.
        thresh = cv2.threshold(
            gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]
        hh, ww = thresh.shape

        # Blank the bottom 3 rows of the mask so that a bright strip spanning
        # the full width there does not drive the bounding box.
        # max() keeps the slice start from going negative on tiny images.
        thresh[max(hh - 3, 0):hh, 0:ww] = 0

        # get bounds of white pixels
        rows, cols = np.where(thresh == 255)
        if rows.size == 0:
            # Nothing above the threshold: there is no box to crop to.
            return image

        ymin, ymax = rows.min(), rows.max()
        xmin, xmax = cols.min(), cols.max()

        # Slice ends are exclusive: "+ 3" keeps the last white row plus up to
        # two of the rows below it (compensating for the blanked rows), and
        # "+ 1" keeps the last white column, which was previously dropped.
        return image[ymin:ymax + 3, xmin:xmax + 1]

In [None]:
# Version 1 pipeline: hair removal, black-circle crop, then colour constancy.
# The processors are built once at module level and reused for every image.
hair_remover = HairRemoval()
crop_black_circle = CropBlackCircle()
shades_of_grey = ShadesOfGrey()


def version_1(image):
    """Run hair removal, then the black-circle crop, then Shades of Grey."""
    hairless = hair_remover.process(image)
    cropped = crop_black_circle.process(hairless)
    return shades_of_grey.process(cropped)

# Version 2 pipeline: hair removal followed by the black-circle crop
# (no colour-constancy step).

def version_2(image):
    """Run hair removal and then the black-circle crop on `image`."""
    return crop_black_circle.process(hair_remover.process(image))

In [None]:
# for root, dirs, files in tqdm(os.walk("/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v1_prepro", topdown=True)):
#     for name in tqdm(files):
#         image_path = os.path.join(root, name)
#         # Load image
#         img = cv2.imread(image_path)
#         # Change image
#         img = version_1(img)
#         # Write new image to image_path
#         cv2.imwrite(image_path, img)

# same as above but for version_2 preprocessing 
# for root, dirs, files in tqdm(os.walk(original_root_path, topdown=True)):
#     for name in tqdm(files):
#         image_path = os.path.join(root, name)
#         img = cv2.imread(image_path)
#         img = version_2(img)
#         cv2.imwrite(image_path, img)

In [None]:
def version_2(image):
    """
    Apply the version 2 pre-processing: hair removal, then black-circle crop.

    Relies on the module-level `hair_remover` and `crop_black_circle` objects.
    """
    without_hair = hair_remover.process(image)
    return crop_black_circle.process(without_hair)

# Source tree to read from, and the folder-name substitution that maps each
# source path onto the matching path in the pre-processed output tree.
original_root_path = "/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Split_val"
original_root_folder = "ISIC_2019_Split_val"
new_root_folder = "ISIC_2019_v2_prepro"

print("Processing images")
for root, _dirs, files in os.walk(original_root_path, topdown=True):
    print(f"Processing {root}")

    # Mirror the directory layout on demand: cv2.imwrite does not create
    # missing folders, it just fails and returns False.
    new_dir = root.replace(original_root_folder, new_root_folder)
    os.makedirs(new_dir, exist_ok=True)

    for name in files:
        original_image_path = os.path.join(root, name)
        new_image_path = original_image_path.replace(original_root_folder, new_root_folder)

        # Already processed on a previous run: skip so the job can be resumed.
        if os.path.exists(new_image_path):
            continue

        print(f"{original_image_path} -> {new_image_path}")
        img = cv2.imread(original_image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode.
            print(f"Skipping unreadable file: {original_image_path}")
            continue

        img = version_2(img)
        if not cv2.imwrite(new_image_path, img):
            print(f"Failed to write: {new_image_path}")
