 # Quantifying Skin Colour

 Using the technique described in https://arxiv.org/pdf/2202.02832.pdf

 GitHub: https://github.com/pbevan1/Detecting-Melanoma-Fairly

In [None]:
import os
import pandas as pd
import cv2
from PIL import Image
import math
from skimage import io, color
import torch
from tqdm import tqdm

In [None]:
# Get mean and standard deviation of training data for normalisation
def get_mean_std(loader):
    """Return per-channel mean and standard deviation over a data loader.

    Each item yielded by ``loader`` must unpack into exactly four values, the
    first of which is a 4-D tensor that is reduced over dimensions 0, 2 and 3
    (leaving dimension 1 as the channel axis).  The remaining three values
    are ignored.

    The statistics are the average of the per-batch means of ``x`` and
    ``x**2``, so they match the exact dataset statistics only when every
    batch holds the same number of elements.

    Args:
        loader: Iterable of ``(data, _, _, _)`` tuples.

    Returns:
        A ``(mean, std)`` pair of Python lists with one float per channel.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    channels_sum, channels_squared_sum, num_batches = 0, 0, 0
    for data, _, _, _ in loader:
        channels_sum += torch.mean(data, dim=[0, 2, 3])
        channels_squared_sum += torch.mean(data ** 2, dim=[0, 2, 3])
        num_batches += 1

    mean = channels_sum / num_batches
    # Var[x] = E[x^2] - E[x]^2.  For (near-)constant channels floating point
    # rounding can push this slightly below zero, which would make the square
    # root NaN, so clamp at zero first.
    variance = channels_squared_sum / num_batches - mean ** 2
    std = variance.clamp(min=0) ** 0.5
    return mean.tolist(), std.tolist()

# Calculates image dimensions from raw images and saves to dataframe
def get_size_from_raw(df):
    """Add a ``size`` column holding each image's dimensions as text.

    Every path in ``df['filepath']`` is opened with PIL and its dimensions
    are stored as ``'<width>x<height>'``.

    Args:
        df: DataFrame with a ``filepath`` column of image paths.  It is
            modified in place.

    Returns:
        The same DataFrame, with the ``size`` column added or overwritten.
    """
    def sizeify(filepath):
        # The context manager closes the underlying file as soon as the size
        # has been read, so a large dataframe does not leave one open handle
        # per row behind.
        with Image.open(filepath) as image:
            width, height = image.size
        return f'{width}x{height}'

    df['size'] = df['filepath'].apply(sizeify)
    return df


# Use on ISIC dataframe to get dimensions as single variable
def get_size_ISIC(df):
    """Combine the ``width`` and ``height`` columns into one ``size`` column.

    Each row's ``size`` becomes the string ``'<width>x<height>'`` built from
    that row's own values.

    Args:
        df: DataFrame with ``width`` and ``height`` columns.  It is modified
            in place.

    Returns:
        The same DataFrame, with the ``size`` column added or overwritten.
    """
    # Build the label from per-row values.  Formatting the whole ``df.width``
    # and ``df.height`` columns for every row would embed the repr of the
    # entire Series in each cell instead of that row's dimensions.
    df['size'] = df['width'].astype(str) + 'x' + df['height'].astype(str)
    return df


# Hair removal for ITA calculation
def hair_remove(image):
    """Return a copy of ``image`` with thin dark structures inpainted away.

    A black-hat transform of the grayscale image is thresholded into a binary
    mask, and the masked pixels are filled in with OpenCV's Telea inpainting.

    Args:
        image: Image array; it is converted with ``cv2.COLOR_RGB2GRAY``, so
            RGB channel order is expected.

    Returns:
        The inpainted image produced by ``cv2.inpaint``.
    """
    # Single-channel version, used only to locate the hairs
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # 17x17 structuring element; shape code 1 is cv2.MORPH_CROSS
    structuring_element = cv2.getStructuringElement(1, (17, 17))

    # Black-hat response, then binarise: responses above 10 become 255
    dark_details = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, structuring_element)
    hair_mask = cv2.threshold(dark_details, 10, 255, cv2.THRESH_BINARY)[1]

    # Fill the masked pixels from their surroundings (inpaint radius 1)
    return cv2.inpaint(image, hair_mask, 1, cv2.INPAINT_TELEA)


# Side length, in pixels, of each square patch sampled for the ITA estimate
_ITA_PATCH_SIZE = 20

# Top-left (row, col) corner of every sampled patch: four patches at the
# middle of each image edge, then four near the corners.
# NOTE(review): the coordinates reach index 250, so they presumably target
# 256x256 images - confirm against the dataset.
_ITA_PATCH_ORIGINS = (
    (230, 115),
    (5, 115),
    (115, 5),
    (115, 230),
    (216, 216),
    (216, 20),
    (20, 20),
    (20, 216),
)

# (exclusive lower ITA bound, skin type), lightest first; anything that is
# not above the last bound is type 6.
_ITA_BANDS_KIN = ((55, 1), (41, 2), (28, 3), (19, 4), (10, 5))


def _ita_to_band_kin(ita):
    """Map an ITA angle in degrees to a skin type 1-6, or -1 for NaN."""
    if math.isnan(ita):
        # NaN compares false with every threshold, so no band applies
        return -1
    for lower_bound, band in _ITA_BANDS_KIN:
        if ita > lower_bound:
            return band
    return 6


# Calculates Fitzpatrick skin type of an image using Kinyanjui et al.'s thresholds
def get_sample_ita_kin(path):
    """Estimate the skin type of the image at ``path`` from its ITA.

    The image is read, passed through ``hair_remove`` and converted to CIELAB.
    For each of eight 20x20 border patches the mean L and b values give an
    ITA of ``atan((L - 50) / b)`` in degrees; the largest (lightest) ITA is
    then mapped to a band: >55 -> 1, >41 -> 2, >28 -> 3, >19 -> 4, >10 -> 5,
    otherwise 6.

    Args:
        path: Location of the image, passed to ``skimage.io.imread``.

    Returns:
        An int from 1 to 6, or -1 when the image could not be processed or
        the ITA is NaN.
    """
    # Local import so this function does not depend on edits to the import cell
    import logging

    try:
        rgb = hair_remove(io.imread(path))
        lab = color.rgb2lab(rgb)

        # Calculating ITA values, taking samples from different parts of the image
        ita_values = []
        for row, col in _ITA_PATCH_ORIGINS:
            row_end = row + _ITA_PATCH_SIZE
            col_end = col + _ITA_PATCH_SIZE
            lightness = lab[row:row_end, col:col_end, 0].mean()
            yellowness = lab[row:row_end, col:col_end, 2].mean()
            ita = math.atan((lightness - 50) / yellowness) * (180 / math.pi)
            ita_values.append(ita)

        # Using max ITA value (lightest)
        return _ita_to_band_kin(max(ita_values))
    except Exception as err:
        # Best effort: one unreadable image must not abort a whole batch, but
        # report the reason instead of hiding it behind the -1 sentinel.
        logging.getLogger(__name__).warning(
            'Could not estimate skin type for %s: %s', path, err
        )
        return -1


# Getting skin types for ISIC data
def get_isic_skin_type(
    metadata_path='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata.csv',
    image_dir='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_256',
):
    """Add estimated skin types to the ISIC metadata CSV.

    Reads the metadata, keeps only the rows whose ``image`` id has a matching
    ``<id>.jpg`` file in ``image_dir``, adds ``filepath`` and ``fitzpatrick``
    columns (the latter from ``get_sample_ita_kin``) and writes the result
    back to ``metadata_path``.

    NOTE(review): the CSV is overwritten with the filtered rows, so metadata
    for images missing from ``image_dir`` is dropped from the file.

    Args:
        metadata_path: CSV file to read and then overwrite.
        image_dir: Directory containing the ``.jpg`` images.

    Returns:
        None.
    """
    df_train = pd.read_csv(metadata_path)

    # Remove the extension as a suffix.  ``str.strip('.jpg')`` strips any of
    # the characters '.', 'j', 'p', 'g' from both ends and would mangle ids
    # that start or end with one of them.
    available_ids = {
        os.path.splitext(file_name)[0]
        for file_name in os.listdir(image_dir)
        if file_name.endswith('.jpg')
    }

    # Copy so the new columns are set on an independent frame rather than on
    # a slice of the original (avoids pandas' SettingWithCopyWarning).
    df_train = df_train[df_train['image'].isin(available_ids)].copy()
    df_train['filepath'] = [
        os.path.join(image_dir, f'{image_id}.jpg')
        for image_id in df_train['image']
    ]

    # Getting ITA for ISIC Training and saving to csv
    df_train['fitzpatrick'] = [
        get_sample_ita_kin(image_path) for image_path in tqdm(df_train['filepath'])
    ]
    df_train.to_csv(metadata_path, index=False)
    print(f'Fitzpatrick skin type column added to {os.path.basename(metadata_path)}')


In [None]:
# Estimate skin types for the ISIC 2019 images; this rewrites the metadata CSV
get_isic_skin_type()

100%|██████████| 21/21 [00:10<00:00,  1.95it/s]

Fitzpatrick skin type column added to ISIC_2019_Training_Metadata.csv



