 # Quantifying Skin Colour

 Using the technique described in https://arxiv.org/pdf/2202.02832.pdf

 GitHub: https://github.com/pbevan1/Detecting-Melanoma-Fairly

In [2]:
import os
import pandas as pd
import cv2
from PIL import Image
import math
from skimage import io, color
import torch
from tqdm import tqdm
import json

In [55]:
# Get mean and standard deviation of training data for normalisation
def get_mean_std(loader):
    """Compute the per-channel mean and standard deviation over a data loader.

    Every item yielded by ``loader`` must unpack into four values; only the
    first one is used. It is reduced over dims 0, 2 and 3, so it is assumed to
    be a 4-D tensor laid out as (batch, channel, height, width) -- TODO confirm
    against the dataset class.

    Per-batch statistics are weighted by the batch size so that a smaller
    final batch does not count as much as a full one. The result is exact as
    long as every batch shares the same spatial size.

    Returns:
        ``(mean, std)``: two lists of floats with one entry per channel.

    Raises:
        ZeroDivisionError: if ``loader`` yields no batches.
    """
    channels_sum, channels_squared_sum, num_samples = 0, 0, 0
    for data, _, _, _ in loader:
        batch_size = data.shape[0]
        channels_sum += torch.mean(data, dim=[0, 2, 3]) * batch_size
        channels_squared_sum += torch.mean(data**2, dim=[0, 2, 3]) * batch_size
        num_samples += batch_size
    mean = channels_sum / num_samples
    # Var[x] = E[x^2] - E[x]^2. Rounding can push this marginally below zero
    # for (near-)constant channels, which would turn the square root into NaN.
    variance = (channels_squared_sum / num_samples - mean**2).clamp(min=0)
    std = variance**0.5
    return mean.tolist(), std.tolist()

# Calculates image dimensions from raw images and saves to dataframe
def get_size_from_raw(df):
    """Add a ``size`` column holding "<width>x<height>" for every image file.

    Args:
        df: DataFrame with a ``filepath`` column of paths readable by
            ``PIL.Image.open``.

    Returns:
        The same DataFrame object, modified in place with the new ``size``
        column.
    """
    def sizeify(filepath):
        # The context manager closes the underlying file as soon as the size
        # has been read, instead of leaving one open handle per row until
        # garbage collection.
        with Image.open(filepath) as image:
            width, height = image.size
        return f'{width}x{height}'

    df['size'] = df['filepath'].apply(sizeify)
    return df


# Use on ISIC dataframe to get dimensions as single variable
def get_size_ISIC(df):
    """Add a ``size`` column of the form "<width>x<height>" to ``df``.

    Args:
        df: DataFrame with ``width`` and ``height`` columns.

    Returns:
        The same DataFrame object, modified in place with the new ``size``
        column.
    """
    # Format each row's own width/height pair. Formatting the whole
    # ``df.width`` / ``df.height`` columns would put the string form of two
    # full Series into every cell.
    df['size'] = [
        f'{width}x{height}' for width, height in zip(df['width'], df['height'])
    ]
    return df


# Hair removal for ITA calculation
def hair_remove(image):
    """Return a copy of ``image`` with thin dark structures inpainted away.

    The image is converted to greyscale (``COLOR_RGB2GRAY``, so RGB channel
    order is expected), a black-hat morphology pass is applied, the result is
    thresholded into a binary mask, and the masked pixels of the original
    image are filled in with Telea inpainting.
    """
    # 17x17 structuring element; shape code 1 is cv2.MORPH_CROSS.
    cross_kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (17, 17))

    grey = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    dark_details = cv2.morphologyEx(grey, cv2.MORPH_BLACKHAT, cross_kernel)

    # Black-hat responses above 10 become 255 in the mask, everything else 0.
    hair_mask = cv2.threshold(dark_details, 10, 255, cv2.THRESH_BINARY)[1]

    # Fill the masked pixels from their surroundings (inpaint radius 1).
    return cv2.inpaint(image, hair_mask, 1, cv2.INPAINT_TELEA)


# Calculates Fitzpatrick skin type of an image using Kinyanjui et al.'s thresholds
def get_sample_ita_kin(rgb):
    """Estimate a skin-shade band (1-6) from eight fixed patches of an image.

    The image is converted to CIELAB and the mean L and b of each 20x20 patch
    give an individual typology angle, ``ITA = atan((L - 50) / b)`` in
    degrees. The largest (lightest) patch ITA is mapped to a band:

        > 55 -> 1, (41, 55] -> 2, (28, 41] -> 3,
        (19, 28] -> 4, (10, 19] -> 5, <= 10 -> 6

    Args:
        rgb: RGB image accepted by ``skimage.color.rgb2lab``. The patch
            coordinates are hard-coded and reach index 250, so the image is
            expected to be 256x256 (the caller in this file resizes to that).

    Returns:
        The band as an int in 1..6, or -1 if the image could not be processed
        or no patch produced a usable ITA value.
    """
    # (row_start, row_stop, col_start, col_stop) of each sampled patch.
    patches = (
        (230, 250, 115, 135),
        (5, 25, 115, 135),
        (115, 135, 5, 25),
        (115, 135, 230, 250),
        (216, 236, 216, 236),
        (216, 236, 20, 40),
        (20, 40, 20, 40),
        (20, 40, 216, 236),
    )

    try:
        lab = color.rgb2lab(rgb)
        ita_lst = []
        for row_start, row_stop, col_start, col_stop in patches:
            L = lab[row_start:row_stop, col_start:col_stop, 0].mean()
            b = lab[row_start:row_stop, col_start:col_stop, 2].mean()
            ita_lst.append(math.atan((L - 50) / b) * (180 / math.pi))
    except Exception:
        # Deliberate best-effort: an image that cannot be converted or
        # sampled is reported with the -1 sentinel instead of aborting a
        # long batch run.
        return -1

    # A patch that falls outside the image (empty slice) or evaluates 0/0
    # yields NaN. NaN makes max() depend on element order, so such patches
    # are ignored.
    valid_itas = [ita for ita in ita_lst if not math.isnan(ita)]
    if not valid_itas:
        return -1

    # Using max ITA value (lightest)
    ita_max = max(valid_itas)

    # Getting skin shade band from ITA: the first lower bound exceeded wins.
    for band, lower_bound in enumerate((55, 41, 28, 19, 10), start=1):
        if ita_max > lower_bound:
            return band
    return 6


# Getting skin types for ISIC data
# def get_isic_skin_type():
#     # Getting ITA for ISIC Training and saving to csv
#     df_train = pd.read_csv('/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata.csv')
#     image_subset = [
#     x.strip(".jpg")
#     for x in os.listdir("/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro")
#     ]
#     df_train = df_train[df_train["image"].isin(image_subset)]
#     df_train['filepath'] = df_train['image'].apply(
#         lambda x: os.path.join(f'/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_256/{x}.jpg')
#     )

#     # df_train['fitzpatrick'] = df_train['filepath'].apply(lambda x: get_sample_ita_kin(x))
#     df_train['fitzpatrick'] = [get_sample_ita_kin(x) for x in tqdm(df_train["filepath"])]
#     df_train.to_csv('/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata.csv', index=False)
#     print('Fitzpatrick skin type column added to ISIC_2019_Training_Metadata.csv')


def get_additional_metadata(
    metadata_csv='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata.csv',
    image_folder="/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro",
    output_csv='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths.csv',
):
    """Attach image path, split and skin-shade band to the ISIC metadata.

    Walks ``image_folder`` recursively. For every file found, the metadata
    row whose ``image`` equals the file name up to its first "." receives:

    * ``image_path``  -- full path of the file,
    * ``rand_split``  -- "train" / "val" if the directory path contains
      "/train/" / "/val/", otherwise "test",
    * ``fitzpatrick`` -- result of ``get_sample_ita_kin`` on the image resized
      to 256x256.

    Rows with no matching file keep the placeholders -1 (``image_path``) and
    "None" (``rand_split``).

    Args:
        metadata_csv: CSV to read; must contain an ``image`` column.
        image_folder: root directory that is walked for image files.
        output_csv: destination CSV, rewritten after every directory visited.
    """
    df = pd.read_csv(metadata_csv)

    # Object dtype lets the -1 placeholder and the string paths assigned below
    # share one column without a dtype-mismatch assignment.
    df["image_path"] = pd.Series([-1] * len(df), index=df.index, dtype=object)
    df["rand_split"] = ["None"] * len(df)

    for root, _dirs, files in os.walk(image_folder):
        print(f"Processing {root}")

        # The split depends only on the directory, so decide it once per
        # directory rather than once per file.
        if "/train/" in root:
            split = "train"
        elif "/val/" in root:
            split = "val"
        else:
            split = "test"

        for file in tqdm(files):
            image_path = os.path.join(root, file)
            image_id = file.split(".")[0]

            # Close the file handle as soon as the resized copy exists.
            with Image.open(image_path) as raw_image:
                image = raw_image.resize((256, 256))
            fitz_score = get_sample_ita_kin(image)

            # Build the row mask once and reuse it for all three columns.
            rows = df.image == image_id
            df.loc[rows, "image_path"] = image_path
            df.loc[rows, "rand_split"] = split
            df.loc[rows, "fitzpatrick"] = fitz_score

        # Written after every directory, presumably so that partial progress
        # survives an interrupted run.
        print("Writing to file")
        df.to_csv(output_csv, index=False)
    print("Complete!")


In [56]:
get_additional_metadata()

Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro


0it [00:00, ?it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train


0it [00:00, ?it/s]

Writing to file





Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/NV


100%|██████████| 10300/10300 [09:57<00:00, 17.23it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/SCC


100%|██████████| 502/502 [00:35<00:00, 14.30it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/BKL


100%|██████████| 2099/2099 [02:48<00:00, 12.45it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/BCC


100%|██████████| 2658/2658 [03:59<00:00, 11.10it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/VASC


100%|██████████| 202/202 [00:13<00:00, 15.27it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/MEL


100%|██████████| 3618/3618 [05:06<00:00, 11.81it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/AK


100%|██████████| 694/694 [00:54<00:00, 12.72it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/train/DF


100%|██████████| 191/191 [00:12<00:00, 15.56it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val


0it [00:00, ?it/s]

Writing to file





Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/NV


100%|██████████| 1287/1287 [01:38<00:00, 13.03it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/MEL


100%|██████████| 452/452 [00:31<00:00, 14.23it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/BCC


100%|██████████| 332/332 [00:23<00:00, 13.90it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/BKL


100%|██████████| 262/262 [00:18<00:00, 14.23it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/AK


100%|██████████| 87/87 [00:23<00:00,  3.75it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/VASC


100%|██████████| 26/26 [00:06<00:00,  3.83it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/DF


100%|██████████| 24/24 [00:06<00:00,  3.92it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/val/SCC


100%|██████████| 63/63 [00:16<00:00,  3.81it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test


0it [00:00, ?it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/AK


100%|██████████| 86/86 [00:22<00:00,  3.85it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/BKL


100%|██████████| 263/263 [00:17<00:00, 15.14it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/MEL


100%|██████████| 452/452 [00:32<00:00, 13.95it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/BCC


100%|██████████| 333/333 [00:24<00:00, 13.57it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/VASC


100%|██████████| 25/25 [00:06<00:00,  4.10it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/NV


100%|██████████| 1288/1288 [01:29<00:00, 14.34it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/SCC


100%|██████████| 63/63 [00:16<00:00,  3.71it/s]


Writing to file
Processing /content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_v2_prepro/test/DF


100%|██████████| 24/24 [00:07<00:00,  3.38it/s]


Writing to file
Complete!


In [3]:
 full_csv = pd.read_csv("/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths.csv")

In [4]:
full_csv

Unnamed: 0,image,age_approx,anatom_site_general,lesion_id,sex,image_path,rand_split,fitzpatrick
0,ISIC_0000000,55.0,anterior torso,,female,/content/drive/MyDrive/Dissertation/skin_lesio...,test,6.0
1,ISIC_0000001,30.0,anterior torso,,female,/content/drive/MyDrive/Dissertation/skin_lesio...,test,6.0
2,ISIC_0000002,60.0,upper extremity,,female,/content/drive/MyDrive/Dissertation/skin_lesio...,test,6.0
3,ISIC_0000003,30.0,upper extremity,,male,/content/drive/MyDrive/Dissertation/skin_lesio...,test,1.0
4,ISIC_0000004,80.0,posterior torso,,male,/content/drive/MyDrive/Dissertation/skin_lesio...,test,1.0
...,...,...,...,...,...,...,...,...
25326,ISIC_0073247,85.0,head/neck,BCN_0003925,female,/content/drive/MyDrive/Dissertation/skin_lesio...,test,1.0
25327,ISIC_0073248,65.0,anterior torso,BCN_0001819,male,/content/drive/MyDrive/Dissertation/skin_lesio...,test,6.0
25328,ISIC_0073249,70.0,lower extremity,BCN_0001085,male,/content/drive/MyDrive/Dissertation/skin_lesio...,test,1.0
25329,ISIC_0073251,55.0,palms/soles,BCN_0002083,female,/content/drive/MyDrive/Dissertation/skin_lesio...,test,3.0


In [5]:
# Combine the per-split BRISQUE JSON files into a single dictionary.
# First load the metrics from v2_prepro_brisque_metrics.json (treated as the training split below).

with open("/content/drive/MyDrive/Dissertation/skin_lesion_data/v2_prepro_brisque_metrics.json", "r") as f:
    train_brisque_metrics_reopened = json.load(f)

In [6]:
# Load the BRISQUE metrics saved for the test split.
with open("/content/drive/MyDrive/Dissertation/skin_lesion_data/test_brisque_metrics.json", "r") as f:
    test_brisque_metrics_reopened = json.load(f)

In [7]:
# Load the BRISQUE metrics saved for the validation split.
with open("/content/drive/MyDrive/Dissertation/skin_lesion_data/val_brisque_metrics.json", "r") as f:
    val_brisque_metrics_reopened = json.load(f)

In [8]:
# Merge the three per-split dictionaries into one. Keys keep their first-seen
# order; if a key occurs in more than one split, the later one (test, then
# val) supplies the value.

total_dataset_brisque = {
    **train_brisque_metrics_reopened,
    **test_brisque_metrics_reopened,
    **val_brisque_metrics_reopened,
}



In [12]:
# Sanity check on the merge: a dict keeps only one entry per key, so the merged
# length equals the sum of the three input lengths only if no key appears in
# more than one split.
our_len = len(total_dataset_brisque)
actual_total_len = len(train_brisque_metrics_reopened) + len(test_brisque_metrics_reopened) + len(val_brisque_metrics_reopened)

print(our_len)
print(actual_total_len)

25331
25331


In [13]:
# Save the merged BRISQUE dictionary as one JSON file (indented by 4 spaces).
with open("/content/drive/MyDrive/Dissertation/skin_lesion_data/total_dataset_brisque_metrics.json", "w") as f:
    json.dump(total_dataset_brisque, f, indent=4)

In [31]:
# Next, add the BRISQUE score to the metadata CSV file:
# look up each row's image path in the BRISQUE dictionary
# (score = metrics[image_path]) and store the value in a new "brisque" column.

def add_brisque_to_metadata(
    metadata_csv='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths.csv',
    brisque_json="/content/drive/MyDrive/Dissertation/skin_lesion_data/total_dataset_brisque_metrics.json",
    output_csv='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths_with_brisque.csv',
):
    """Write a copy of the metadata CSV with a ``brisque`` column added.

    Args:
        metadata_csv: CSV to read; must contain an ``image_path`` column.
        brisque_json: JSON file holding a mapping from image path to score.
        output_csv: destination CSV (written without the index).

    Raises:
        KeyError: if a row's ``image_path`` is not a key of the JSON mapping.
    """
    df = pd.read_csv(metadata_csv)
    with open(brisque_json, "r") as f:
        brisque_by_path = json.load(f)

    # One pass over the column instead of a per-row ``iterrows`` + ``.loc``
    # write. Object dtype keeps each value exactly as it was loaded from JSON.
    df["brisque"] = pd.Series(
        [brisque_by_path[path] for path in df["image_path"]],
        index=df.index,
        dtype=object,
    )

    df.to_csv(output_csv, index=False)

In [36]:
# add_brisque_to_metadata()
whole_data_brisque = pd.read_csv('/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths_with_brisque.csv')

In [44]:
# add class to metadata csv

def add_class_metadata(
    metadata_csv='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths_with_brisque.csv',
    output_csv='/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths_with_brisque_and_class.csv',
):
    """Write a copy of the metadata CSV with a ``class`` column added.

    The class is the name of the directory that directly contains the image,
    i.e. the second-to-last "/"-separated component of ``image_path``.

    Args:
        metadata_csv: CSV to read; must contain an ``image_path`` column.
        output_csv: destination CSV (written without the index).

    Raises:
        IndexError: if an ``image_path`` contains no "/" separator.
        AttributeError: if an ``image_path`` value is not a string.
    """
    df = pd.read_csv(metadata_csv)

    # One pass over the column instead of a per-row ``iterrows`` + ``.loc``
    # write.
    df["class"] = pd.Series(
        [path.split("/")[-2] for path in df["image_path"]],
        index=df.index,
        dtype=object,
    )

    df.to_csv(output_csv, index=False)

In [45]:
add_class_metadata()

In [46]:
pd.read_csv('/content/drive/MyDrive/Dissertation/skin_lesion_data/ISIC_2019_Training_Metadata_with_full_paths_with_brisque_and_class.csv')

Unnamed: 0,image,age_approx,anatom_site_general,lesion_id,sex,image_path,rand_split,fitzpatrick,brisque,class
0,ISIC_0000000,55.0,anterior torso,,female,/content/drive/MyDrive/Dissertation/skin_lesio...,train,6.0,34.572449,NV
1,ISIC_0000001,30.0,anterior torso,,female,/content/drive/MyDrive/Dissertation/skin_lesio...,train,6.0,44.077820,NV
2,ISIC_0000002,60.0,upper extremity,,female,/content/drive/MyDrive/Dissertation/skin_lesio...,val,6.0,33.744324,MEL
3,ISIC_0000003,30.0,upper extremity,,male,/content/drive/MyDrive/Dissertation/skin_lesio...,test,1.0,36.924500,NV
4,ISIC_0000004,80.0,posterior torso,,male,/content/drive/MyDrive/Dissertation/skin_lesio...,test,1.0,61.345398,MEL
...,...,...,...,...,...,...,...,...,...,...
25326,ISIC_0073247,85.0,head/neck,BCN_0003925,female,/content/drive/MyDrive/Dissertation/skin_lesio...,train,1.0,43.043640,BCC
25327,ISIC_0073248,65.0,anterior torso,BCN_0001819,male,/content/drive/MyDrive/Dissertation/skin_lesio...,train,6.0,37.097351,BKL
25328,ISIC_0073249,70.0,lower extremity,BCN_0001085,male,/content/drive/MyDrive/Dissertation/skin_lesio...,test,1.0,32.765320,MEL
25329,ISIC_0073251,55.0,palms/soles,BCN_0002083,female,/content/drive/MyDrive/Dissertation/skin_lesio...,val,3.0,48.839539,NV
