# Generate Standardised DCA Dataset

This notebook runs through the entire train_balanced dataset and applies a DCA of a single size to every image in the dataset. Any image with an exceedingly large DCA has that DCA reduced before the erosion techniques are applied to it.

The DCA masks used for augmentation are those extracted from the last project.


## TO DO
Make sure all .csv files are read and are contributing to the check


In [1]:
# append custom system path for custom modules folder in directory if not already
import sys
if  '../../Modules' not in sys.path:
    sys.path.insert(0, '../../Modules')
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from PIL import Image
import isic_data as isic
import os, os.path
import shutil
import random
import realistic_dca as aug
from dca_removal import reduce_intensity

random.seed(72)

In [2]:
# Fetch the dataset through the project helper; `type = 'none'` is passed straight to it.
X = isic.get_data(type = 'none')

# Entry 0 is used below as the training set, entry 1 as the validation set.
lesions_train, lesions_test = X[0], X[1]

In [3]:
# Training-split annotation tables (melanoma / other)
t_mel_csv = pd.read_csv(r"../../Data/Annotations/train_mel.csv")
t_oth_csv = pd.read_csv(r"../../Data/Annotations/train_oth.csv")
# DCA intensity annotations for the same split
dca_t_mel_csv = pd.read_csv(r"../../Data/Annotations/dca_intensities_train_mel.csv")
dca_t_oth_csv = pd.read_csv(r"../../Data/Annotations/dca_intensities_train_oth.csv")

# Keep only the rows flagged as medium / large DCA. reset_index(drop = False) keeps the
# previous index as a column. The 'Small_DCA' and 'Oth' flags are deliberately not
# selected: only medium and large DCAs are reduced further down.
medium_dca_masks = dca_t_mel_csv[dca_t_mel_csv['Medium_DCA'] == 1].reset_index(drop = False)
large_dca_masks = dca_t_mel_csv[dca_t_mel_csv['Large_DCA'] == 1].reset_index(drop = False)
tomedium_dca_masks = dca_t_oth_csv[dca_t_oth_csv['Medium_DCA'] == 1].reset_index(drop = False)
tolarge_dca_masks = dca_t_oth_csv[dca_t_oth_csv['Large_DCA'] == 1].reset_index(drop = False)

# Record the source image name next to each mask name: strip the trailing 9 characters
# (e.g. "_MASK.png") and append ".jpg".
for flagged in (medium_dca_masks, large_dca_masks, tomedium_dca_masks, tolarge_dca_masks):
    flagged['Original_Image_Name'] = [mask_name[:-9] + '.jpg' for mask_name in flagged['Image_Name']]

# Collapse each size category into one flat list of mask filenames (melanoma first, then other)
medium_dca_masks = [*medium_dca_masks['Image_Name'], *tomedium_dca_masks['Image_Name']]
large_dca_masks = [*large_dca_masks['Image_Name'], *tolarge_dca_masks['Image_Name']]


In [14]:
# Validation-split annotation tables (melanoma / other)
v_mel_csv = pd.read_csv(r"../../Data/Annotations/val_mel.csv")
v_oth_csv = pd.read_csv(r"../../Data/Annotations/val_oth.csv")
# DCA intensity annotations for the validation split
dca_v_mel_csv = pd.read_csv(r"../../Data/Annotations/dca_intensities_val_mel.csv")
dca_v_oth_csv = pd.read_csv(r"../../Data/Annotations/dca_intensities_val_oth.csv")

# Keep only the rows flagged as medium / large DCA. reset_index(drop = False) keeps the
# previous index as a column. The 'Small_DCA' and 'Oth' flags are deliberately not
# selected: only medium and large DCAs are reduced further down.
vmedium_dca_masks = dca_v_mel_csv[dca_v_mel_csv['Medium_DCA'] == 1].reset_index(drop = False)
vlarge_dca_masks = dca_v_mel_csv[dca_v_mel_csv['Large_DCA'] == 1].reset_index(drop = False)
vomedium_dca_masks = dca_v_oth_csv[dca_v_oth_csv['Medium_DCA'] == 1].reset_index(drop = False)
volarge_dca_masks = dca_v_oth_csv[dca_v_oth_csv['Large_DCA'] == 1].reset_index(drop = False)

# Record the source image name next to each mask name: strip the trailing 9 characters
# (e.g. "_MASK.png") and append ".jpg".
for flagged in (vmedium_dca_masks, vlarge_dca_masks, vomedium_dca_masks, volarge_dca_masks):
    flagged['Original_Image_Name'] = [mask_name[:-9] + '.jpg' for mask_name in flagged['Image_Name']]

# Collapse each size category into one flat list of mask filenames (melanoma first, then other)
vmedium_dca_masks = [*vmedium_dca_masks['Image_Name'], *vomedium_dca_masks['Image_Name']]
vlarge_dca_masks = [*vlarge_dca_masks['Image_Name'], *volarge_dca_masks['Image_Name']]

In [5]:
# Root folder that receives the augmented dataset
savepath = r"../../Data/train_balanced_augmented_std_224x224"

# Start from a clean slate: wipe any previous run, then rebuild the folder tree
if os.path.exists(savepath):
    shutil.rmtree(savepath)

# Parents are listed before their children because os.mkdir does not create intermediate folders
for subdir in ("", r"/train", r"/val", r"/train/mel", r"/val/mel", r"/train/oth", r"/val/oth"):
    os.mkdir(savepath + subdir)

In [6]:
# For every image in the dataset:
#   - when the image has a medium or large DCA in it, attempt to reduce its intensity:
#         - when its size hasn't changed, apply the standardised DCA;
#         - otherwise, bump the image size back to 224x224 and then apply the standardised DCA (maybe change to the other augmentation here).
#   - when the image has no DCA, just use the standardised DCA method.

In [7]:
# Display the first medium-DCA mask filename as a quick check of the list contents and naming pattern.
medium_dca_masks[0]

'ISIC2017_0001133_mel_MASK.png'

In [8]:
## TRAINING SET
# For every training image:
#   - if it is annotated with a medium or large DCA, load its mask, reduce the DCA,
#     resize image and mask back to 224x224 and re-apply an eroded DCA from that mask;
#   - otherwise apply the standardised DCA.
# Each result is written as a PNG to <output root>/train/<class>/.
mask_path_mel = r"../../Data/DCA_Masks/train/mel/"
mask_path_oth = r"../../Data/DCA_Masks/train/oth/"
train_out_dir = r"../../Data/train_balanced_augmented_std_224x224/train/"

# Build the lookup once: set membership avoids rescanning both annotation lists per image.
train_reducible_masks = set(medium_dca_masks) | set(large_dca_masks)

for i, img in enumerate(lesions_train.images):
    filename = lesions_train.filenames[i]
    stem = filename[:-4]          # filename without its 4-character extension (e.g. ".jpg")
    dest = filename[-7:-4]        # 3-character class tag before the extension ("mel" / "oth")
    temp_str = stem + "_MASK.png"

    if temp_str in train_reducible_masks:
        # load in the original mask from the folder matching the image class
        mask_dir = mask_path_mel if dest == "mel" else mask_path_oth
        mask = cv2.imread(mask_dir + temp_str)
        if mask is None:
            # cv2.imread does not raise on a missing/unreadable file, it returns None
            raise FileNotFoundError("DCA mask could not be read: " + mask_dir + temp_str)
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)

        # reduce its intensity
        cimg, cmask = reduce_intensity(img, mask)

        # bump up size
        # NOTE(review): cubic interpolation can introduce intermediate values into the mask;
        # confirm aug.augment_dca tolerates a non-binary mask.
        cimg = cv2.resize(cimg, dsize = (224,224), interpolation = cv2.INTER_CUBIC)
        cmask = cv2.resize(cmask, dsize = (224,224), interpolation = cv2.INTER_CUBIC)

        # pass to DCA blur method - NON-STANDARDISED MUST MATCH DCA ALREADY IN IMAGE
        result = aug.augment_dca(cimg, cmask, blur_type = "erode")
    else:
        # pass to DCA blur method - STANDARDISED
        result = aug.augment_standardised_dca(img, blur_type = "erode")

    # save the result to the appropriate class directory (OpenCV writes BGR).
    # A dedicated name is used so the `savepath` root-folder variable is not overwritten.
    result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
    out_path = train_out_dir + dest + r"/" + stem + ".png"
    if not cv2.imwrite(out_path, result):
        # cv2.imwrite reports failure through its return value
        raise IOError("Could not write augmented image: " + out_path)

In [None]:
## VALIDATION SET
# For every validation image (held in `lesions_test`):
#   - if it is annotated with a medium or large DCA, load its mask, reduce the DCA,
#     resize image and mask back to 224x224 and re-apply an eroded DCA from that mask;
#   - otherwise apply the standardised DCA.
# Each result is written as a PNG to <output root>/val/<class>/.
mask_path_mel = r"../../Data/DCA_Masks/val/mel/"
mask_path_oth = r"../../Data/DCA_Masks/val/oth/"
val_out_dir = r"../../Data/train_balanced_augmented_std_224x224/val/"

# Build the lookup once: set membership avoids rescanning both annotation lists per image.
val_reducible_masks = set(vmedium_dca_masks) | set(vlarge_dca_masks)

for i, img in enumerate(lesions_test.images):
    # The name and class must come from the set being iterated (lesions_test).
    # Indexing lesions_train.filenames here would save validation images under
    # training filenames and class folders.
    filename = lesions_test.filenames[i]
    stem = filename[:-4]          # filename without its 4-character extension (e.g. ".jpg")
    dest = filename[-7:-4]        # 3-character class tag before the extension ("mel" / "oth")
    temp_str = stem + "_MASK.png"

    if temp_str in val_reducible_masks:
        # load in the original mask from the folder matching the image class
        mask_dir = mask_path_mel if dest == "mel" else mask_path_oth
        mask = cv2.imread(mask_dir + temp_str)
        if mask is None:
            # cv2.imread does not raise on a missing/unreadable file, it returns None
            raise FileNotFoundError("DCA mask could not be read: " + mask_dir + temp_str)
        mask = cv2.cvtColor(mask, cv2.COLOR_BGR2RGB)

        # reduce its intensity
        cimg, cmask = reduce_intensity(img, mask)

        # bump up size
        # NOTE(review): cubic interpolation can introduce intermediate values into the mask;
        # confirm aug.augment_dca tolerates a non-binary mask.
        cimg = cv2.resize(cimg, dsize = (224,224), interpolation = cv2.INTER_CUBIC)
        cmask = cv2.resize(cmask, dsize = (224,224), interpolation = cv2.INTER_CUBIC)

        # pass to DCA blur method - NON-STANDARDISED MUST MATCH DCA ALREADY IN IMAGE
        result = aug.augment_dca(cimg, cmask, blur_type = "erode")
    else:
        # pass to DCA blur method - STANDARDISED
        result = aug.augment_standardised_dca(img, blur_type = "erode")

    # save the result to the appropriate class directory (OpenCV writes BGR).
    # A dedicated name is used so the `savepath` root-folder variable is not overwritten.
    result = cv2.cvtColor(result, cv2.COLOR_RGB2BGR)
    out_path = val_out_dir + dest + r"/" + stem + ".png"
    if not cv2.imwrite(out_path, result):
        # cv2.imwrite reports failure through its return value
        raise IOError("Could not write augmented image: " + out_path)

## TEST AREA

In [13]:
# Spot check: is this image's mask listed among the validation medium / large DCA masks?
test_img_name = "ISIC2020_4658487_oth.jpg"
test_temp_str = test_img_name[:-4] + "_MASK.png"   # swap the extension for the mask suffix

print(test_temp_str)

# Last expression of the cell, so the boolean is shown as the cell output
any(test_temp_str in mask_names for mask_names in (vmedium_dca_masks, vlarge_dca_masks))

ISIC2020_4658487_oth_MASK.png


False