# DCA Masking Classification

This notebook loads every mask that has been generated and calculates the DCA intensity present in each one. Each mask is then assigned a size category (small, medium, large or other) based on that intensity, and the result is recorded and saved into a .csv.

The calculation used to calculate the DCA intensity is as follows:

$\text{intensity} = \left(\sum_{i} \mathbf{1}[\,pixel_i = 255\,]\right) \div 501.76$

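This amounts to the percentage of the image covered by white pixels: the sum counts the pixels whose value is 255, and 501.76 is 1% of the 50,176 pixels assumed per mask (e.g. a 224 × 224 image).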

## Standard Imports

In [1]:
# append custom system path for custom modules folder in directory if not already
import sys
if  '../../Modules' not in sys.path:
    sys.path.insert(0, '../../Modules')

import pandas as pd
import numpy as np
from PIL import Image
import os, os.path
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.utils import Bunch
import cv2 as cv
import image_modifications as im # custom image modification module
import isic_data as isic
from math import ceil

------------

## Generate .csv files

In [10]:
# Directories holding the generated DCA masks, one per dataset split/class.
data_filepaths = [r"../../Data/DCA_Masks/train/mel/", 
                  r"../../Data/DCA_Masks/train/oth/", 
                  r"../../Data/DCA_Masks/val/mel/", 
                  r"../../Data/DCA_Masks/val/oth/"]

# Annotation csvs to populate; csvs[k] corresponds to data_filepaths[k].
csvs = [r"../../Data/Annotations/dca_intensities_train_mel.csv",
       r"../../Data/Annotations/dca_intensities_train_oth.csv",
       r"../../Data/Annotations/dca_intensities_val_mel.csv",
       r"../../Data/Annotations/dca_intensities_val_oth.csv"]

# Number of pixel values assumed per mask when converting the white-pixel
# count into a percentage (50176 = 224 * 224).
# NOTE(review): assumes every mask is a single-channel image of this size -- confirm.
MASK_PIXELS = 50176

# iterate through each annotation csv together with its mask directory
for mask_dir, filepath in zip(data_filepaths, csvs):
    # load in the csv (expected to already contain the five column headers)
    annotations = pd.read_csv(filepath)

    # only csvs without any data rows need populating; leave the rest untouched
    if not annotations.empty:
        continue

    # one row per mask: [filename, small, med, large, oth]
    rows = []

    # sorted() keeps the row order reproducible (os.listdir order is arbitrary)
    for img in sorted(os.listdir(mask_dir)):
        # close the file handle as soon as the pixel data has been copied out
        with Image.open(os.path.join(mask_dir, img)) as mask:
            img_data = np.asarray(mask)

        # count the white pixels in the image (this is the dca region)
        white_px = int(np.sum(img_data == 255))
        # percentage of the mask that is white; integer numerator keeps the
        # 1 / 25 / 50 boundaries exact
        intensity = white_px * 100 / MASK_PIXELS

        # one-hot encode the size category as [small, med, large, oth]
        if intensity < 1:
            one_hot = [0, 0, 0, 1]  # oth: less than 1% covered
        elif intensity < 25:
            one_hot = [1, 0, 0, 0]  # small: 1% up to (not including) 25%
        elif intensity < 50:
            one_hot = [0, 1, 0, 0]  # med: 25% up to (not including) 50%
        else:
            one_hot = [0, 0, 1, 0]  # large: 50% or more

        rows.append([str(img), *one_hot])

    # The loaded csv has no rows in this branch, so build the populated frame
    # directly with the original header instead of appending to it
    # (DataFrame.append was removed in pandas 2.0).
    annotations = pd.DataFrame(rows, columns=annotations.columns)

    # save csv
    annotations.to_csv(filepath, index = False)