## Imports

In [1]:
import os
import pandas as pd
import cv2
import numpy as np
from pydicom import dcmread
from skimage.exposure import rescale_intensity

In [2]:
# Decide which side of the frame the image content sits on by comparing the
# total intensity of the two vertical borders.
# Required to flip images when necessary
def _get_image_laterality(image):
    """Return "R" or "L" from the summed first and last pixel columns.

    "R" is returned when the first column sums to strictly less than the
    last column; otherwise (including a tie) the result is "L".
    """
    first_column_total = image[:, 0].sum()
    last_column_total = image[:, -1].sum()
    if first_column_total < last_column_total:
        return "R"
    return "L"

# Get image window center
def _get_window_center(ds):
    """Return the window center stored in the dataset as ``np.float32``.

    The value is read from element (0028,1050) inside the first item of
    sequence (0028,9132), itself inside the first item of sequence
    (5200,9229). A missing tag propagates the lookup error of ``ds``.
    """
    shared_group = ds[0x5200, 0x9229][0]
    voi_lut_item = shared_group[0x0028, 0x9132][0]
    center_element = voi_lut_item[0x0028, 0x1050]
    return np.float32(center_element.value)

# Get image window width
def _get_window_width(ds):
    """Return the window width stored in the dataset as ``np.float32``.

    The value is read from element (0028,1051) inside the first item of
    sequence (0028,9132), itself inside the first item of sequence
    (5200,9229). A missing tag propagates the lookup error of ``ds``.
    """
    voi_lut_item = ds[0x5200, 0x9229][0][0x0028, 0x9132][0]
    raw_width = voi_lut_item[0x0028, 0x1051].value
    return np.float32(raw_width)

# Save image to rgb grayscale for 3 channels
def saveToRGBGrayScale(image, output_name):
    """Write a single-channel image to disk as a 3-channel image.

    Args:
        image: Single-channel array accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_GRAY2RGB``.
        output_name: Destination file path; the extension selects the
            encoder used by ``cv2.imwrite``.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the file was not written
            (for instance when the target folder does not exist).
    """
    backtorgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    # cv2.imwrite signals failure through its return value instead of
    # raising, so an unchecked call can silently leave the file missing.
    if not cv2.imwrite(output_name, backtorgb):
        raise OSError(f"Could not write image to {output_name!r}")

## Read Metadata

In [3]:
# Load the metadata tables. The first three files are semicolon-separated;
# metadata.csv uses the default comma separator.
train_boxes = pd.read_csv('boxes.csv', sep=';')
files_path_train = pd.read_csv('paths.csv', sep=';')
labels = pd.read_csv('labels.csv', sep=';')
meta = pd.read_csv('metadata.csv')

In [4]:
# Folder for cancer images
MALIGN_FOLDER = 'malign_images'

# Folder for benign images
BENIGN_FOLDER = 'benign_images'

# Folder for actionable images
ACTIONABLE_FOLDER = 'actionable_images'

# Folder for normal images
NORMAL_FOLDER = 'normal_images'

# Root folder for images without a mask
MASKLESS_IMAGES = 'maskless_images'

# Root folder for images that come with a segmentation mask
MASK_IMAGES = 'mask_images'

# Create one sub-folder per class under each root. makedirs(exist_ok=True)
# also creates the root itself and makes the cell safe to re-run; os.mkdir
# raises FileExistsError as soon as a folder is already present.
for _root_dir in (MASKLESS_IMAGES, MASK_IMAGES):
    for _class_dir in (MALIGN_FOLDER, BENIGN_FOLDER, ACTIONABLE_FOLDER, NORMAL_FOLDER):
        os.makedirs(os.path.join(_root_dir, _class_dir), exist_ok=True)

In [None]:
# Join the path table with the box annotations on patient/study/view.
# The outer join keeps rows that are present in only one of the two tables.
aux = files_path_train.merge(
    train_boxes,
    on=['PatientID', 'StudyUID', 'View'],
    how='outer',
)
aux.head()

In [None]:
# Add the label columns, again with an outer join on patient/study/view.
all_data = aux.merge(
    labels,
    on=['PatientID', 'StudyUID', 'View'],
    how='outer',
)
all_data.head()

In [None]:
# Create column for series UID: the second-to-last '/'-separated component
# of 'classic_path'.
# The .str accessor is used instead of apply(lambda x: x.split(...)) because
# the outer merges can leave 'classic_path' missing (NaN) for some rows, and
# calling .split on a float raises AttributeError. With .str, missing or
# too-short paths yield NaN in 'Series UID' instead of aborting the cell.
all_data['Series UID'] = all_data['classic_path'].str.split('/').str[-2]
all_data.head()

In [None]:
# Align the patient column name with the metadata table, then outer-join the
# metadata on subject and series.
all_data = (
    all_data
    .rename(columns={'PatientID': 'Subject ID'})
    .merge(meta, on=['Subject ID', 'Series UID'], how='outer')
)
all_data.head()

## Construct Masks

In [None]:
# Keep only the rows for which the DICOM file location is known
all_data = all_data.dropna(axis='index', how='any', subset=['File Location'])
all_data.head()

In [10]:
# Iterate over every row to export PNG images and, when a bounding box is
# present, a filled-rectangle mask.
#
# Output layout: <MASK_IMAGES|MASKLESS_IMAGES>/<class folder>/<file>.png
#   * rows with a box    -> the annotated slice plus '<stem>_mask.png'
#   * rows without a box -> every slice of the volume as '<stem>_<i>.png'
#
# NOTE(review): file names are built from subject, view and series only, so
# several boxes on the same series would overwrite each other's image and
# mask -- confirm whether the box table can contain such rows.
for _, elem in all_data.iterrows():

    # Read the volume and the display window stored in the DICOM header
    ds = dcmread(os.path.join(elem['File Location'], '1-1.dcm'))
    volume = ds.pixel_array

    window_center = _get_window_center(ds)
    window_width = _get_window_width(ds)
    low = (2 * window_center - window_width) / 2
    high = (2 * window_center + window_width) / 2

    # A row carries a bounding box exactly when 'X' is not NaN
    has_box = not np.isnan(elem['X'])

    # Grabs path where to insert image and possible mask
    # NOTE(review): any row that is not Normal/Actionable/Benign lands in the
    # malign folder, including rows whose labels are missing after the outer
    # merges -- confirm that this default is intended.
    if elem['Normal'] == 1:
        class_folder = NORMAL_FOLDER
    elif elem['Actionable'] == 1:
        class_folder = ACTIONABLE_FOLDER
    elif elem['Benign'] == 1:
        class_folder = BENIGN_FOLDER
    else:
        class_folder = MALIGN_FOLDER
    path = os.path.join(MASK_IMAGES if has_box else MASKLESS_IMAGES, class_folder)

    # Common prefix of every file written for this row
    file_stem = os.path.join(
        path,
        elem['Subject ID'] + '_' + elem['View'] + '_' + str(elem['Series UID']),
    )

    # First letter of the view name, compared with the side detected from
    # the pixels to decide whether the image has to be flipped
    view_laterality = elem['View'][0].upper()

    if has_box:

        # Window only the annotated slice: the rescaling is element-wise, so
        # this gives the same pixels as windowing the whole volume first
        # while avoiding the work on slices that are never saved.
        # NOTE(review): the -1 assumes 'Slice' is 1-based; with 0-based
        # annotations slice 0 would wrap around to the last slice -- confirm
        # against the dataset documentation.
        c_image = rescale_intensity(
            volume[int(elem['Slice']) - 1], in_range=(low, high), out_range="dtype"
        )

        if _get_image_laterality(c_image) != view_laterality:
            c_image = np.flip(c_image, axis=(-1, -2))

        saveToRGBGrayScale(image=c_image, output_name=file_stem + '.png')

        # Create mask: explicit uint8 instead of the float64 default of
        # np.zeros, so the array already has the depth written to the PNG
        x, y = int(elem['X']), int(elem['Y'])
        mask = np.zeros((c_image.shape[0], c_image.shape[1]), dtype=np.uint8)
        segmented_mask = cv2.rectangle(
            mask, (x, y), (x + int(elem['Width']), y + int(elem['Height'])), (255), -1
        )

        # Save mask; cv2.imwrite reports failure through its return value
        mask_save_path = file_stem + '_mask.png'
        if not cv2.imwrite(mask_save_path, segmented_mask):
            raise OSError(f"Could not write mask to {mask_save_path!r}")

    else:
        arr = rescale_intensity(
            volume, in_range=(low, high), out_range="dtype"
        )

        # Apply the same orientation correction as for annotated images so
        # that both output trees share one orientation. The side is detected
        # on the first slice and the flip acts on the two in-plane axes only,
        # leaving the slice order untouched.
        if _get_image_laterality(arr[0]) != view_laterality:
            arr = np.flip(arr, axis=(-1, -2))

        for i, c_image in enumerate(arr):
            saveToRGBGrayScale(image=c_image, output_name=file_stem + '_' + str(i) + '.png')
    
