<h3>This notebook contains the code used to compute bounding boxes around lesions from their masks. The box coordinates are written to two new metadata files: training_boxes.csv and validation_boxes.csv.</h3>

In [1]:
import os

import pandas as pd
import numpy as np
import PIL.Image

from typing import Tuple

In [2]:
def get_paths(path: str) -> list[str]:
    """Return the path of every file found under ``path``, recursively.

    Each result is the directory reported by ``os.walk`` joined with one of
    the file names it contains, in the order ``os.walk`` yields them. A
    directory that does not exist produces an empty list.
    """
    return [
        os.path.join(root, file)
        for root, _, files in os.walk(path)
        for file in files
    ]


def get_names(path: str) -> list[str]:
    """Return, for every file under ``path``, its name up to the first dot.

    Files are visited in the same ``os.walk`` order as in ``get_paths``.
    """
    return [
        file_name.split('.')[0]
        for _, _, file_names in os.walk(path)
        for file_name in file_names
    ]


# Mask images the bounding boxes are computed from.
masks_dir = os.path.join('..', 'data', 'masks_200x150')
mask_paths = get_paths(masks_dir)

# Image set with 'training' and 'validation' sub-folders; the root is named
# once so both sub-folder paths are guaranteed to share it.
augmentation_dir = os.path.join(
    '..',
    'data',
    'images_original_inception_resnet_v2_200x150_splitted_with_augmentation')
augmentation_training_dir = os.path.join(augmentation_dir, 'training')
augmentation_validation_dir = os.path.join(augmentation_dir, 'validation')
augmentation_training_paths = get_paths(augmentation_training_dir)
augmentation_validation_paths = get_paths(augmentation_validation_dir)

In [3]:
def get_coord(array: np.ndarray) -> int:
    """Return the index of the first row of ``array`` holding a positive value.

    Rows are scanned from the start; the result therefore equals the number
    of leading rows without any value greater than zero. When no row
    qualifies, the total number of rows is returned.
    """
    for index, row in enumerate(array):
        if np.any(row > 0):
            return index

    return len(array)


def get_name(path: str) -> str:
    """Return the file name of ``path`` without its directory and extension.

    Only the final extension is removed, so ``'dir/img.v2.jpg'`` gives
    ``'img.v2'``. A file name without any extension is returned unchanged
    instead of raising ``IndexError``.
    """
    return os.path.splitext(os.path.basename(path))[0]


def get_mask_name(mask_path: str) -> str:
    """Return the name of ``mask_path`` with its last ``'_'`` token removed.

    The name is obtained through ``get_name``; everything from the last
    underscore onwards is dropped. A name without any underscore yields an
    empty string.
    """
    head, _, _ = get_name(mask_path).rpartition('_')
    return head

In [9]:
def append_copy_coords(orig_name: str, paths: list[str], boxes: list, box: Tuple[int, int, int, int]) -> None:
    """Append ``box`` to ``boxes`` once for every copy of ``orig_name`` in ``paths``.

    A path counts as a copy when its name (see ``get_name``) differs from
    ``orig_name`` but equals it once the last ``'_'`` token is removed.
    Each match adds a ``(copy_name, box[0], box[1], box[2], box[3])`` tuple
    to ``boxes``, which is modified in place.
    """
    for candidate_path in paths:
        copy_name = get_name(candidate_path)
        is_other_file = copy_name != orig_name

        # Name of the candidate without its trailing '_<token>' part.
        if is_other_file and copy_name.rpartition('_')[0] == orig_name:
            boxes.append((copy_name, box[0], box[1], box[2], box[3]))

In [10]:
training_boxes = []
validation_boxes = []

# Names identifying the images of the validation split. Both forms are stored:
#   * get_name(path): the file's own name, which is what a mask name matches
#     when the image file carries no extra suffix;
#   * get_mask_name(path): the name without its last '_' token, which is what
#     a mask name matches for a suffixed copy (see append_copy_coords).
# Using only the second form dropped every un-suffixed image that has no copy
# and sent its box to the training list.
# NOTE(review): assumes original images are named '<id>' and copies
# '<id>_<suffix>' — confirm against the data layout.
validation_names_hash = set()
for validation_path in augmentation_validation_paths:
    validation_names_hash.add(get_name(validation_path))
    validation_names_hash.add(get_mask_name(validation_path))

for mask_path in mask_paths:
    # The context manager wraps the image returned by open() so the opened
    # file is closed on exit; the pixel array is built before that happens.
    with PIL.Image.open(mask_path) as mask:
        mask_pixels = np.asarray(mask.convert('RGB').convert('L'))

    shape = mask_pixels.shape
    name = get_mask_name(mask_path)

    # top/left: index of the first row/column with a positive pixel.
    # bottom/right: size minus the count of trailing rows/columns without one,
    # i.e. one past the last row/column with a positive pixel.
    # A mask without any positive pixel gives top == shape[0] and bottom == 0
    # (likewise left/right).
    top = get_coord(mask_pixels)
    bottom = shape[0] - get_coord(mask_pixels[::-1])
    left = get_coord(mask_pixels.T)
    right = shape[1] - get_coord(mask_pixels.T[::-1])
    box = (top, bottom, left, right)

    if name in validation_names_hash:
        validation_boxes.append((name, top, bottom, left, right))
        append_copy_coords(name, augmentation_validation_paths, validation_boxes, box)
    else:
        training_boxes.append((name, top, bottom, left, right))
        append_copy_coords(name, augmentation_training_paths, training_boxes, box)

In [12]:
# Both tables share one layout: the image name followed by the box edges.
box_columns = ['image_id', 'top', 'bottom', 'left', 'right']
output_dir = os.path.join('..', 'data')

train_df = pd.DataFrame(training_boxes, columns=box_columns)
valid_df = pd.DataFrame(validation_boxes, columns=box_columns)

# Write one CSV per split, without the DataFrame index column.
training_csv = os.path.join(output_dir, 'training_boxes.csv')
validation_csv = os.path.join(output_dir, 'validation_boxes.csv')
train_df.to_csv(training_csv, index=False)
valid_df.to_csv(validation_csv, index=False)