<h3>This notebook computes a bounding box around the lesion in each segmentation mask. The box coordinates (top, bottom, left, right) are added to the metadata, which is saved as an extended metadata file.</h3>

In [8]:
import os

import pandas as pd
import numpy as np
import PIL.Image

from functions.drawing import draw_rectangle

In [9]:
def get_paths(path):
    """Return the paths of all files found under ``path``, recursively.

    Args:
        path: Directory to walk.

    Returns:
        A list of file paths (each the walked directory joined with the file
        name), sorted lexicographically. ``os.walk`` yields entries in
        whatever order the file system reports them, so sorting is what makes
        the result reproducible across machines and runs. A directory that
        does not exist yields an empty list.
    """
    return sorted(
        os.path.join(root, file)
        for root, _dirs, files in os.walk(path)
        for file in files
    )
# Folders: lesion masks (input) and rendered bounding-box images (output).
masks_dir = os.path.join("data", "masks_200x150")
boxes_dir = os.path.join("data", "boxes")

# Every file found below the mask folder.
mask_paths = get_paths(masks_dir)

# Metadata table, ordered by image id.
metadata_path = os.path.join("data", "HAM10000_metadata.csv")
data = pd.read_csv(metadata_path)
data = data.sort_values(by="image_id")

In [4]:
def get_coord(array: np.ndarray) -> int:
    """Count the leading rows of ``array`` that contain no positive value.

    Args:
        array: Array that is scanned row by row along its first axis.

    Returns:
        The index of the first row holding at least one element greater than
        zero. If no row qualifies, the total number of rows is returned
        (``0`` for an array without rows).
    """
    rows_seen = 0

    # Counting from 1 means ``rows_seen`` always equals the number of rows
    # consumed so far, which is exactly the fallback result after the loop.
    for rows_seen, row in enumerate(array, start=1):
        if np.any(row > 0):
            # The current row is the first non-empty one; all rows before it
            # were empty.
            return rows_seen - 1

    return rows_seen


def get_name(mask_path: str) -> str:
    """Derive an image name from the path of a mask file.

    The last path component (split on ``os.sep``) is taken, the text between
    its last two dots is kept, and everything from the final underscore
    onwards is dropped.

    Args:
        mask_path: Path of a mask file; its file name must contain a dot.

    Returns:
        The part of the file stem that precedes its last underscore, or an
        empty string when the stem has no underscore.

    Raises:
        IndexError: If the file name contains no dot.
    """
    file_name = mask_path.split(os.sep)[-1]
    stem = file_name.split(".")[-2]

    # Everything left of the final underscore; empty if there is none.
    prefix, _separator, _suffix = stem.rpartition("_")
    return prefix

Some masks are stored in a different image mode than the others (for example, with an alpha channel). To treat them uniformly, every mask is first converted to RGB, which drops any alpha channel, and then to grayscale.

In [5]:
# One entry per mask, in the same order as ``mask_paths``.
tops = []
bottoms = []
lefts = []
rights = []

# Make sure the output folder exists before any box image is written to it.
os.makedirs(boxes_dir, exist_ok=True)

for mask_path in mask_paths:
    # Manage the opened file itself so its handle is released as soon as the
    # pixels are decoded; the converted grayscale copy lives purely in memory.
    with PIL.Image.open(mask_path) as source:
        mask = source.convert("RGB").convert("L")

    mask_pixels = np.asarray(mask)
    height, width = mask_pixels.shape
    name = get_name(mask_path)

    # Scan inwards from each edge to the first row/column with a non-zero
    # pixel. A mask without any non-zero pixel yields top == height,
    # bottom == 0, left == width and right == 0.
    top = get_coord(mask_pixels)
    bottom = height - get_coord(mask_pixels[::-1])
    left = get_coord(mask_pixels.T)
    right = width - get_coord(mask_pixels.T[::-1])

    tops.append(top)
    bottoms.append(bottom)
    lefts.append(left)
    rights.append(right)

    new_path = os.path.join(boxes_dir, f"{name}.png")

    # NOTE(review): presumably draws the box on the mask and saves the result
    # to ``new_path`` -- confirm against functions.drawing.
    draw_rectangle(mask, (left, top, right, bottom), new_path)

In [6]:
# Attach each box to its metadata row by image id rather than by list
# position: the order of ``mask_paths`` comes from the file system and is not
# guaranteed to match the ``image_id`` ordering of ``data``.
# Assumes ``get_name(mask_path)`` equals the ``image_id`` of the image the
# mask belongs to; any mismatch raises below instead of mislabelling rows.
box_image_ids = [get_name(mask_path) for mask_path in mask_paths]

if not (len(box_image_ids) == len(tops) == len(bottoms) == len(lefts) == len(rights)):
    raise ValueError(
        "Box coordinate lists are out of sync with mask_paths; "
        "re-run the extraction cell."
    )

boxes_by_image_id = dict(zip(box_image_ids, zip(tops, bottoms, lefts, rights)))

if len(boxes_by_image_id) != len(box_image_ids):
    raise ValueError("Several mask files map to the same image id.")

unmatched_ids = [
    image_id for image_id in data["image_id"] if image_id not in boxes_by_image_id
]
if unmatched_ids:
    raise ValueError(
        f"{len(unmatched_ids)} metadata rows have no matching mask, "
        f"e.g. {unmatched_ids[:5]}"
    )

# Boxes reordered to follow the rows of ``data``.
ordered_boxes = [boxes_by_image_id[image_id] for image_id in data["image_id"]]

for position, column in enumerate(("top", "bottom", "left", "right")):
    data[column] = [box[position] for box in ordered_boxes]

In [7]:
# Destination of the metadata table extended with the box columns.
extended_metadata_path = os.path.join("data", "HAM10000_metadata_ext.csv")

# The frame's index is written as the first, unnamed column (pandas default).
data.to_csv(path_or_buf=extended_metadata_path)