<h3>This notebook contains the code used to create bounding boxes around cracks. The connected-component label maps and statistics (which include the bounding boxes) are saved to NumPy (.npy) files.</h3>

In [6]:
import os

import cv2
import numpy as np

In [7]:
def get_paths(path):
    """Return the path of every file found under ``path``, searching recursively."""
    collected = []
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            collected.append(os.path.join(folder, name))
    return collected


# Each split keeps its mask images in a "masks" sub-folder.
train_dir = os.path.join("data", "train")
valid_dir = os.path.join("data", "valid")

masks_dir_train = os.path.join(train_dir, "masks")
masks_dir_valid = os.path.join(valid_dir, "masks")

mask_paths_train = get_paths(masks_dir_train)
mask_paths_valid = get_paths(masks_dir_valid)

In [8]:
def find_boxes(paths: list[str], stats_file: str, labels_file: str, batch_size: int = 100) -> None:
    """Label the connected components of each mask and save the results in batches.

    Every mask is read as grayscale, binarised with a threshold of 127 and passed
    to ``cv2.connectedComponentsWithStats`` (8-connectivity). The label maps and
    the statistics arrays of one batch are packed into object arrays and written
    with ``np.save``. The first batch creates (or overwrites) the output files;
    every later batch is appended to the same files as a further ``.npy`` record.

    To read the output back, open the file once and call
    ``np.load(handle, allow_pickle=True)`` one time per batch.

    Args:
        paths: Mask image paths, processed in the given order.
        stats_file: Destination file for the per-mask statistics arrays.
        labels_file: Destination file for the per-mask label maps.
        batch_size: Number of masks stored in each ``np.save`` record.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if a path cannot be
            read as an image.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    for start in range(0, len(paths), batch_size):
        batch_labels = []
        batch_stats = []

        for mask_path in paths[start:start + batch_size]:
            image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable file by returning None, not by raising.
                raise ValueError(f"Could not read mask image: {mask_path}")

            _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
            _, labels, stats, _ = cv2.connectedComponentsWithStats(binary_image, connectivity=8)

            batch_labels.append(labels)
            batch_stats.append(stats)

        batch_labels_array = np.array(batch_labels, dtype=object)
        batch_stats_array = np.array(batch_stats, dtype=object)

        # Only the current batch is kept in memory. The first batch truncates any
        # previous output, later batches append. Writing through a handle uses the
        # given paths exactly (np.save would add ".npy" to a bare file name).
        mode = "wb" if start == 0 else "ab"
        with open(labels_file, mode) as f:
            np.save(f, batch_labels_array)
        with open(stats_file, mode) as f:
            np.save(f, batch_stats_array)

In [9]:
# Output files for the training split, stored next to its data.
train_stats_file = os.path.join(train_dir, "stats.npy")
train_labels_file = os.path.join(train_dir, "labels.npy")

find_boxes(
    paths=mask_paths_train,
    stats_file=train_stats_file,
    labels_file=train_labels_file,
)

In [10]:
# Output files for the validation split, stored next to its data.
valid_stats_file = os.path.join(valid_dir, "stats.npy")
valid_labels_file = os.path.join(valid_dir, "labels.npy")

find_boxes(
    paths=mask_paths_valid,
    stats_file=valid_stats_file,
    labels_file=valid_labels_file,
)