<h3>This notebook contains code used to create bounding boxes around cracks. Pickle files containing NumPy arrays will be created to hold that information.</h3>

In [2]:
import os
import pickle
import warnings

import cv2
import numpy as np

In [3]:
def get_paths(path):
    """Return the path of every file found under ``path``, searching recursively.

    Paths come back in the order ``os.walk`` yields them; they are not sorted.
    """
    collected = []
    for root, _dirs, files in os.walk(path):
        for file_name in files:
            collected.append(os.path.join(root, file_name))
    return collected


# Mask images are looked up in the "masks" sub-folder of each dataset split.
train_dir = os.path.join("data", "train")
masks_dir_train = os.path.join(train_dir, "masks")
mask_paths_train = get_paths(masks_dir_train)

valid_dir = os.path.join("data", "valid")
masks_dir_valid = os.path.join(valid_dir, "masks")
mask_paths_valid = get_paths(masks_dir_valid)

In [4]:
def _relabel_large_components(binary_image, min_area):
    """Label connected components and keep only those larger than ``min_area``.

    Returns ``(label_image, stats)``: the kept components are renumbered
    1..k in their original label order (0 everywhere else), and ``stats``
    holds the matching rows from ``cv2.connectedComponentsWithStats``.
    Returns ``None`` when no component is large enough.
    """
    num_labels, labels, stats, _centroids = cv2.connectedComponentsWithStats(binary_image, connectivity=8)

    # Label 0 is the background, so only labels 1..num_labels-1 are candidates.
    keep = np.flatnonzero(stats[1:, cv2.CC_STAT_AREA] > min_area) + 1
    if keep.size == 0:
        return None

    # Lookup table "old label -> new label". Indexing it with the label image
    # renumbers every pixel in one pass instead of building and re-scanning
    # one full-size boolean mask per component.
    lookup = np.zeros(num_labels, dtype=labels.dtype)
    lookup[keep] = np.arange(1, keep.size + 1)
    return lookup[labels], stats[keep]


def find_boxes(
    paths: list[str],
    stats_file: str,
    labels_file: str,
    batch_size: int = 100,
    min_area: int = 20,
) -> None:
    """Find the large connected components in each mask and pickle the results.

    Every mask is read as grayscale, binarised at 127 and split into
    8-connected components. Components whose pixel area is greater than
    ``min_area`` are kept and renumbered 1..k.

    Output format: for each batch of ``batch_size`` paths, one list is
    appended to ``labels_file`` (relabelled label images) and one to
    ``stats_file`` (arrays of per-component stats rows as returned by
    ``cv2.connectedComponentsWithStats``, which include the bounding box).
    Each file therefore holds several consecutive pickles; read them back by
    calling ``pickle.load`` repeatedly until ``EOFError``.

    Note: masks with no component above ``min_area`` and masks that cannot be
    read are skipped, so the stored entries do not map one-to-one onto
    ``paths``.

    Args:
        paths: Paths of the mask images to process.
        stats_file: Destination pickle file for the component statistics.
        labels_file: Destination pickle file for the label images.
        batch_size: Number of masks processed between two pickle dumps.
        min_area: Components must have an area strictly greater than this
            (in pixels) to be kept.
    """
    for out_file in (stats_file, labels_file):
        out_dir = os.path.dirname(out_file)
        # A bare file name has no directory part; os.makedirs("") would raise.
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    with open(labels_file, 'wb') as labels_f, open(stats_file, 'wb') as stats_f:
        for start in range(0, len(paths), batch_size):
            batch_labels = []
            batch_stats = []

            for mask_path in paths[start:start + batch_size]:
                image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                if image is None:
                    # cv2.imread signals failure by returning None (e.g. for
                    # non-image files picked up by a directory walk).
                    warnings.warn(f"Skipping unreadable mask image: {mask_path}", stacklevel=2)
                    continue

                _, binary_image = cv2.threshold(image, 127, 255, cv2.THRESH_BINARY)
                components = _relabel_large_components(binary_image, min_area)
                if components is None:
                    continue

                label_image, component_stats = components
                batch_labels.append(label_image)
                batch_stats.append(component_stats)

            # One pickle per batch keeps memory bounded by batch_size images.
            pickle.dump(batch_labels, labels_f)
            pickle.dump(batch_stats, stats_f)

In [5]:
# Training split: the pickled outputs are written inside the training directory.
train_labels_file = os.path.join(train_dir, "labels.pkl")
train_stats_file = os.path.join(train_dir, "stats.pkl")

find_boxes(
    paths=mask_paths_train,
    stats_file=train_stats_file,
    labels_file=train_labels_file,
)

In [6]:
# Validation split: the pickled outputs are written inside the validation directory.
valid_labels_file = os.path.join(valid_dir, "labels.pkl")
valid_stats_file = os.path.join(valid_dir, "stats.pkl")

find_boxes(
    paths=mask_paths_valid,
    stats_file=valid_stats_file,
    labels_file=valid_labels_file,
)