# DATA PREPARATION

This notebook is dedicated to preparing the dataset for two key tasks:

1. Training a YOLOv8 Model for Pothole Detection
2. Fine-tuning the Segment Anything Model (SAM) for Pothole Segmentation

For YOLOv8, the object detection model requires annotations in a specific format: one text file per image, with one line per object of the form `class x_center y_center width height`, where the four box values are normalized to the image size. This notebook converts the raw polygon (YOLO segmentation) labels into this structure to enable seamless training with YOLOv8.

For fine-tuning SAM, the model requires a different input format. Specifically:
1. Bounding boxes in (x_min, y_min, x_max, y_max) format, expressed in pixel coordinates, are used as prompts,
2. Binary masks (segmenting the pothole regions) serve as ground truth labels.

This notebook processes the raw dataset to generate and save both bounding box prompts and corresponding binary masks in the appropriate structure. These preprocessed inputs are then used to train SAM’s mask decoder while keeping the encoders frozen.

In summary, this notebook handles all the format conversions and preprocessing necessary to make the dataset compatible with both the YOLOv8 and fine-tuned SAM pipelines.

In [3]:
import numpy as np
import pickle
import cv2
import os

In [1]:
import os

def convert_polygons_to_bboxes(label_dir, bbox_dir, class_id=0):
    """
    Converts polygon labels in YOLO segmentation format to bounding boxes in YOLO detection format.

    Every ``.txt`` file in ``label_dir`` is read line by line. Each line is
    expected to look like ``<class> x1 y1 x2 y2 ... xn yn``. For every usable
    line, one output line ``<class_id> x_center y_center width height`` is
    written, expressed in the same units as the input coordinates. The class
    stored in the input line is ignored and replaced by ``class_id``.

    A line is skipped when it holds fewer than three complete (x, y) pairs
    (this also covers blank lines). A dangling trailing value without a
    partner is ignored. An output file is written for every input file, even
    when no line was usable (the file is then empty).

    Args:
        label_dir (str): path to the folder containing original label .txt files (polygon format)
        bbox_dir (str): path to the folder to save converted bbox .txt files (YOLO format);
            created if it does not exist
        class_id (int): class ID to use for all bounding boxes (default is 0 for pothole)

    Raises:
        ValueError: if a label line contains a token that cannot be parsed as a float.
    """
    os.makedirs(bbox_dir, exist_ok=True)

    for filename in os.listdir(label_dir):
        if not filename.endswith('.txt'):
            continue

        label_path = os.path.join(label_dir, filename)
        bbox_path = os.path.join(bbox_dir, filename)

        bbox_lines = []
        with open(label_path, 'r') as f:
            for line in f:
                parts = list(map(float, line.split()))
                coords = parts[1:]  # skip class ID

                # A polygon needs at least three full (x, y) points; counting
                # pairs (not raw tokens) also protects against an odd number
                # of values, where the last one has no partner.
                n_points = len(coords) // 2
                if n_points < 3:
                    continue

                x_coords = coords[0:2 * n_points:2]
                y_coords = coords[1:2 * n_points:2]

                x_min, x_max = min(x_coords), max(x_coords)
                y_min, y_max = min(y_coords), max(y_coords)

                x_center = (x_min + x_max) / 2
                y_center = (y_min + y_max) / 2
                width = x_max - x_min
                height = y_max - y_min

                bbox_lines.append(
                    f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
                )

        # Save converted bboxes
        with open(bbox_path, 'w') as f:
            f.write('\n'.join(bbox_lines))

    print(f"✅ Converted all polygon labels from '{label_dir}' to YOLO bboxes in '{bbox_dir}'")

In [2]:
# Run the polygon -> bbox conversion once per dataset split
# (training set first, then validation set).
for split in ("train", "valid"):
    convert_polygons_to_bboxes(
        label_dir=f'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/{split}/labels',
        bbox_dir=f'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/{split}/bbox',
        class_id=0,  # pothole
    )


✅ Converted all polygon labels from 'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/train/labels' to YOLO bboxes in 'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/train/bbox'
✅ Converted all polygon labels from 'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/valid/labels' to YOLO bboxes in 'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/valid/bbox'


In [37]:
def read_image(img_path):
    """Loads an image from a path and returns it in RGB channel order.

    Args:
        img_path (str): path of the image file to load.

    Returns:
        np.ndarray: the decoded image, converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: if ``img_path`` does not point to an existing file.
        ValueError: if the file exists but OpenCV could not read it.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # passing that on to cvtColor would fail with an opaque OpenCV
        # assertion that does not mention the offending path.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f"Image file not found: {img_path}")
        raise ValueError(
            f"OpenCV could not read image (unsupported or corrupt file?): {img_path}"
        )
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [38]:
def extract_masks_and_bboxes(label_path, img_shape):
    """
    Rasterizes the polygons of one label file into per-component masks and boxes.

    All polygons in the file are filled into one shared mask, which is then
    split into 8-connected components. Instances are therefore defined by
    connected regions rather than by label lines: polygons that overlap or
    touch end up in the same mask and box.

    Lines with fewer than three complete (x, y) pairs are skipped (this also
    covers blank lines), and a dangling trailing value without a partner is
    ignored.

    Args:
        label_path (str): path to .txt label file (with polygon coordinates)
        img_shape (tuple): (H, W) of the image (for scaling normalized coords)

    Returns:
        masks (List[np.ndarray]): list of binary (0/1, uint8) masks, each (H, W)
        boxes (List[np.ndarray]): list of bounding boxes [x_min, y_min, x_max, y_max]
            in pixels, where x_max / y_max are the component's left + width and
            top + height as reported by OpenCV's component statistics
    """
    h, w = img_shape
    base_mask = np.zeros((h, w), dtype=np.uint8)

    with open(label_path, 'r') as f:
        for line in f:
            items = [float(x) for x in line.strip().split()]
            coords = items[1:]  # skip class id

            # Count complete points so that an odd number of values cannot
            # index past the end of the list when reading the y partner.
            n_points = len(coords) // 2
            if n_points < 3:
                continue

            # Scale normalized coordinates to pixel positions.
            polygon = [
                (int(round(coords[i] * w)), int(round(coords[i + 1] * h)))
                for i in range(0, 2 * n_points, 2)
            ]

            cv2.fillPoly(base_mask, [np.array(polygon, dtype=np.int32)], 1)

    # Now separate components in the filled mask
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        base_mask, connectivity=8
    )

    masks = []
    boxes = []

    for i in range(1, num_labels):  # skip background
        component_mask = (labels == i).astype(np.uint8)
        masks.append(component_mask)

        # Each stats row is (left, top, width, height, area); area is unused.
        x, y, w_box, h_box, _ = stats[i]
        boxes.append(np.array([x, y, x + w_box, y + h_box]))

    return masks, boxes


In [39]:
image_dir = 'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/train/images'
label_dir = 'C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/train/labels'

# Both dictionaries are keyed by the image file name without its extension.
ground_truth_masks = {}  # key -> list of (H, W) binary masks
bbox_coords = {}         # key -> list of [x_min, y_min, x_max, y_max] boxes

# os.listdir returns entries in arbitrary order; sorting makes the dictionary
# order (and any later positional lookup into its keys) reproducible.
for file in sorted(os.listdir(image_dir)):
    img_path = os.path.join(image_dir, file)
    k = os.path.splitext(file)[0]
    label_path = os.path.join(label_dir, k + ".txt")

    # Skip sub-directories and any entry that has no matching label file
    # (stray non-image files, unlabeled images) instead of crashing mid-run.
    if not os.path.isfile(img_path) or not os.path.isfile(label_path):
        print(f"Skipping '{file}': not a file or no label at '{label_path}'")
        continue

    img = read_image(img_path)
    # extract_masks_and_bboxes already returns 0/1 uint8 masks, so no further
    # binarization is needed before storing them.
    masks, boxes = extract_masks_and_bboxes(label_path, img.shape[:2])
    ground_truth_masks[k] = masks
    bbox_coords[k] = boxes

In [40]:
# Folder that receives the pickled outputs of this notebook.
save_dir = "C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/train/"
# Serialize the mask dictionary (image name without extension -> list of masks).
with open(os.path.join(save_dir, "ground_truth_masks.pkl"), "wb") as f:
    pickle.dump(ground_truth_masks, f)

In [41]:
# Serialize the bounding-box dictionary next to the masks; relies on
# `save_dir` having been defined by the cell that saved the masks.
with open(os.path.join(save_dir, "bbox_coords.pkl"), "wb") as f:
    pickle.dump(bbox_coords, f)

In [42]:
# Round-trip check: read back the mask pickle written above.
file_path = "C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/train/ground_truth_masks.pkl"

# Load the dictionary (this rebinds `ground_truth_masks` to the reloaded copy).
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open(file_path, "rb") as f:
    ground_truth_masks = pickle.load(f)

In [43]:
# Number of keys in the reloaded mask dictionary.
len(ground_truth_masks)

720

In [44]:
# Round-trip check: read back the bounding-box pickle written above.
# Note that `file_path` is reused and now points at the bbox file.
file_path = "C:/Users/srish/OneDrive/Desktop/Pothole_Segmentation_YOLOv8/train/bbox_coords.pkl"

# Load the dictionary (this rebinds `bbox_coords` to the reloaded copy).
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open(file_path, "rb") as f:
    bbox_coords = pickle.load(f)

In [45]:
# Number of keys in the reloaded bbox dictionary; should match the mask count.
len(bbox_coords)

720

In [46]:
# Pick one entry for a manual spot check.
# NOTE(review): index 21 is an arbitrary choice and which image it selects
# depends on the order in which entries were inserted when the dictionary
# was built; it also requires at least 22 entries.
keys = list(bbox_coords.keys())
k = keys[21]
boxes = bbox_coords[k]

In [47]:
# Show every box stored for the selected image: array shape and its
# [x_min, y_min, x_max, y_max] values.
for i, box in enumerate(boxes):
    print(f"Box {i} shape: {box.shape}, content: {box}")

Box 0 shape: (4,), content: [158 348 442 445]


In [48]:
# Show every mask stored for the same image key `k`.
# NOTE(review): the printed label says "Box" although these are masks
# (carried over from the box-inspection cell), and printing the full array
# is noisy; a summary such as mask.shape and mask.sum() may be preferable.
for i, mask in enumerate(ground_truth_masks[k]):
    print(f"Box {i} shape: {mask.shape}, content: {mask}")

Box 0 shape: (640, 640), content: [[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
