In [14]:
import cv2
import numpy as np
import os
from sklearn.cluster import DBSCAN

In [15]:
def load_and_preprocess_image(image_path):
    """
    Read a colour image from disk and derive an inverted binary mask from it.

    The image is converted to grayscale, smoothed with a 5x5 Gaussian
    kernel, and passed through an inverse binary threshold (cutoff 127,
    maximum value 255).

    Returns:
        A ``(colour_image, binary_mask)`` tuple.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None`` for the path.
    """
    color_img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if color_img is None:
        raise FileNotFoundError(f"Error: Could not load image {image_path}")

    # Grayscale first, then blur, so the threshold works on a smoothed
    # single-channel image.
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(color_img, cv2.COLOR_BGR2GRAY),
        (5, 5),
        0,
    )
    # cv2.threshold returns (used_threshold, image); only the image is needed.
    mask = cv2.threshold(smoothed, 127, 255, cv2.THRESH_BINARY_INV)[1]
    return color_img, mask

In [16]:
def extract_and_describe_contours(binary_image):
    """
    Find the external contours of a binary image and describe each one.

    Every contour with a non-zero ``m00`` moment yields a dict with keys
    ``"contour"`` (the contour itself), ``"centroid"`` (integer ``(x, y)``
    computed from the image moments) and ``"angle"`` (the last element of
    ``cv2.minAreaRect``). Contours whose ``m00`` is zero are skipped because
    their centroid would require a division by zero.

    Returns:
        A list of such dicts, in the order the contours were found.
    """
    outlines, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    described = []
    for outline in outlines:
        moments = cv2.moments(outline)
        area = moments["m00"]
        if area != 0:
            center = (int(moments["m10"] / area), int(moments["m01"] / area))
            tilt = cv2.minAreaRect(outline)[-1]  # rotation of the bounding rectangle
            described.append({"contour": outline, "centroid": center, "angle": tilt})
    return described

In [17]:
def cluster_contours(contour_data, eps=50, min_samples=2):
    """
    Cluster contours based on centroids and angles using DBSCAN.

    Args:
        contour_data: Sequence of dicts, each with a ``"centroid"`` pair
            ``(x, y)`` and an ``"angle"`` value.
        eps: Neighbourhood radius passed to ``DBSCAN``.
        min_samples: Minimum neighbourhood size passed to ``DBSCAN``.

    Returns:
        numpy.ndarray: One label per entry of ``contour_data``, in order, as
        produced by ``DBSCAN.fit_predict``. An empty integer array is
        returned when ``contour_data`` is empty.
    """
    if len(contour_data) == 0:
        # DBSCAN rejects an empty feature matrix, so an image without any
        # usable contour would otherwise abort the whole run.
        return np.empty(0, dtype=int)

    # NOTE(review): x, y and angle are used unscaled, so ``eps`` applies to
    # pixel coordinates and angle values alike -- confirm this is intended.
    features = np.array(
        [
            (item["centroid"][0], item["centroid"][1], item["angle"])
            for item in contour_data
        ]
    )
    return DBSCAN(eps=eps, min_samples=min_samples).fit_predict(features)

In [18]:
def visualize_clusters(image, contour_data, labels):
    """
    Draw every clustered contour on a copy of ``image``, one colour per cluster.

    One random colour is generated per distinct value in ``labels``; a
    contour labelled ``-1`` is left undrawn. The input image is not
    modified.

    Returns:
        The annotated copy of ``image``.
    """
    canvas = image.copy()

    # One random colour per distinct label value.
    palette = []
    for _ in range(len(set(labels))):
        palette.append(tuple(np.random.randint(0, 255, 3).tolist()))

    for position, entry in enumerate(contour_data):
        outline, cluster_id = entry["contour"], labels[position]
        if cluster_id == -1:
            continue  # label -1 is not drawn
        cv2.drawContours(canvas, [outline], -1, palette[cluster_id], 2)
    return canvas

In [19]:
def process_image(image_path, output_path, eps=50, min_samples=2):
    """
    Full pipeline to process a single image: load, preprocess, cluster, visualize, save.

    Args:
        image_path: Path of the image to read.
        output_path: Path the annotated image is written to with ``cv2.imwrite``.
        eps: Forwarded to ``cluster_contours``.
        min_samples: Forwarded to ``cluster_contours``.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the output was not written.
        Exceptions raised by the pipeline helpers propagate unchanged.
    """
    original_image, binary = load_and_preprocess_image(image_path)
    contour_data = extract_and_describe_contours(binary)
    labels = cluster_contours(contour_data, eps, min_samples)
    clustered_image = visualize_clusters(original_image, contour_data, labels)

    # cv2.imwrite reports failure through its boolean return value; ignoring
    # it would print a success message for a file that was never written.
    if not cv2.imwrite(output_path, clustered_image):
        raise OSError(f"Error: Could not write image {output_path}")
    print(f"Processed image saved to {output_path}")

In [20]:
def batch_process_images(input_folder, output_folder, eps=50, min_samples=2):
    """
    Process all PNG/JPEG images in the input folder and save them in the output folder.

    Each regular file whose extension is ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) is passed to ``process_image`` and written to
    ``output_folder`` as ``clustered_<original name>``. Files are handled in
    sorted name order so repeated runs behave the same way.

    Args:
        input_folder: Directory to scan (not recursive).
        output_folder: Directory for the results; created if missing.
        eps: Forwarded to ``process_image``.
        min_samples: Forwarded to ``process_image``.
    """
    image_extensions = {".png", ".jpg", ".jpeg"}

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for filename in sorted(os.listdir(input_folder)):
        input_path = os.path.join(input_folder, filename)
        # Compare the real extension (with its dot) so names that merely end
        # in "png"/"jpg" are not mistaken for images, and skip directories.
        extension = os.path.splitext(filename)[1].lower()
        if extension not in image_extensions or not os.path.isfile(input_path):
            continue
        output_path = os.path.join(output_folder, f"clustered_{filename}")
        process_image(input_path, output_path, eps, min_samples)

# Example usage: run the pipeline on every image in input_dir and write the
# annotated copies to output_dir.
# NOTE(review): these are machine-specific absolute paths -- point them at
# your own data folders before running this cell.
input_dir = r"C:/Users/Administrator/Documents/GitHub/Untitled Folder/data/input/raw"
output_dir = r"C:/Users/Administrator/Documents/GitHub/Untitled Folder/data/output_gt/raw"

# min_samples=3 overrides the function default of 2; eps=50 equals the default.
batch_process_images(input_dir, output_dir, eps=50, min_samples=3)


Processed image saved to C:/Users/Administrator/Documents/GitHub/Untitled Folder/data/output_gt/raw\clustered_level_1.PNG
Processed image saved to C:/Users/Administrator/Documents/GitHub/Untitled Folder/data/output_gt/raw\clustered_level_2.PNG
Processed image saved to C:/Users/Administrator/Documents/GitHub/Untitled Folder/data/output_gt/raw\clustered_level_3.PNG
Processed image saved to C:/Users/Administrator/Documents/GitHub/Untitled Folder/data/output_gt/raw\clustered_level_4.PNG


In [8]:
pip install opencv-python-headless

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl (39.4 MB)
   ---------------------------------------- 0.0/39.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/39.4 MB 495.5 kB/s eta 0:01:20
   ---------------------------------------- 0.2/39.4 MB 1.4 MB/s eta 0:00:29
   ---------------------------------------- 0.3/39.4 MB 2.0 MB/s eta 0:00:20
    --------------------------------------- 0.5/39.4 MB 2.4 MB/s eta 0:00:17
    --------------------------------------- 0.7/39.4 MB 3.0 MB/s eta 0:00:13
   - -------------------------------------- 1.4/39.4 MB 4.5 MB/s eta 0:00:09
   -- ------------------------------------- 2.7/39.4 MB 7.6 MB/s eta 0:00:05
   ---- ----------------------------------- 4.3/39.4 MB 10.5 MB/s eta 0:00:04
   ----- -------------------------------