# Object Volume Measurement using ArUco Marker

This notebook demonstrates a **hybrid computer vision approach** to measuring the **real-world dimensions and estimated volume** of an object in a video or live camera feed.  
The method combines **ArUco-based scale calibration** with **image-based contour analysis**, producing a robust, practical system for general object measurement and expansion tracking.

In [164]:
import cv2
import numpy as np
import csv
from datetime import datetime
import time
import os

## Initialize ArUco Detector

We use OpenCV's built-in ArUco module to detect a specific marker pattern (e.g., from the 4x4 dictionary).  
This provides a known physical reference to convert pixel measurements into centimeters.

In [165]:
def setup_aruco_detector():
    """Build an ArUco detector for the predefined 4x4 (50-marker) dictionary.

    Uses the ``cv2.aruco.ArucoDetector`` class API (OpenCV >= 4.7) with
    default ``DetectorParameters``.
    """
    dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
    default_params = cv2.aruco.DetectorParameters()
    return cv2.aruco.ArucoDetector(dictionary, default_params)

## Detect ArUco Marker and Compute Scale

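This function returns, in order:
- `cm_per_pixel`: conversion factor from pixels to centimeters  
- `measured_marker_cm`: the marker's side length in centimeters, recomputed from its pixel size and the scale (for reference)  
- `corners`: marker corner coordinates for visualization

If the requested marker is not detected, all three values are `None`.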


In [166]:
def detect_aruco_scale(gray, detector, marker_id, marker_size_cm):
    """Detect the reference ArUco marker and compute the scale (cm/pixel).

    Args:
        gray: Image handed unchanged to ``detector.detectMarkers``.
        detector: Object exposing ``detectMarkers(image)`` that returns
            ``(corners, ids, rejected)``.
        marker_id: ID of the marker used as the physical reference.
        marker_size_cm: Real side length of that marker in centimeters.

    Returns:
        A tuple ``(cm_per_pixel, measured_marker_cm, corners)`` where
        ``corners`` is the corner array of the matched marker.
        ``(None, None, None)`` is returned when the marker is not detected
        or when its detected outline has no usable (positive) side length.
    """
    corners, ids, _ = detector.detectMarkers(gray)
    if ids is None or marker_id not in ids:
        return None, None, None

    idx = np.where(ids == marker_id)[0][0]
    c = corners[idx][0]

    # Average the four edge lengths (corner i -> corner i+1, wrapping around).
    side_px = float(
        np.mean([np.linalg.norm(c[i] - c[(i + 1) % 4]) for i in range(4)])
    )

    # A collapsed or NaN outline would turn the division below into inf/nan
    # and poison every downstream measurement; report "no marker" instead so
    # callers fall back to their last known scale.
    if not side_px > 0:
        return None, None, None

    cm_per_pixel = marker_size_cm / side_px
    measured_marker_cm = side_px * cm_per_pixel
    return cm_per_pixel, measured_marker_cm, c

## Image Preprocessing

Enhances visibility and contrast for stable object contour extraction.

In [None]:
def preprocess_image(frame):
    """Return a grayscale version of ``frame`` with bright regions tone-shifted.

    Steps:
      1. Equalize the V channel in HSV space, convert back, and take the
         grayscale of that equalized image.
      2. Threshold the equalized grayscale at 60 to obtain a binary mask.
      3. Where the mask is set, replace the original pixels with
         ``clip((frame + 100) * 0.5 + 64, 0, 255)``.
      4. Convert the composited frame to grayscale and return it.

    ``frame`` is converted with ``COLOR_BGR2HSV``, i.e. it is treated as BGR.
    """
    hue, sat, val = cv2.split(cv2.cvtColor(frame, cv2.COLOR_BGR2HSV))
    equalized_hsv = cv2.merge([hue, sat, cv2.equalizeHist(val)])
    equalized_bgr = cv2.cvtColor(equalized_hsv, cv2.COLOR_HSV2BGR)
    equalized_gray = cv2.cvtColor(equalized_bgr, cv2.COLOR_BGR2GRAY)

    _, binary = cv2.threshold(equalized_gray, 60, 255, cv2.THRESH_BINARY)
    selected = binary == 255

    # Brightness/contrast shift computed in float32, then clipped to uint8.
    shifted = (frame.astype(np.float32) + 100) * 0.5 + 64
    shifted = np.clip(shifted, 0, 255).astype(np.uint8)

    composite = frame.copy()
    composite[selected] = shifted[selected]
    return cv2.cvtColor(composite, cv2.COLOR_BGR2GRAY)

## Contour Detection

Find the largest contour using Sobel edge detection and morphological filtering.


In [168]:
def detect_largest_contour(gray2):
    """Return the external contour with the largest area, or ``None``.

    Pipeline: 9x9 Gaussian blur -> Sobel gradient magnitude -> Otsu
    threshold -> 7x7 morphological closing -> external contours.
    """
    smoothed = cv2.GaussianBlur(gray2, (9, 9), 0)

    grad_x = cv2.Sobel(smoothed, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3)
    edges = cv2.convertScaleAbs(cv2.magnitude(grad_x, grad_y))

    _, binary = cv2.threshold(edges, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    closing_kernel = np.ones((7, 7), np.uint8)
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, closing_kernel)

    found, _ = cv2.findContours(closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not found:
        return None
    return max(found, key=cv2.contourArea)

## Compute Volume and Error Margin
Uses Bai et al. (2006) volume estimation and calculates weighted physical error.

In [169]:
def compute_volume_and_error(contour, cm_per_pixel, true_dims=(17.5, 6.5)):
    """Compute width, height, volume, and margin of error for a contour.

    The contour is filled into a mask the size of its bounding box. Each
    pixel column of that mask is treated as a circular disc whose diameter
    is the column's vertical extent and whose thickness is one pixel; the
    disc volumes are summed to estimate the object's volume.

    Args:
        contour: Contour as accepted by ``cv2.boundingRect`` and
            ``cv2.drawContours``.
        cm_per_pixel: Scale factor used to convert pixels to centimeters.
        true_dims: ``(width_cm, height_cm)`` reference dimensions the
            measured bounding box is compared against.

    Returns:
        ``(w_cm, h_cm, volume_cm3, err_total, (x, y, w, h))``, or ``None``
        when 10 or fewer columns contain more than one filled pixel.
    """
    x, y, w, h = cv2.boundingRect(contour)

    # The mask only spans the bounding box, while the contour points are
    # expressed relative to the image the contour came from. Shift them by
    # (-x, -y) so the shape lands inside the mask instead of being clipped.
    bun_mask = np.zeros((h, w), np.uint8)
    cv2.drawContours(bun_mask, [contour], -1, 255, -1, offset=(-x, -y))

    # Per column: first and last filled row, and how many pixels are filled.
    filled = bun_mask > 0
    filled_count = filled.sum(axis=0)
    first_row = filled.argmax(axis=0)
    last_row = (h - 1) - filled[::-1].argmax(axis=0)

    # Only columns with more than one filled pixel contribute a diameter.
    diameters_px = (last_row - first_row)[filled_count > 1]

    if len(diameters_px) <= 10:
        return None

    diameters_cm = diameters_px * cm_per_pixel
    delta_x_cm = cm_per_pixel  # each disc is one pixel column thick
    volume_cm3 = np.sum(np.pi * (diameters_cm / 2) ** 2 * delta_x_cm)

    # === Width/Height in cm ===
    w_cm = w * cm_per_pixel
    h_cm = h * cm_per_pixel

    # === Error estimation (physical comparison only) ===
    # NOTE(review): the relative differences are scaled by 10, not 100, even
    # though the result is labelled and displayed as a percentage. Kept as-is
    # to preserve logged values; confirm whether 100 was intended.
    true_w, true_h = true_dims
    width_diff_pct = abs((w_cm - true_w) / true_w) * 10
    height_diff_pct = abs((h_cm - true_h) / true_h) * 10
    # Weighted root-sum-square: width contributes 70%, height 30%.
    err_total = np.sqrt(0.7 * (width_diff_pct**2) + 0.3 * (height_diff_pct**2))

    return w_cm, h_cm, volume_cm3, err_total, (x, y, w, h)

## CSV Logger Helpers
Initialize and append to CSV file for measurement tracking.

In [170]:
def init_csv_logger():
    """
    Create a new timestamped measurement log under 'data/' and return its path.

    The directory is created when missing, and the file is written with a
    single header row naming the columns of each measurement entry.
    """
    header = [
        "Timestamp",
        "Frame Index",
        "Marker Status",
        "cm/pixel",
        "Width (cm)",
        "Height (cm)",
        "Volume (cm^3)",
        "Margin of Error (%)",
    ]

    os.makedirs("data", exist_ok=True)
    created_at = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"data/Object Measurements_{created_at}.csv"

    with open(filename, mode="w", newline="") as handle:
        csv.writer(handle).writerow(header)
    return filename


# Global state for time-based throttling.
# Holds the monotonic-clock reading of the last successful write; -inf means
# "nothing logged yet", so the first measurement is always written.
_last_log_time = float("-inf")
LOG_INTERVAL_SEC = 1.0  # only log once per second


def log_measurement(filename, frame_idx, marker_status, cm_per_pixel, w_cm, h_cm, volume_cm3, err_total):
    """
    Append a measurement entry to the CSV log.
    Only writes once per LOG_INTERVAL_SEC to avoid frame-by-frame flooding.

    Args:
        filename: Path of the CSV file to append to.
        frame_idx: Index of the frame the measurement belongs to.
        marker_status: Text describing how the scale was obtained.
        cm_per_pixel: Scale factor, written rounded to 6 decimals.
        w_cm, h_cm, volume_cm3, err_total: Measurements, written rounded
            to 3 decimals.

    Failures while writing are reported on stdout and do not raise; in that
    case the throttle timer is not advanced, so the next call retries.
    """
    global _last_log_time
    # Use the monotonic clock for the interval: unlike time.time() it cannot
    # jump backwards or forwards when the system clock is adjusted.
    current_time = time.monotonic()

    # Skip if less than LOG_INTERVAL_SEC since last log
    if current_time - _last_log_time < LOG_INTERVAL_SEC:
        return

    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    try:
        with open(filename, mode="a", newline="") as f:
            writer = csv.writer(f)
            writer.writerow([
                timestamp,
                frame_idx,
                marker_status,
                round(cm_per_pixel, 6),
                round(w_cm, 3),
                round(h_cm, 3),
                round(volume_cm3, 3),
                round(err_total, 3),
            ])
        _last_log_time = current_time
    except Exception as e:
        # Deliberately best-effort: logging must never stop the measurement loop.
        print(f"‚ö†Ô∏è Failed to write log: {e}")


## Main Measurement Loop
Integrates all modules:
- Captures frames
- Detects ArUco marker
- Processes object
- Computes measurements
- Logs to CSV

In [171]:
def run_measurement(use_webcam=True, video_path="assets/video2.mp4"):
    """
    Run object volume measurement using ArUco marker scale and contour detection.
    Supports:
        - Internal/external webcam (auto-detected)
        - Video file
        - Single image (.jpg, .png, etc.)
    Automatically logs results to CSV (per second for video, immediate for photo).
    Press 'q' to quit or close the image window.

    Args:
        use_webcam: When truthy, read frames from a camera. When falsy and
            ``video_path`` is an existing file, read from that file instead.
        video_path: Path to a video or image file. Files ending in .jpg,
            .jpeg, .png or .bmp are processed as a single image.

    Raises:
        IOError: If the image cannot be read, no camera is detected, or the
            video source cannot be opened.
    """

    def find_available_cameras(max_test=5):
        """Scan and list available camera indices."""
        available = []
        for i in range(max_test):
            cap = cv2.VideoCapture(i)
            try:
                if cap.isOpened():
                    available.append(i)
            finally:
                # Release the probe handle whether or not it opened.
                cap.release()
        return available

    # Determine input mode
    # NOTE: a falsy use_webcam combined with a non-existent video_path falls
    # through to camera mode rather than raising.
    if not use_webcam and os.path.isfile(video_path):
        ext = os.path.splitext(video_path)[1].lower()
        if ext in [".jpg", ".jpeg", ".png", ".bmp"]:
            mode = "image"
        else:
            mode = "video"
    else:
        mode = "camera"

    # Setup modules
    aruco_detector = setup_aruco_detector()
    csv_filename = init_csv_logger()

    # Config
    reference_width_cm, reference_height_cm = 4.0, 2.1
    marker_id_local, marker_size_cm_local = 0, 5.0
    last_cm_per_pixel_local, soft_mode = None, True

    # Handle Image Mode
    if mode == "image":
        os.makedirs("data/results", exist_ok=True)
        frame = cv2.imread(video_path)
        if frame is None:
            raise IOError(f"Failed to read image file: {video_path}")

        vis = frame.copy()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        cm_per_pixel, _, marker_corners = detect_aruco_scale(
            gray, aruco_detector, marker_id_local, marker_size_cm_local
        )

        if marker_corners is not None:
            cv2.polylines(vis, [np.intp(marker_corners)], True, (255, 0, 0), 2)
            last_cm_per_pixel_local = cm_per_pixel
            marker_status = f"Marker OK (ID {marker_id_local})"
        else:
            # No marker in the photo: use the last known scale if there is
            # one, otherwise the hard-coded fallback of 0.05 cm/pixel.
            cm_per_pixel = last_cm_per_pixel_local or 0.05
            marker_status = "Marker lost - using last/fallback scale"

        # show preprocessing mask
        mask = preprocess_image(frame)
        cv2.imshow("Mask Preview", mask)

        contour = detect_largest_contour(mask)
        if contour is not None:
            result = compute_volume_and_error(contour, cm_per_pixel)
            if result:
                w_cm, h_cm, vol_cm3, err_total, (x, y, w, h) = result
                cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
                cv2.putText(vis, f"Width: {w_cm:.2f} cm  Height: {h_cm:.2f} cm",
                            (x, y - 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                # ASCII "+/-" (as in the video overlay): the Hershey fonts
                # used by putText cannot draw non-ASCII symbols.
                cv2.putText(vis, f"Vol: {vol_cm3:.1f} cm^3 (+/-{err_total:.1f}%)",
                            (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

                # A single photo is logged immediately, bypassing the
                # once-per-second throttle used for video frames.
                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                with open(csv_filename, mode="a", newline="") as f:
                    writer = csv.writer(f)
                    writer.writerow([
                        timestamp, 1, marker_status,
                        round(cm_per_pixel, 6), round(w_cm, 3),
                        round(h_cm, 3), round(vol_cm3, 3), round(err_total, 3)
                    ])

                result_path = os.path.join(
                    "data/results",
                    f"result_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.jpg"
                )
                cv2.imwrite(result_path, vis)
                print(f"üñºÔ∏è Saved annotated result to: {result_path}")

        cv2.imshow("Object Volume Measurement (Photo)", vis)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        print(f"\n‚úÖ Data saved to: {csv_filename}")
        return

    #  Video / Webcam Mode
    camera_index = 0
    if mode == "camera":
        available_cams = find_available_cameras()
        if not available_cams:
            raise IOError("‚ùå No camera devices detected.")
        elif len(available_cams) > 1:
            print("üé• Multiple cameras detected:")
            for idx in available_cams:
                print(f"   [{idx}] Camera {idx}")
            try:
                choice = int(input(f"Select camera index [default={available_cams[0]}]: ") or available_cams[0])
                camera_index = choice if choice in available_cams else available_cams[0]
            except ValueError:
                camera_index = available_cams[0]
        else:
            camera_index = available_cams[0]

    cap = cv2.VideoCapture(camera_index if mode == "camera" else video_path)
    if not cap.isOpened():
        raise IOError(f"‚ùå Cannot open video source (index {camera_index} or {video_path}).")

    frame_idx = 0
    print(f"‚ñ∂Ô∏è Starting measurement in {mode.upper()} mode... Press 'q' to stop.")

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_idx += 1

            vis = frame.copy()
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            cm_per_pixel, _, marker_corners = detect_aruco_scale(
                gray, aruco_detector, marker_id_local, marker_size_cm_local
            )

            if marker_corners is not None:
                cv2.polylines(vis, [np.intp(marker_corners)], True, (255, 0, 0), 2)
                last_cm_per_pixel_local = cm_per_pixel
                marker_status = f"Marker OK (ID {marker_id_local})"
            else:
                if last_cm_per_pixel_local is not None and soft_mode:
                    # Marker temporarily lost: keep measuring with the most
                    # recent scale.
                    cm_per_pixel = last_cm_per_pixel_local
                    marker_status = "Marker lost - using last scale"
                    cv2.putText(vis, f"Scale (last): {last_cm_per_pixel_local:.6f} cm/px",
                                (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 0), 2)
                else:
                    # Marker never seen: derive a scale from the frame size
                    # and the configured reference dimensions.
                    h_img, w_img = gray.shape
                    cm_per_pixel_x = reference_width_cm / w_img
                    cm_per_pixel_y = reference_height_cm / h_img
                    cm_per_pixel = (cm_per_pixel_x + cm_per_pixel_y) / 2
                    marker_status = "No marker - using reference size"

            # live preprocessing + mask preview
            mask = preprocess_image(frame)
            cv2.imshow("Mask Preview", mask)

            contour = detect_largest_contour(mask)
            if contour is not None:
                result = compute_volume_and_error(contour, cm_per_pixel)
                if result:
                    w_cm, h_cm, vol_cm3, err_total, (x, y, w, h) = result
                    cv2.rectangle(vis, (x, y), (x + w, y + h), (0, 255, 0), 2)
                    cv2.putText(vis, f"Width: {w_cm:.2f} cm  Height: {h_cm:.2f} cm",
                                (x, y - 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                    cv2.putText(vis, f"Volume: {vol_cm3:.1f} cm^3 (Margin of Error: +/-{err_total:.1f}%)",
                                (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                    log_measurement(csv_filename, frame_idx, marker_status,
                                    cm_per_pixel, w_cm, h_cm, vol_cm3, err_total)

            cv2.putText(vis, marker_status, (10, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 200, 255), 2)
            cv2.imshow("Object Volume Measurement (Video/Camera)", vis)

            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Free the capture device and close the windows even if a frame
        # raised part-way through processing.
        cap.release()
        cv2.destroyAllWindows()

    print(f"\n‚úÖ Data saved to: {csv_filename}")


## Run the System
Press **Q** to quit the visualization window. To process a video file instead of the webcam, set **use_webcam=False** and set **video_path** to the path of that file.

In [173]:
if __name__ == "__main__":
    # Live-camera run; video_path is only consulted when use_webcam is falsy.
    run_measurement(
        use_webcam=True,
        video_path="assets/video6.mp4",
    )

üé• Multiple cameras detected:
   [0] Camera 0
   [1] Camera 1
   [2] Camera 2
‚ñ∂Ô∏è Starting measurement in CAMERA mode... Press 'q' to stop.

‚úÖ Data saved to: data/Object Measurements_2025-11-13_20-15-22.csv
