## Install Dependencies and Import Libraries

In [1]:
# Python version must be 3.6 ~ 3.10, which is officially supported for pyrealsense2 library
!pip install -r requirements1.txt

Collecting scikit-learn (from -r requirements1.txt (line 7))
  Using cached scikit_learn-1.6.1-cp310-cp310-win_amd64.whl.metadata (15 kB)
Collecting joblib>=1.2.0 (from scikit-learn->-r requirements1.txt (line 7))
  Downloading joblib-1.5.1-py3-none-any.whl.metadata (5.6 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn->-r requirements1.txt (line 7))
  Using cached threadpoolctl-3.6.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.6.1-cp310-cp310-win_amd64.whl (11.1 MB)
Downloading joblib-1.5.1-py3-none-any.whl (307 kB)
Using cached threadpoolctl-3.6.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn

   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------- 1/3 [joblib]
   ------------- -------------------------

In [2]:
import pyrealsense2 as rs
import numpy as np
import cv2
import os
import glob
from ultralytics import YOLO
from datetime import datetime

In [3]:
# Base output directories, given relative to the current working directory.
# Later cells extend the image path by plain string concatenation
# (e.g. IMAGE_CAPTURE_D435I_PATH + "raw_images/"), so the trailing slash must stay.
IMAGE_CAPTURE_D435I_PATH = "captured_images/D435i/"
VIDEO_CAPTURE_D435I_PATH = "captured_videos/D435i/"

## Intel D435i Streaming and Image Capture Code

In [4]:
# Stream color + depth from the D435i and save snapshots on demand.
# SPACEBAR writes three PNGs per capture (color, raw depth, depth colormap);
# ESC ends the session. The camera and the preview window are always released.

KEY_ESC = 27
KEY_SPACE = 32
# Scale factor used to squeeze the 16-bit depth values into 8 bits for display;
# raw values above 255 / 0.03 (= 8500) saturate to 255 in the preview.
DEPTH_PREVIEW_ALPHA = 0.03

# Create output folder
output_folder = IMAGE_CAPTURE_D435I_PATH + "raw_images/"
os.makedirs(output_folder, exist_ok=True)

# Find next available image number. Files are named imgCapture_<id>_color.png,
# so <id> is the second "_"-separated field; non-numeric ids are ignored.
existing_files = glob.glob(os.path.join(output_folder, "imgCapture_*_color.png"))
existing_ids = [
    int(os.path.basename(f).split("_")[1])
    for f in existing_files
    if os.path.basename(f).split("_")[1].isdigit()
]
img_counter = max(existing_ids, default=0) + 1

# Initialize RealSense pipeline (640x480 color as BGR8 and depth as Z16, 30 fps)
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
pipeline.start(config)

print("Press SPACEBAR to capture RGB + Depth. Press ESC to exit.")

try:
    while True:
        # Wait for frames
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        depth_frame = frames.get_depth_frame()

        # Skip frame sets in which either stream is missing
        if not color_frame or not depth_frame:
            continue

        # Convert to numpy arrays
        # NOTE(review): the depth frame is not aligned to the color frame (no
        # rs.align is used), so a pixel (x, y) in the two saved images may not
        # show the same scene point - confirm whether alignment is needed.
        color_image = np.asanyarray(color_frame.get_data())
        depth_image = np.asanyarray(depth_frame.get_data())

        # Convert depth to colormap for visualization
        depth_colormap = cv2.applyColorMap(
            cv2.convertScaleAbs(depth_image, alpha=DEPTH_PREVIEW_ALPHA),
            cv2.COLORMAP_JET
        )

        # Display combined image
        combined = np.hstack((color_image, depth_colormap))
        cv2.imshow("RealSense - SPACE to Capture | ESC to Exit", combined)

        key = cv2.waitKey(1) & 0xFF

        if key == KEY_ESC:
            print("Exiting...")
            break
        elif key == KEY_SPACE:
            # Define filenames
            base_name = f"imgCapture_{img_counter}"
            rgb_path = os.path.join(output_folder, f"{base_name}_color.png")
            depth_raw_path = os.path.join(output_folder, f"{base_name}_depth.png")
            depth_colormap_path = os.path.join(output_folder, f"{base_name}_depth_colormap.png")

            # Save images. cv2.imwrite reports failure through its return value,
            # so collect every path that could not be written.
            outputs = [
                (rgb_path, color_image),
                (depth_raw_path, depth_image),  # 16-bit raw depth
                (depth_colormap_path, depth_colormap),
            ]
            failed = [path for path, image in outputs if not cv2.imwrite(path, image)]

            if failed:
                # Keep the counter unchanged so the next capture retries this id.
                print(f"Failed to write: {', '.join(failed)}")
            else:
                print(f"Captured: {rgb_path}, {depth_raw_path}, {depth_colormap_path}")
                img_counter += 1

finally:
    pipeline.stop()
    cv2.destroyAllWindows()

Press SPACEBAR to capture RGB + Depth. Press ESC to exit.
Exiting...


# <b>1. Basic CV-Based Approach</b>

## Real-Time Canny Edge Detection using Intel D435i

In [5]:
# Live Canny edge preview on both the color and the depth stream.
# SPACEBAR saves the color frame, the raw depth frame and both edge maps;
# ESC ends the session.

# Create output folder
output_folder = IMAGE_CAPTURE_D435I_PATH + "cannyEdge/"
os.makedirs(output_folder, exist_ok=True)

# Find next available image number (files are named imgCapture_<id>_color.png)
existing_files = glob.glob(os.path.join(output_folder, "imgCapture_*_color.png"))
existing_ids = [
    int(os.path.basename(f).split("_")[1])
    for f in existing_files
    if os.path.basename(f).split("_")[1].isdigit()
]
img_counter = max(existing_ids, default=0) + 1


def _equalized_canny(image_8u, clahe, low=30, high=100):
    """Return the Canny edge map of a single-channel 8-bit image.

    The image is smoothed with a 3x3 Gaussian (sigma 0.8), contrast-equalized
    with the given CLAHE object and then passed to cv2.Canny with the
    hysteresis thresholds ``low`` and ``high``.
    """
    blurred = cv2.GaussianBlur(image_8u, (3, 3), 0.8)
    return cv2.Canny(clahe.apply(blurred), low, high)


# Initialize pipeline and configure streams
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
pipeline.start(config)

print("Press SPACEBAR to capture image. Press ESC to exit.")

try:
    # The CLAHE settings never change, so build the object once instead of
    # twice per frame.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    while True:
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        depth_frame = frames.get_depth_frame()

        if not color_frame or not depth_frame:
            continue

        # Convert frames to numpy arrays
        color_image = np.asanyarray(color_frame.get_data())
        depth_image = np.asanyarray(depth_frame.get_data())

        # --- RGB Edge Detection ---
        gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)
        edges_rgb = _equalized_canny(gray, clahe)

        # --- Depth Edge Detection ---
        # Scale the 16-bit depth down to 8 bits first; Canny and CLAHE are
        # applied to that 8-bit image, exactly like the grayscale color frame.
        depth_8u = cv2.convertScaleAbs(depth_image, alpha=0.03)
        edges_depth = _equalized_canny(depth_8u, clahe)

        # Stack edges for display
        combined = np.hstack((edges_rgb, edges_depth))
        cv2.imshow("Canny Edges | RGB (left) vs Depth (right)", combined)

        key = cv2.waitKey(1) & 0xFF

        if key == 27:  # ESC
            print("Exiting...")
            break

        elif key == 32:  # SPACEBAR
            base_name = f"imgCapture_{img_counter}"
            outputs = [
                (f"{base_name}_color.png", color_image),
                (f"{base_name}_depth.png", depth_image),
                (f"{base_name}_edges_rgb.png", edges_rgb),
                (f"{base_name}_edges_depth.png", edges_depth),
            ]
            # cv2.imwrite signals failure through its return value.
            failed = [
                name for name, image in outputs
                if not cv2.imwrite(os.path.join(output_folder, name), image)
            ]

            if failed:
                # Counter is left unchanged so the next capture retries this id.
                print(f"Failed to write: {', '.join(failed)}")
            else:
                print(f"Captured: {base_name}_*.png")
                img_counter += 1

finally:
    pipeline.stop()
    cv2.destroyAllWindows()

Press SPACEBAR to capture image. Press ESC to exit.
Exiting...


## Real-Time Object Segmentation using Intel D435i (via Adaptive Thresholding)

In [None]:
# Live threshold-based segmentation of the color stream: the largest external
# contour of the cleaned binary mask is boxed and labelled.
# SPACEBAR saves the annotated frame (only when a contour exists); ESC exits.

# === Output folder setup ===
output_folder = IMAGE_CAPTURE_D435I_PATH + "segment_colThresh/"
os.makedirs(output_folder, exist_ok=True)

# Files are named threshCapture_<id>.png; continue after the highest existing id.
existing_files = glob.glob(os.path.join(output_folder, "threshCapture_*.png"))
existing_ids = [
    int(os.path.basename(f).split("_")[1].split(".")[0])
    for f in existing_files
    if os.path.basename(f).split("_")[1].split(".")[0].isdigit()
]
img_counter = max(existing_ids, default=0) + 1

# === RealSense Setup ===
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
pipeline.start(config)

print("Running threshold-based segmentation... Press SPACEBAR to save annotated image, ESC to exit.")

try:
    # Loop-invariant helpers: built once instead of once per frame.
    clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

    while True:
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        if not color_frame:
            continue

        color_image = np.asanyarray(color_frame.get_data())
        gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)

        # Apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
        gray = clahe.apply(gray)

        # === Adaptive Thresholding to handle light/dark backgrounds ===
        # Inverted binary output: pixels darker than their 35x35 neighbourhood
        # mean minus 5 become white in the mask.
        thresh = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY_INV, 35, 5
        )

        # === Morphological cleaning ===
        clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

        # === Find largest contour ===
        contours, _ = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        output = color_image.copy()
        if contours:
            largest = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(largest)
            cv2.rectangle(output, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Keep the label baseline inside the image when the box touches the top edge.
            label_y = max(y - 10, 12)
            cv2.putText(output, "Threshold-Segmented Object", (x, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

        # === Show both original and threshold views ===
        clean_bgr = cv2.cvtColor(clean, cv2.COLOR_GRAY2BGR)
        stacked = np.hstack((output, clean_bgr))
        cv2.imshow("Threshold Segmentation | RGB (left) + Binary Mask (right)", stacked)

        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC
            break
        elif key == 32 and contours:  # SPACEBAR, only when something was segmented
            filename = os.path.join(output_folder, f"threshCapture_{img_counter}.png")
            # cv2.imwrite signals failure through its return value.
            if cv2.imwrite(filename, output):
                print(f"Saved segmented image: {filename}")
                img_counter += 1
            else:
                print(f"Failed to write: {filename}")

finally:
    pipeline.stop()
    cv2.destroyAllWindows()

Running threshold-based segmentation... Press SPACEBAR to save annotated image, ESC to exit.


## Fracture Identification Code

In [7]:
# === Output folder setup ===
# Annotated fracture frames are stored below the image capture root.
output_folder = IMAGE_CAPTURE_D435I_PATH + "captured_fractures/"
os.makedirs(output_folder, exist_ok=True)

# Continue numbering after the highest fractureCapture_<id>.png already on disk.
existing_files = glob.glob(os.path.join(output_folder, "fractureCapture_*.png"))
existing_ids = [
    int(id_text)
    for id_text in (
        os.path.basename(f).split("_")[1].split(".")[0]
        for f in existing_files
    )
    if id_text.isdigit()
]
if existing_ids:
    img_counter = max(existing_ids) + 1
else:
    img_counter = 1

# === RealSense Setup ===
# Only the color stream is needed for this cell.
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
pipeline.start(config)

print("Running segmentation + fracture detection... SPACEBAR = save fractures only | ESC = exit.")

# === Fracture Detection ===
def detect_fractures(gray_img, intensity_thresh=85, min_area=65, max_area=1000):
    """Find small dark regions in a grayscale image and return their bounding boxes.

    The image is contrast-equalized with CLAHE, thresholded with an inverted
    binary threshold (pixels at or below ``intensity_thresh`` become
    foreground), cleaned with a 3x3 elliptical morphological opening, and its
    external contours are filtered by area.

    Args:
        gray_img: Single-channel 8-bit image (full frame or a cropped ROI).
        intensity_thresh: Gray level used for the inverted binary threshold.
        min_area: Exclusive lower bound on contour area, in pixels.
        max_area: Exclusive upper bound on contour area, in pixels.

    Returns:
        A list of ``(x, y, w, h)`` bounding boxes, in the coordinate frame of
        ``gray_img``. Empty when ``gray_img`` is None or has no pixels.
    """
    # An empty crop would make the OpenCV calls below raise; treat it as "nothing found".
    if gray_img is None or gray_img.size == 0:
        return []

    # NOTE(review): the caller in this cell passes an image that has already
    # been CLAHE-equalized, so equalization is effectively applied twice -
    # confirm this is intended.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    gray_eq = clahe.apply(gray_img)

    _, thresh = cv2.threshold(gray_eq, intensity_thresh, 255, cv2.THRESH_BINARY_INV)

    # Opening removes isolated specks before contours are extracted.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Keep only contours whose area lies strictly between the two bounds.
    return [
        cv2.boundingRect(cnt)
        for cnt in contours
        if min_area < cv2.contourArea(cnt) < max_area
    ]

# Main loop: segment the largest object in each color frame, look for
# fractures inside its bounding box and draw them on a copy of the frame.
# SPACEBAR saves the annotated frame; ESC exits.
try:
    # Loop-invariant helpers: built once instead of once per frame.
    clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

    while True:
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        if not color_frame:
            continue

        color_image = np.asanyarray(color_frame.get_data())
        gray = cv2.cvtColor(color_image, cv2.COLOR_BGR2GRAY)

        # === Segmentation (optional preprocessing) ===
        gray_eq = clahe.apply(gray)

        thresh = cv2.adaptiveThreshold(
            gray_eq, 255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY_INV, 35, 3
        )
        clean = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

        contours, _ = cv2.findContours(clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # === Segmentation Block ===
        if contours:
            # Restrict the fracture search to the largest contour's bounding box.
            largest = max(contours, key=cv2.contourArea)
            x, y, w, h = cv2.boundingRect(largest)
            roi_gray = gray_eq[y:y+h, x:x+w]
            offset = (x, y)
        else:
            # === Fallback if no object detected: search the whole frame ===
            roi_gray = gray_eq
            offset = (0, 0)

        # === Fracture detection only ===
        # detect_fractures returns boxes relative to roi_gray, so the ROI
        # offset is added back to draw them in full-frame coordinates.
        fracture_image = color_image.copy()
        fractures = detect_fractures(roi_gray)
        for (fx, fy, fw, fh) in fractures:
            left, top = offset[0] + fx, offset[1] + fy
            cv2.rectangle(fracture_image, (left, top), (left + fw, top + fh), (0, 0, 255), 2)
            # Keep the label baseline inside the image when the box touches the top edge.
            cv2.putText(fracture_image, "Fracture", (left, max(top - 5, 12)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)

        # === Display fracture detection live ===
        cv2.imshow("Fracture Detection Only", fracture_image)

        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC
            break
        elif key == 32:  # SPACEBAR
            filename = os.path.join(output_folder, f"fractureCapture_{img_counter}.png")
            # cv2.imwrite signals failure through its return value.
            if cv2.imwrite(filename, fracture_image):
                print(f"Saved fracture image: {filename}")
                img_counter += 1
            else:
                print(f"Failed to write: {filename}")

finally:
    pipeline.stop()
    cv2.destroyAllWindows()

Running segmentation + fracture detection... SPACEBAR = save fractures only | ESC = exit.


# <b>2. Machine Learning-Based Approach</b>

## Real-Time Object Type Detection using Intel D435i and YOLOv8

In [10]:
# Run a YOLOv8 model on the live color stream and show the annotated frames.
# SPACE toggles recording of the annotated stream to an .avi file stored under
# VIDEO_CAPTURE_D435I_PATH; ESC exits.

# Load YOLOv8 model
model = YOLO("object_det_best.pt")  # replace with your model path

# Video recording setup
recording = False
video_writer = None
video_filename = None
fps = 30
frame_size = (640, 480)  # replaced by the real frame size when recording starts
os.makedirs(VIDEO_CAPTURE_D435I_PATH, exist_ok=True)

# RealSense camera setup
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
pipeline.start(config)

print("Press SPACE to start/stop recording, ESC to exit.")

try:
    while True:
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        if not color_frame:
            continue

        frame = np.asanyarray(color_frame.get_data())

        # Run YOLOv8 inference
        results = model.predict(source=frame, save=False, stream=False, show=False, verbose=False)
        annotated = results[0].plot()

        # Show frame
        cv2.imshow("YOLOv8 RealSense Inference", annotated)

        # Write to video if recording
        if recording and video_writer:
            video_writer.write(annotated)

        key = cv2.waitKey(1) & 0xFF

        # SPACE: Toggle recording
        if key == 32:
            if not recording:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                video_filename = os.path.join(VIDEO_CAPTURE_D435I_PATH, f"output_{timestamp}.avi")
                # Size the writer from the frame that is actually written, so
                # the two cannot drift apart.
                frame_size = (annotated.shape[1], annotated.shape[0])
                video_writer = cv2.VideoWriter(
                    video_filename,
                    cv2.VideoWriter_fourcc(*'XVID'),
                    fps,
                    frame_size
                )
                # A writer that failed to open (e.g. codec unavailable) would
                # otherwise drop every frame without any error.
                if video_writer.isOpened():
                    recording = True
                    print(f"Recording started: {video_filename}")
                else:
                    video_writer.release()
                    video_writer = None
                    print(f"Could not open video writer for {video_filename}; recording not started.")
            else:
                recording = False
                if video_writer:
                    video_writer.release()
                    print(f"Recording saved: {video_filename}")
                    video_writer = None

        # ESC: Exit
        elif key == 27:
            print("ESC pressed. Exiting...")
            break

finally:
    # Finalize the video first so a recording in progress is not lost if
    # stopping the camera raises.
    if video_writer:
        video_writer.release()
    pipeline.stop()
    cv2.destroyAllWindows()
    print("Cleanup complete.")

Press SPACE to start/stop recording, ESC to exit.
ESC pressed. Exiting...
Cleanup complete.


## Real-Time Defect Detection using Intel D435i and YOLOv8

In [11]:
# Two-stage detection on the live color stream: an object model finds the
# parts, then a defect model is run on each detected object's crop.
# The left view shows object boxes, the right view shows defect boxes.
# SPACE toggles recording of the side-by-side view to an .avi file stored
# under VIDEO_CAPTURE_D435I_PATH; ESC exits.

# Load models
object_model = YOLO("object_det_best.pt")
defect_model = YOLO("defect_det_best.pt")

# Video recording setup
recording = False
video_writer = None
video_filename = None
fps = 30
frame_size = (1280, 480)  # width doubled due to side-by-side view; refreshed when recording starts
os.makedirs(VIDEO_CAPTURE_D435I_PATH, exist_ok=True)

# RealSense setup
pipeline = rs.pipeline()
config = rs.config()
config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
pipeline.start(config)

print("Press SPACE to start/stop recording, ESC to exit.")

try:
    while True:
        frames = pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        if not color_frame:
            continue

        frame = np.asanyarray(color_frame.get_data())
        frame_h, frame_w = frame.shape[:2]

        # Create two copies of the frame for different outputs
        object_frame = frame.copy()
        defect_frame = frame.copy()

        # Step 1: Detect object of interest
        object_results = object_model.predict(source=frame, save=False, stream=False, show=False, verbose=False)
        object_boxes = object_results[0].boxes.xyxy.cpu().numpy().astype(int)
        object_classes = object_results[0].boxes.cls.cpu().numpy().astype(int) if object_results[0].boxes.cls is not None else []

        for i, box in enumerate(object_boxes):
            x1, y1, x2, y2 = (int(v) for v in box)

            # Draw object detection on left frame
            if len(object_classes) > i:
                label = object_results[0].names[object_classes[i]]
            else:
                label = "Object"
            cv2.rectangle(object_frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
            cv2.putText(object_frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # Step 2: Run defect detection within object ROI
            # Clamp to the frame so a negative coordinate cannot wrap around
            # in the slice and select the wrong region.
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(x2, frame_w), min(y2, frame_h)
            roi = frame[y1:y2, x1:x2]
            if roi.size == 0:
                continue

            defect_results = defect_model.predict(source=roi, save=False, stream=False, show=False, verbose=False)
            defect_boxes = defect_results[0].boxes.xyxy.cpu().numpy().astype(int)
            defect_classes = defect_results[0].boxes.cls.cpu().numpy().astype(int) if defect_results[0].boxes.cls is not None else []

            for j, (dx1, dy1, dx2, dy2) in enumerate(defect_boxes):
                # Defect boxes are relative to the ROI; shift them back into
                # full-frame coordinates before drawing.
                gx1, gy1 = x1 + int(dx1), y1 + int(dy1)
                gx2, gy2 = x1 + int(dx2), y1 + int(dy2)
                cv2.rectangle(defect_frame, (gx1, gy1), (gx2, gy2), (0, 255, 255), 2)

                # Guard on index j (not just on "non-empty"), matching the
                # check used for object labels above.
                if defect_results[0].names and len(defect_classes) > j:
                    defect_label = defect_results[0].names[defect_classes[j]]
                    cv2.putText(defect_frame, defect_label, (gx1, gy1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 2)

        # Concatenate and show the frames side by side
        combined_view = cv2.hconcat([object_frame, defect_frame])
        cv2.imshow("Object Detection (Left) | Defect Detection (Right)", combined_view)

        # Record if needed
        if recording and video_writer:
            video_writer.write(combined_view)

        key = cv2.waitKey(1) & 0xFF

        # SPACE: Toggle recording
        if key == 32:
            if not recording:
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                video_filename = os.path.join(VIDEO_CAPTURE_D435I_PATH, f"output_{timestamp}.avi")
                # Size the writer from the view that is actually written, so
                # the two cannot drift apart.
                frame_size = (combined_view.shape[1], combined_view.shape[0])
                video_writer = cv2.VideoWriter(
                    video_filename,
                    cv2.VideoWriter_fourcc(*'XVID'),
                    fps,
                    frame_size
                )
                # A writer that failed to open (e.g. codec unavailable) would
                # otherwise drop every frame without any error.
                if video_writer.isOpened():
                    recording = True
                    print(f"Recording started: {video_filename}")
                else:
                    video_writer.release()
                    video_writer = None
                    print(f"Could not open video writer for {video_filename}; recording not started.")
            else:
                recording = False
                if video_writer:
                    video_writer.release()
                    print(f"Recording saved: {video_filename}")
                    video_writer = None

        # ESC: Exit
        elif key == 27:
            print("ESC pressed. Exiting...")
            break

finally:
    # Finalize the video first so a recording in progress is not lost if
    # stopping the camera raises.
    if video_writer:
        video_writer.release()
    pipeline.stop()
    cv2.destroyAllWindows()
    print("Cleanup complete.")

Press SPACE to start/stop recording, ESC to exit.
ESC pressed. Exiting...
Cleanup complete.
