#  Code for module 4 question 1

In [1]:
import cv2
import numpy as np
import pytesseract
import depthai as dai

# Calibration data: the camera matrix and the distortion coefficients are
# read from .npy files in the current working directory.
# Replace both file names with your actual file paths.
camera_mtx, dist_coeffs = (
    np.load(npy_path) for npy_path in ('camera_mtx.npy', 'dist_coeffs.npy')
)

# Point pytesseract at the tesseract executable to run.
pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'



def preprocess_for_contours(frame):
    """Return a Canny edge map of *frame* for contour extraction.

    The frame is passed through unchanged (no colour conversion happens
    here), smoothed with a 5x5 Gaussian kernel and then fed to the Canny
    detector with thresholds 75 and 200.
    """
    smoothed = cv2.GaussianBlur(frame, (5, 5), 0)
    return cv2.Canny(smoothed, 75, 200)

def find_breeze_card(frame, original):
    """Locate the card by finding its printed text and the contour around it.

    OCR is run on *frame*; for every confidently recognised word that
    contains 'breez' or 'marta', the external contours of *original* are
    searched for the tightest bounding rectangle that fully encloses the
    word and is strictly larger than it.

    Args:
        frame: Image handed to pytesseract for text detection.
        original: Image used for edge/contour detection.

    Returns:
        ``(x, y, w, h)`` of the best enclosing bounding rectangle for the
        first matching word that has one, or ``None`` when no word matches
        or no contour encloses a matching word.
    """
    config = '--oem 3 --psm 11'
    details = pytesseract.image_to_data(frame, config=config, output_type=pytesseract.Output.DICT)

    # Bounding rectangles of the contours; they depend only on `original`,
    # so they are computed once, and only if some word actually matches.
    contour_boxes = None

    for i, raw_text in enumerate(details['text']):
        # float() accepts both numeric values and strings such as '96.5'
        # or '-1', whichever form the OCR wrapper reports.
        if float(details['conf'][i]) <= 60:
            continue

        text = raw_text.lower()
        # Each keyword needs its own membership test: the expression
        # `'breez' or 'marta' in text` is always true.
        if 'breez' not in text and 'marta' not in text:
            continue

        text_x = int(details['left'][i])
        text_y = int(details['top'][i])
        text_w = int(details['width'][i])
        text_h = int(details['height'][i])

        if contour_boxes is None:
            edged = preprocess_for_contours(original)
            contours, _ = cv2.findContours(edged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            contour_boxes = [cv2.boundingRect(cnt) for cnt in contours]

        # Find the smallest rectangle that is larger than the text but
        # still contains the entire text.
        area_text = text_w * text_h
        best_contour = None
        min_area_diff = float('inf')

        for x, y, w, h in contour_boxes:
            encloses_text = (
                x <= text_x
                and x + w >= text_x + text_w
                and y <= text_y
                and y + h >= text_y + text_h
            )
            if not encloses_text:
                continue

            area_diff = w * h - area_text
            if 0 < area_diff < min_area_diff:
                min_area_diff = area_diff
                best_contour = (x, y, w, h)

        if best_contour is not None:
            return best_contour

    return None




def calculate_dimensions_in_mm(box, depth_frame):
    """Estimate the real-world size of a bounding box from depth data.

    The median of the non-zero depth samples inside the box is taken as the
    distance to the object, and the pixel width/height are scaled by
    ``depth / focal_length`` (pinhole model) using the mean of the two focal
    lengths stored in the module-level ``camera_mtx``.

    Args:
        box: ``(x, y, w, h)`` bounding box in pixel coordinates.
        depth_frame: 2-D array of depth samples; zeros are treated as
            invalid. Values are presumably millimetres (the results are
            reported as mm) - confirm against the depth source.

    Returns:
        ``(width, height, depth)`` in the units of *depth_frame*. All three
        are NaN when the box contains no valid depth sample.
    """
    x, y, w, h = box

    # Clamp the box to the frame: a tracker may report coordinates that are
    # negative or past the border, and a negative slice start would wrap
    # around instead of selecting the intended region.
    frame_h, frame_w = depth_frame.shape[:2]
    x0, y0 = max(x, 0), max(y, 0)
    x1 = max(x0, min(x + w, frame_w))
    y1 = max(y0, min(y + h, frame_h))

    depth_roi = depth_frame[y0:y1, x0:x1]
    valid_depths = depth_roi[depth_roi > 0]  # Avoid zero depth
    if valid_depths.size == 0:
        # No usable measurement; report NaN explicitly rather than letting
        # np.median warn about an empty array.
        nan = float('nan')
        return nan, nan, nan

    depth = np.median(valid_depths)

    # Pinhole model: size_mm = size_px * depth / focal_length_px.
    focal_length_px = (camera_mtx[0, 0] + camera_mtx[1, 1]) / 2
    w_mm = w * depth / focal_length_px
    h_mm = h * depth / focal_length_px

    return w_mm, h_mm, depth

def create_pipeline():
    """Assemble the DepthAI pipeline used by the card detector.

    Two mono cameras feed a stereo depth node. The left camera output is
    additionally streamed to the host as "left", and the stereo depth map
    is streamed as "depth".

    NOTE(review): no camera or stereo properties (board socket, resolution,
    depth alignment, ...) are configured here, so library defaults apply -
    confirm that is intended.

    Returns:
        The configured ``dai.Pipeline``.
    """
    pipe = dai.Pipeline()

    def _stream_to_host(stream_name, source):
        # Create an XLinkOut node named `stream_name` and feed it from `source`.
        xout = pipe.create(dai.node.XLinkOut)
        xout.setStreamName(stream_name)
        source.link(xout.input)
        return xout

    # Sensor and depth nodes.
    mono_left = pipe.create(dai.node.MonoCamera)
    mono_right = pipe.create(dai.node.MonoCamera)
    depth_node = pipe.create(dai.node.StereoDepth)

    # Wire each mono camera into its stereo input.
    for mono, stereo_input in ((mono_left, depth_node.left), (mono_right, depth_node.right)):
        mono.out.link(stereo_input)

    # Host-side streams: grayscale image from the left camera, then depth.
    _stream_to_host("left", mono_left.out)
    _stream_to_host("depth", depth_node.depth)

    return pipe

def main():
    """Run the live card detection, tracking and measurement loop.

    Grayscale and depth frames are pulled from the device. Until a card is
    found, every frame is searched with ``find_breeze_card``; once found, a
    CSRT tracker follows it and the estimated dimensions are printed for
    each frame. If the tracker loses the card, detection starts again.
    Press 'q' in the preview window to quit.
    """
    pipeline = create_pipeline()
    with dai.Device(pipeline) as device:
        q_gray = device.getOutputQueue(name="left", maxSize=8, blocking=False)
        q_depth = device.getOutputQueue(name="depth", maxSize=8, blocking=False)

        tracker = None
        card_detected = False
        try:
            while True:
                in_gray = q_gray.get()  # Get the grayscale frame
                in_depth = q_depth.get()  # Get the depth frame

                gray_frame = in_gray.getCvFrame()
                depth_frame = in_depth.getFrame()

                if not card_detected:
                    card_bbox = find_breeze_card(gray_frame, gray_frame)
                    if card_bbox is not None:
                        tracker = cv2.TrackerCSRT_create()
                        tracker.init(gray_frame, tuple(card_bbox))
                        card_detected = True
                        print("Card detected and tracking started.")
                    else:
                        print("No card detected")

                if card_detected:
                    success, box = tracker.update(gray_frame)
                    if success:
                        x, y, w, h = map(int, box)
                        cv2.rectangle(gray_frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                        dimensions_mm = calculate_dimensions_in_mm((x, y, w, h), depth_frame)
                        print(f"Tracked Card dimensions: Width={dimensions_mm[0]:.2f} mm, Height={dimensions_mm[1]:.2f} mm, Depth={dimensions_mm[2]:.2f} mm")
                    else:
                        # Without this reset a single tracking failure would
                        # leave the loop stuck: never tracking, never detecting.
                        tracker = None
                        card_detected = False
                        print("Tracking lost; searching for the card again.")

                cv2.imshow("Breeze Card Detection", gray_frame)
                # Mask to the low byte so the comparison also works on
                # platforms where waitKey returns extra high bits.
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
        finally:
            # Close the preview window even if the loop exits via an exception.
            cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

[194430102154F31200] [1.1] [0.862] [MonoCamera(0)] [error] OV7251 only supports THE_480_P/THE_400_P resolutions, defaulting to THE_480_P
[194430102154F31200] [1.1] [0.863] [MonoCamera(1)] [error] OV7251 only supports THE_480_P/THE_400_P resolutions, defaulting to THE_480_P
No card detected
No card detected
No card detected
Card detected and tracking started.
Tracked Card dimensions: Width=246.29 mm, Height=350.49 mm, Depth=2410.00 mm
Tracked Card dimensions: Width=238.68 mm, Height=340.34 mm, Depth=2249.00 mm
Tracked Card dimensions: Width=287.38 mm, Height=408.96 mm, Depth=2812.00 mm
Tracked Card dimensions: Width=596.86 mm, Height=851.07 mm, Depth=5624.00 mm
Tracked Card dimensions: Width=292.90 mm, Height=417.25 mm, Depth=2812.00 mm
Tracked Card dimensions: Width=319.46 mm, Height=455.08 mm, Depth=3067.00 mm
Tracked Card dimensions: Width=298.43 mm, Height=425.54 mm, Depth=2812.00 mm
Tracked Card dimensions: Width=275.40 mm, Height=392.70 mm, Depth=2595.00 mm
Tracked Card dimensions

Note: During testing, several objects of different dimensions were measured at different depths.