# Step A: Multi-Product Detection on Store Shelves

In this notebook, we will implement an object detection pipeline using SIFT features and homography  
to recognize multiple cereal boxes in shelf images.

**Outline:**
1. Import dependencies  
2. Configure paths and SIFT parameters  
3. Extract features and match  
4. Filter matches and estimate homography  
5. Compute bounding boxes (center, width, height)  
6. Visualize results

In [16]:
# 1. Import dependencies
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
# Enable inline plotting
%matplotlib inline


## 2. Configuration

- `REF_DIR`: Folder containing reference images (`models/`)  
- `SCENE_DIR`: Folder containing scene images (`scenes/`)  
- `MAX_FEATURES`: Maximum number of keypoints to detect (left at 0; the SIFT detector created below does not use it)  
- `GOOD_MATCH_RATIO`: Lowe’s ratio threshold for good matches


In [17]:
# 2. Paths, reference image list, and SIFT / matching parameters
REF_DIR, SCENE_DIR = 'models', 'scenes'

# File names (inside REF_DIR) of the reference product images to search for.
REF_FILES = [f"{product_id}.jpg" for product_id in (0, 1, 11, 19, 24, 25, 26)]

MAX_FEATURES = 0         # unused by SIFT
GOOD_MATCH_RATIO = 0.75  # Lowe's ratio-test threshold
MIN_MATCHES = 8          # minimum good-match count (by name; confirm where it is consumed)
MIN_INLIERS = 15         # default RANSAC inlier threshold for the detection functions

# SIFT descriptors are float vectors, so they are compared with the L2 norm.
# crossCheck stays off because the ratio test needs the two nearest neighbours.
sift = cv2.SIFT_create()
bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)

## 3. Feature Extraction and Matching

Define two helper functions:
- `detect_and_compute(img)`: detect SIFT keypoints & compute descriptors  
- `match_features(des1, des2, ratio)`: match descriptors via k‑NN and apply Lowe’s ratio test


In [18]:
# 3. Define feature extraction and matching functions with SIFT

def detect_and_compute(img):
    """
    Detect SIFT keypoints and compute their descriptors.

    Args:
        img: Image as a NumPy array, either 3-channel BGR (as returned by
            ``cv2.imread``) or already single-channel grayscale (2-D).

    Returns:
        Tuple ``(keypoints, descriptors)`` exactly as returned by
        ``sift.detectAndCompute``. ``descriptors`` may be ``None`` when no
        keypoints are found, so callers should check before matching.
    """
    # A 2-D array is already grayscale; cvtColor with COLOR_BGR2GRAY would
    # reject it because it expects a multi-channel input.
    if img.ndim == 2:
        gray = img
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    kp, des = sift.detectAndCompute(gray, None)
    return kp, des

def match_features(des1, des2, ratio=GOOD_MATCH_RATIO):
    """
    Match descriptors using k-NN (k=2) and apply Lowe's ratio test.

    Args:
        des1: Query descriptors (e.g. from the reference image), or None.
        des2: Train descriptors (e.g. from the scene image), or None.
        ratio: A match is kept when the best distance is strictly smaller
            than ``ratio`` times the second-best distance.

    Returns:
        List of the best-neighbour matches that pass the ratio test. Empty
        when either descriptor set is missing or when the ratio test cannot
        be evaluated (fewer than two train descriptors).
    """
    # detectAndCompute can yield None descriptors for featureless images, and
    # the ratio test needs two neighbours per query descriptor.
    if des1 is None or des2 is None or len(des1) == 0 or len(des2) < 2:
        return []

    matches = bf.knnMatch(des1, des2, k=2)
    # knnMatch may return fewer than k neighbours for a query; skip those
    # entries instead of failing on tuple unpacking.
    return [
        pair[0]
        for pair in matches
        if len(pair) == 2 and pair[0].distance < ratio * pair[1].distance
    ]

## 4. Homography Estimation & Bounding Box Calculation

Function `detect_instances` will:
1. Extract & match features between reference and scene  
2. Check for at least `min_matches` good matches  
3. Estimate homography via RANSAC  
4. Project reference corners into the scene  
5. Compute bounding‑box center, width, height


In [19]:
# 4. Define instance detection function with debug
def detect_instances(ref_img, scene_img, min_inliers=MIN_INLIERS,
                     min_matches=MIN_MATCHES):
    """
    Try to find exactly one instance of ref_img in scene_img.

    Pipeline: SIFT features -> k-NN matching with Lowe's ratio test ->
    RANSAC homography -> projection of the reference corners into the scene.

    Args:
        ref_img: Reference (model) image.
        scene_img: Scene image to search in.
        min_inliers: Minimum number of RANSAC inliers required to accept the
            detection.
        min_matches: Minimum number of good matches required before a
            homography is attempted (quick pre-filter).

    Returns:
        ``(center, (w, h), corners, inliers)`` where ``center`` and ``(w, h)``
        describe the axis-aligned bounding box of the projected reference
        corners, ``corners`` is the 4x2 array of projected corners and
        ``inliers`` is the RANSAC inlier count; ``None`` when no sufficiently
        supported instance is found.
    """
    # extract features
    ref_kp, ref_des = detect_and_compute(ref_img)
    scene_kp, scene_des = detect_and_compute(scene_img)

    # No descriptors on either side (or too few in the scene for a 2-NN
    # ratio test) means nothing can be matched.
    if ref_des is None or scene_des is None or len(scene_des) < 2:
        return None

    # match + ratio test
    good = match_features(ref_des, scene_des, ratio=GOOD_MATCH_RATIO)
    # A homography needs at least 4 correspondences regardless of min_matches.
    if len(good) < max(min_matches, 4):
        return None

    # homography
    src_pts = np.float32([ref_kp[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    dst_pts = np.float32([scene_kp[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
    H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
    if H is None:
        return None

    inliers = int(mask.sum())
    if inliers < min_inliers:
        return None

    # project the reference corners into the scene
    h_ref, w_ref = ref_img.shape[:2]
    corners0 = np.float32(
        [[0, 0], [w_ref, 0], [w_ref, h_ref], [0, h_ref]]
    ).reshape(-1, 1, 2)
    proj_c = cv2.perspectiveTransform(corners0, H).reshape(-1, 2)

    # axis-aligned bounding box of the projected quadrilateral
    x_min, y_min = proj_c.min(axis=0)
    x_max, y_max = proj_c.max(axis=0)
    w_box, h_box = x_max - x_min, y_max - y_min
    center = (x_min + w_box / 2, y_min + h_box / 2)

    return center, (w_box, h_box), proj_c, inliers


In [20]:
def suppress_nested(instances, iou_thresh=0.5):
    """
    Drop detections whose bounding box lies mostly inside a stronger one.

    - instances: list of tuples (center, (w, h), corners, inliers)
    - iou_thresh: containment threshold (between 0 and 1). Despite the name
      this is not an IoU: it is the intersection area divided by the area of
      the candidate box itself.

    Returns the surviving instances, ordered by decreasing inlier count.
    """
    # Axis-aligned extent of every detection, tagged with its original
    # position and its inlier count.
    candidates = []
    for position, (_center, (_w, _h), corners, inliers) in enumerate(instances):
        left, top = corners.min(axis=0)
        right, bottom = corners.max(axis=0)
        candidates.append((position, inliers, left, top, right, bottom))

    def _mostly_inside(box, stronger):
        """True when more than iou_thresh of box's area overlaps stronger."""
        _, _, x1, y1, x2, y2 = box
        _, _, sx1, sy1, sx2, sy2 = stronger
        ix1, iy1 = max(x1, sx1), max(y1, sy1)
        ix2, iy2 = min(x2, sx2), min(y2, sy2)
        if not (ix2 > ix1 and iy2 > iy1):
            return False
        shared = (ix2 - ix1) * (iy2 - iy1)
        own = (x2 - x1) * (y2 - y1)
        return shared / own > iou_thresh

    # Most inliers first (most reliable); the sort is stable, so ties keep
    # their input order.
    ranked = sorted(candidates, key=lambda cand: cand[1], reverse=True)

    kept = []
    for cand in ranked:
        if not any(_mostly_inside(cand, winner) for winner in kept):
            kept.append(cand)

    # Map the surviving boxes back to the original instance tuples.
    return [instances[cand[0]] for cand in kept]


## 5. Execution & Visualization

Loop over all scenes, detect each reference, draw bounding boxes, and display both  
the annotated image and detection details in the console.


In [None]:
# —— Cell: definition of the multi-instance detection and suppression functions ——

def detect_multiple_instances(ref_img, scene_img,
                              min_inliers=MIN_INLIERS,
                              ratio=GOOD_MATCH_RATIO,
                              max_iter=5,
                              min_matches=MIN_MATCHES):
    """
    Search for up to max_iter instances of ref_img in scene_img.

    Steps:
    - extract SIFT features from both images
    - k-NN matching followed by Lowe's ratio test
    - repeatedly run RANSAC, removing the inliers of each accepted
      homography so the next round can lock onto another instance

    Args:
        ref_img: Reference (model) image.
        scene_img: Scene image to search in.
        min_inliers: Minimum RANSAC inlier count for an instance to be kept.
        ratio: Lowe's ratio-test threshold.
        max_iter: Maximum number of instances (RANSAC rounds) to attempt.
        min_matches: Minimum number of remaining good matches required to
            attempt another homography.

    Returns:
        List of ``(center, (w, h), corners, inliers)``; ``corners`` is the
        4x2 integer array of projected reference corners, ``center`` and
        ``(w, h)`` describe their axis-aligned bounding box.
    """
    ref_kp, ref_des = detect_and_compute(ref_img)
    scene_kp, scene_des = detect_and_compute(scene_img)

    # Featureless images give None descriptors; the 2-NN ratio test also
    # needs at least two scene descriptors.
    if ref_des is None or scene_des is None or len(scene_des) < 2:
        return []

    good = match_features(ref_des, scene_des, ratio=ratio)

    # A homography needs at least 4 correspondences regardless of min_matches.
    needed = max(min_matches, 4)

    # Reference corners do not change between iterations.
    h_ref, w_ref = ref_img.shape[:2]
    corners0 = np.float32(
        [[0, 0], [w_ref, 0], [w_ref, h_ref], [0, h_ref]]
    ).reshape(-1, 1, 2)

    instances = []
    for _ in range(max_iter):
        if len(good) < needed:
            break

        src_pts = np.float32([ref_kp[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
        dst_pts = np.float32([scene_kp[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)
        H, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
        if H is None:
            break

        inlier_mask = mask.ravel().astype(bool)
        inliers = int(inlier_mask.sum())
        if inliers < min_inliers:
            break

        # Refine the homography with a least-squares fit on the inliers only.
        H_ref, _ = cv2.findHomography(src_pts[inlier_mask], dst_pts[inlier_mask], 0)
        if H_ref is None:
            # Refinement can fail on degenerate inlier sets; keep the RANSAC
            # estimate rather than crashing in perspectiveTransform.
            H_ref = H

        # Project the reference corners into the scene.
        corners_proj = cv2.perspectiveTransform(corners0, H_ref).reshape(-1, 2)

        # Axis-aligned centre and dimensions.
        x_min, y_min = corners_proj.min(axis=0)
        x_max, y_max = corners_proj.max(axis=0)
        center = ((x_min + x_max) / 2, (y_min + y_max) / 2)
        dims = (x_max - x_min, y_max - y_min)

        instances.append((center, dims, corners_proj.astype(int), inliers))

        # Remove this instance's inliers before the next iteration.
        good = [m for m, is_inlier in zip(good, inlier_mask) if not is_inlier]

    return instances


def suppress_nested(instances, iou_thresh=0.5):
    """
    Filter out detections that sit mostly inside a more reliable detection.

    A detection is removed when more than ``iou_thresh`` of its own
    axis-aligned bounding-box area overlaps a kept detection with at least as
    many inliers (a containment ratio, not a true IoU). This redefinition
    replaces the ``suppress_nested`` defined in an earlier cell.

    Returns the kept instances ordered by decreasing inlier count.
    """
    # One record per detection: axis-aligned extent, inlier count, input index.
    records = []
    for original_idx, (_center, (_w, _h), corners, inliers) in enumerate(instances):
        min_x, min_y = corners.min(axis=0)
        max_x, max_y = corners.max(axis=0)
        records.append((min_x, min_y, max_x, max_y, inliers, original_idx))

    # Visit the best-supported detections first (stable for ties).
    records = sorted(records, key=lambda rec: rec[4], reverse=True)

    survivors = []
    for record in records:
        min_x, min_y, max_x, max_y = record[:4]
        for k_min_x, k_min_y, k_max_x, k_max_y, _, _ in survivors:
            left, top = max(min_x, k_min_x), max(min_y, k_min_y)
            right, bottom = min(max_x, k_max_x), min(max_y, k_max_y)
            if not (right > left and bottom > top):
                continue  # boxes do not overlap
            overlap = (right - left) * (bottom - top)
            own_area = (max_x - min_x) * (max_y - min_y)
            if overlap / own_area > iou_thresh:
                break  # mostly nested inside a kept box: drop it
        else:
            survivors.append(record)

    return [instances[record[5]] for record in survivors]


In [21]:
# 5. Run multi-instance detection with suppression of nested boxes

# Load each reference image once instead of re-reading it for every scene.
# cv2.imread returns None for missing/unreadable files, so skip those.
ref_images = []
for ref_file in REF_FILES:
    ref_img = cv2.imread(os.path.join(REF_DIR, ref_file))
    if ref_img is None:
        print(f"Skipping reference {ref_file}: image could not be read")
        continue
    ref_images.append((ref_file, ref_img))

for scene_file in sorted(os.listdir(SCENE_DIR)):
    if not scene_file.lower().endswith(('.png', '.jpg', '.jpeg')):
        continue

    scene = cv2.imread(os.path.join(SCENE_DIR, scene_file))
    if scene is None:
        print(f"Skipping scene {scene_file}: image could not be read")
        continue
    disp = scene.copy()  # draw on a copy so detection runs on the clean scene

    for ref_file, ref_img in ref_images:
        insts = detect_multiple_instances(ref_img, scene)

        # 1) drop instances that are mostly nested inside a stronger one
        insts = suppress_nested(insts, iou_thresh=0.5)

        # 2) draw only the surviving instances
        for center, (w_box, h_box), corners, inliers in insts:
            # polylines expects integer points shaped (N, 1, 2)
            pts = corners.reshape(-1, 1, 2).astype(np.int32)
            cv2.polylines(disp, [pts], True, (0, 255, 0), 2)
            x0, y0 = corners[0]
            cv2.putText(disp, ref_file, (int(x0), int(y0) - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            print(f"{scene_file} – {ref_file}: inliers={inliers}, "
                  f"center=({center[0]:.1f},{center[1]:.1f}), "
                  f"w={w_box:.1f}, h={h_box:.1f}")

    # Show the annotated image (matplotlib expects RGB, OpenCV stores BGR)
    plt.figure(figsize=(12, 6))
    plt.title(f"Detections in {scene_file}")
    plt.axis('off')
    plt.imshow(cv2.cvtColor(disp, cv2.COLOR_BGR2RGB))
    plt.show()


NameError: name 'detect_multiple_instances' is not defined