<a href="https://colab.research.google.com/github/krishnakanth-pawar/Overlapping-Object-Cropping/blob/main/OverlappingObjectCropping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
## Overlapping Object Cropping: Detect face & hand overlap using MediaPipe, crop only the primary object.

In [1]:
!pip install mediapipe

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting numpy<2 (from mediapipe)
  Downloading numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.3-py3-none-any.whl.metadata (1.6 kB)
INFO: pip is looking at multiple versions of jax to determine which version is compatible with other requirements. This could take a while.
Collecting jax (from mediapipe)
  Downloading jax-0.8.0-py3-none-any.whl.metadata (13 kB)
Collecting jaxlib (from mediapipe)
  Downloading jaxlib-0.8.0-cp312-cp312-manylinux_2_27_x86_64.whl.metadata (1.3 kB)
Collecting jax (from mediapipe)
  Do

In [13]:
import cv2
import mediapipe as mp
import numpy as np

# Short aliases for the MediaPipe solution namespaces used below. No model is
# instantiated here: the FaceMesh / Hands objects are created (and closed)
# inside process_image() via `with` blocks.
mp_face = mp.solutions.face_mesh
mp_hands = mp.solutions.hands
# NOTE(review): mp_drawing is not referenced anywhere in the visible code.
mp_drawing = mp.solutions.drawing_utils

# Utility: convert landmark list to numpy points
def landmarks_to_points(landmarks, image_shape):
    """Convert normalized landmarks into integer pixel coordinates.

    Args:
        landmarks: Iterable of objects exposing ``x`` and ``y`` attributes.
            ``x`` is scaled by the image width and ``y`` by the image height.
        image_shape: Image shape tuple; only the first two entries
            ``(height, width)`` are used.

    Returns:
        ``np.ndarray`` of shape ``(N, 2)`` and dtype ``int32`` holding one
        ``[x, y]`` pixel position per landmark (fractional parts are
        truncated by ``int()``). An empty input yields shape ``(0, 2)``.
    """
    h, w = image_shape[:2]
    pixels = [[int(lm.x * w), int(lm.y * h)] for lm in landmarks]
    # Explicit int32 instead of numpy's platform-dependent default integer:
    # OpenCV's point-set routines work on 32-bit int/float coordinates.
    # reshape(-1, 2) keeps a well-formed (N, 2) array even when N == 0.
    return np.array(pixels, dtype=np.int32).reshape(-1, 2)

# Create a binary mask from landmarks (convex hull)
def mask_from_landmarks(landmarks, image_shape):
    """Rasterize the convex hull of a landmark set into a binary mask.

    Args:
        landmarks: Landmark collection accepted by ``landmarks_to_points``.
            A falsy value (``None`` or an empty collection) produces an
            all-zero mask.
        image_shape: Image shape tuple; the mask takes its first two entries.

    Returns:
        ``uint8`` array of shape ``image_shape[:2]`` with 255 inside the
        convex hull of the landmark pixels and 0 everywhere else.
    """
    canvas = np.zeros(image_shape[:2], dtype=np.uint8)
    if landmarks:
        pixel_points = landmarks_to_points(landmarks, image_shape)
        cv2.fillConvexPoly(canvas, cv2.convexHull(pixel_points), 255)
    return canvas

# Compute overlap ratio (IoU)
def compute_overlap(mask1, mask2):
    """Return the intersection-over-union of two masks' non-zero regions.

    Pixels with a value greater than zero count as foreground. When neither
    mask has any foreground pixel the union is empty and 0 is returned.
    """
    fg1 = mask1 > 0
    fg2 = mask2 > 0
    shared = (fg1 & fg2).sum()
    combined = (fg1 | fg2).sum()
    if combined > 0:
        return shared / combined
    return 0

# Decide which object is primary
def select_primary(face_mask, hand_mask, overlap_threshold=0.2, hand_ratio=0.6):
    """Decide whether the face or the hand is the primary object.

    The "visible" area of each object is its foreground pixel count minus
    the pixels it shares with the other object.

    Args:
        face_mask: 2-D mask; pixels > 0 belong to the face.
        hand_mask: 2-D mask; pixels > 0 belong to the hand(s).
        overlap_threshold: IoU (as returned by ``compute_overlap``) above
            which the two masks are treated as significantly overlapping.
            Defaults to 0.2.
        hand_ratio: In the significant-overlap case the hand wins when its
            visible area exceeds ``hand_ratio`` times the face's visible
            area. Defaults to 0.6.

    Returns:
        The string ``"face"`` or ``"hand"``.
    """
    overlap = compute_overlap(face_mask, hand_mask)

    # Use the same "> 0" foreground test for areas and intersection, and
    # count the shared pixels only once.
    face_fg = face_mask > 0
    hand_fg = hand_mask > 0
    shared = np.logical_and(face_fg, hand_fg).sum()
    face_visible = face_fg.sum() - shared
    hand_visible = hand_fg.sum() - shared

    if overlap > overlap_threshold:
        # Significant overlap: the hand only needs a fraction of the face's
        # visible area to be considered the primary object.
        return "hand" if hand_visible > hand_ratio * face_visible else "face"

    # No significant overlap: the larger visible area wins, ties go to face.
    return "face" if face_visible >= hand_visible else "hand"

# Crop the region defined by the selected mask
def crop_primary(image, mask, padding=20):
    """Crop ``image`` to the padded bounding box of the mask's foreground.

    Args:
        image: Array indexed as ``image[row, col, ...]``.
        mask: 2-D array; pixels > 0 define the region of interest.
        padding: Number of pixels added on every side of the bounding box
            before clamping to the image borders.

    Returns:
        A slice (view) of ``image`` covering the padded bounding box, or
        ``None`` when the mask has no foreground pixel.
    """
    ys, xs = np.where(mask > 0)
    if xs.size == 0:
        return None

    height, width = image.shape[:2]
    # min()/max() are inclusive pixel indices while slice ends are exclusive,
    # hence the +1: without it the last foreground row/column is cut off and
    # the right/bottom padding ends up one pixel short.
    x1 = max(0, int(xs.min()) - padding)
    x2 = min(width, int(xs.max()) + 1 + padding)
    y1 = max(0, int(ys.min()) - padding)
    y2 = min(height, int(ys.max()) + 1 + padding)
    return image[y1:y2, x1:x2]

# Main pipeline
def process_image(path, save_visual=True):
    """Detect a face and hands in an image and crop the primary object.

    The image is loaded with OpenCV, run through MediaPipe FaceMesh (at most
    one face) and Hands (at most two hands), and each detection is turned
    into a convex-hull mask. ``select_primary`` then picks "face" or "hand"
    and the corresponding mask is cropped out of the original image.

    Args:
        path: Path of the image file to load.
        save_visual: When true, writes ``visual_overlay.jpg`` (face mask
            tinted green, hand mask tinted red, blended over the image) and,
            if a crop was produced, ``primary_crop.jpg`` to the current
            working directory.

    Returns:
        The cropped BGR image region, or ``None`` when neither a face nor a
        hand was detected.

    Raises:
        OSError: If the image at ``path`` cannot be loaded.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of inside cvtColor.
        raise OSError(f"Could not read image: {path!r}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # MediaPipe is fed RGB

    with mp_face.FaceMesh(static_image_mode=True, max_num_faces=1) as face_mesh, \
         mp_hands.Hands(static_image_mode=True, max_num_hands=2) as hands:

        face_results = face_mesh.process(image_rgb)
        hand_results = hands.process(image_rgb)

        face_mask = np.zeros(image.shape[:2], dtype=np.uint8)
        hand_mask = np.zeros(image.shape[:2], dtype=np.uint8)

        # Accumulate every detection into its mask. Faces are OR-ed just like
        # hands so that no detection silently overwrites another.
        if face_results.multi_face_landmarks:
            for face_landmarks in face_results.multi_face_landmarks:
                face_mask = cv2.bitwise_or(
                    face_mask,
                    mask_from_landmarks(face_landmarks.landmark, image.shape),
                )

        if hand_results.multi_hand_landmarks:
            for hand_landmarks in hand_results.multi_hand_landmarks:
                hand_mask = cv2.bitwise_or(
                    hand_mask,
                    mask_from_landmarks(hand_landmarks.landmark, image.shape),
                )

        if np.count_nonzero(face_mask) == 0 and np.count_nonzero(hand_mask) == 0:
            print("No objects detected.")
            return None

        primary = select_primary(face_mask, hand_mask)
        print(f"Primary object: {primary}")

        crop = crop_primary(image, face_mask if primary == "face" else hand_mask)

        if save_visual:
            # Paint the masks onto a copy (BGR: green = face, red = hand; the
            # hand colour wins where both overlap), then blend at 30 % opacity.
            overlay = image.copy()
            overlay[face_mask > 0] = [0, 255, 0]
            overlay[hand_mask > 0] = [0, 0, 255]
            blend = cv2.addWeighted(image, 0.7, overlay, 0.3, 0)
            cv2.imwrite("visual_overlay.jpg", blend)
            if crop is not None:
                cv2.imwrite("primary_crop.jpg", crop)

        return crop

# Demo run on the sample image; with save_visual enabled this also writes
# visual_overlay.jpg and primary_crop.jpg to the working directory.
if __name__ == "__main__":
    process_image(path="face1.jpeg", save_visual=True)


Primary object: face
