In [None]:
# Bootstrap packages: OpenMIM (the `mim` installer invoked in the next cell)
# and xtcocoapi, installed straight from its GitHub repository.
!pip install openmim
!pip install git+https://github.com/jin-s13/xtcocoapi

In [None]:
# Remove any existing torch / torchvision / torchaudio and install the CUDA 11.8
# builds of torch 2.0.1 and torchvision 0.15.2 (torchaudio is not reinstalled).
!pip uninstall torch torchvision torchaudio -y
!pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f https://download.pytorch.org/whl/torch_stable.html
# mmcv 2.0.1 comes from the OpenMMLab wheel index for cu118 / torch 2.0.0.
!pip install --trusted-host download.openmmlab.com -f https://download.openmmlab.com/mmcv/dist/cu118/torch2.0.0/index.html mmcv==2.0.1
# mmengine is left unpinned; mmdet is pinned to 3.2.0.
!mim install mmengine
!mim install mmdet==3.2.0
# Clone MMPose and install it in editable mode from inside the checkout.
!git clone https://github.com/open-mmlab/mmpose.git
# NOTE: %cd changes the notebook's working directory to ./mmpose for the rest of the session.
%cd mmpose
!pip install -e .
# NOTE(review): numpy is constrained to <2.0 as the last step, presumably so that none of
# the installs above can leave numpy 2.x in place - confirm this is the intent.
!pip install "numpy<2.0"

In [None]:
import os

# Make the MMPose checkout the working directory: the config paths used for model
# initialization are relative ('projects/...', 'configs/...').
# The install cell already changes into ./mmpose, so an unconditional second
# relative `cd mmpose` would end up in mmpose/mmpose when the notebook is run
# top to bottom. Only change directory when we are not at the repository root yet.
if not (os.path.isdir('configs') and os.path.isdir('projects')):
    os.chdir('mmpose')
print(os.getcwd())

In [None]:
import mmcv
from mmcv import imread
import mmengine
from mmengine.registry import init_default_scope
import numpy as np
from mmpose.apis import inference_topdown
from mmpose.apis import init_model as init_pose_estimator
from mmpose.evaluation.functional import nms
from mmpose.registry import VISUALIZERS
from mmpose.structures import merge_data_samples
from mmdet.apis import inference_detector, init_detector
import cv2
from scipy.spatial import ConvexHull
from scipy.spatial.distance import cdist
from typing import List, Tuple, Optional, Dict
import matplotlib.pyplot as plt
import os
from sklearn.cluster import DBSCAN


# Placeholders; the real objects are assigned in the model-initialization section.
detector = None
pose_estimator = None
visualizer = None


# COCO-WholeBody keypoint layout (133 points in total), in index order:
#   0-16    body (17)
#   17-22   feet (3 left + 3 right)
#   23-90   face (68)
#   91-111  left hand (21)
#   112-132 right hand (21)
BODY_KEYPOINTS = list(range(0, 17))
LEFT_FOOT_KEYPOINTS = list(range(17, 20))
RIGHT_FOOT_KEYPOINTS = list(range(20, 23))
FACE_KEYPOINTS = list(range(23, 23 + 68))
LEFT_HAND_KEYPOINTS = list(range(91, 91 + 21))
RIGHT_HAND_KEYPOINTS = list(range(112, 112 + 21))

# 3-part body definitions
FACE_POINTS = [0] + FACE_KEYPOINTS  # nose + all face keypoints

UPPER_BODY_POINTS = [5, 6, 7, 8, 9, 10, 11, 12]  # shoulders + arms + hips (NO FACE/NECK/EYES)

# Hips + legs. The foot keypoints are deliberately left out: they were never
# effectively part of this list, and adding them would push the point count
# above 10, which switches on the DBSCAN(eps=50) pruning in the mask code and
# can shrink the region to one tight cluster.
LOWER_BODY_POINTS = [11, 12, 13, 14, 15, 16]

# Colors for the 3 body parts, in RGB order: the image is loaded with
# channel_order='rgb' and only converted to BGR when it is written to disk.
FACE_COLOR = [255, 0, 0]        # Red
UPPER_BODY_COLOR = [0, 255, 0]  # Green
LOWER_BODY_COLOR = [0, 0, 255]  # Blue



# Model initialization
# Config files are given as relative paths; checkpoints are given as URLs.
det_config = 'projects/rtmpose/rtmdet/person/rtmdet_m_640-8xb32_coco-person.py'
det_checkpoint = 'https://download.openmmlab.com/mmpose/v1/projects/rtmpose/rtmdet_m_8xb32-100e_coco-obj365-person-235e8209.pth'
pose_config = 'configs/wholebody_2d_keypoint/topdown_heatmap/coco-wholebody/td-hm_hrnet-w48_dark-8xb32-210e_coco-wholebody-384x288.py'
pose_checkpoint = 'https://download.openmmlab.com/mmpose/top_down/hrnet/hrnet_w48_coco_wholebody_384x288_dark-f5726563_20200918.pth'

# Person detector.
detector = init_detector(det_config, det_checkpoint, device='cuda:0')

# Whole-body pose estimator, with the heatmap-output option switched on in its test config.
cfg_options = {'model': {'test_cfg': {'output_heatmaps': True}}}
pose_estimator = init_pose_estimator(
    pose_config,
    pose_checkpoint,
    device='cuda:0',
    cfg_options=cfg_options,
)

# Visualizer built from the estimator's own config, with a smaller radius and line width.
visualizer_cfg = pose_estimator.cfg.visualizer
visualizer_cfg.radius = 2
visualizer_cfg.line_width = 1
visualizer = VISUALIZERS.build(visualizer_cfg)
visualizer.set_dataset_meta(pose_estimator.dataset_meta)


img_path = '/kaggle/input/full-human-img/mother-daughter-sitting-back-to-back-6603114.webp'
output_dir = '/kaggle/working/outputs'

# RGB copy of the image, used for mask sizes and for all drawing below.
# The detector and pose estimator are given the file path instead.
img = mmcv.imread(img_path, channel_order='rgb')

scope = detector.cfg.get('default_scope', 'mmdet')
if scope is not None:
    init_default_scope(scope)

detect_result = inference_detector(detector, img_path)
pred_instance = detect_result.pred_instances.numpy()

# Keep detections of class 0 with a score above 0.3.
person_mask = pred_instance.labels == 0
score_mask = pred_instance.scores > 0.3
valid_detections = person_mask & score_mask

if not np.any(valid_detections):
    print("No people detected!")
    # Raise instead of calling exit(): inside a Jupyter kernel exit() asks for a
    # shutdown rather than raising, so the remaining statements would still run.
    raise SystemExit

# Rows are [x1, y1, x2, y2, score] so that NMS can rank the boxes by score.
bboxes = np.concatenate((pred_instance.bboxes[valid_detections],
                         pred_instance.scores[valid_detections][:, None]), axis=1)
bboxes = bboxes[nms(bboxes, 0.3)]

# Drop boxes whose area is 1000 px^2 or less, then strip the score column.
areas = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
valid_size = areas > 1000
bboxes = bboxes[valid_size]
bboxes = bboxes[:, :4]

if len(bboxes) == 0:
    # NOTE(review): inference_topdown appears to fall back to a whole-image box
    # when it is given no boxes, which would yield a bogus "person" - confirm.
    print("No people left after NMS / size filtering!")
    raise SystemExit



init_default_scope('mmpose')
pose_results = inference_topdown(pose_estimator, img_path, bboxes)

# One (num_keypoints, 3) array of [x, y, score] per accepted person.
valid_keypoints = []
for i, pose_result in enumerate(pose_results):
    instances = pose_result.pred_instances
    if instances.keypoints.shape[0] == 0:
        continue

    # Only the first instance of each result is used.
    kpts = instances.keypoints[0]
    scores = instances.keypoint_scores[0]
    keypoints_with_conf = np.concatenate([kpts, scores.reshape(-1, 1)], axis=1)

    # Index through the layout constants (ignoring indices the model did not
    # produce) so the report always agrees with the mask-building code.
    body_idx = [k for k in BODY_KEYPOINTS if k < len(scores)]
    face_idx = [k for k in FACE_KEYPOINTS if k < len(scores)]

    body_valid = np.sum(scores[body_idx] > 0.3)
    # A person is kept only when at least 5 body keypoints are confident.
    if body_valid >= 5:
        valid_keypoints.append(keypoints_with_conf)
        face_valid = np.sum(scores[face_idx] > 0.3)
        print(f"Person {i+1}: {body_valid}/{len(BODY_KEYPOINTS)} body keypoints, "
              f"{face_valid}/{len(FACE_KEYPOINTS)} face keypoints")

print(f"Got valid poses for {len(valid_keypoints)} people")

# 3-part body masks
# Each *_combined array is a label map: 0 = background, k = region of person k (1-based).
face_combined = np.zeros(img.shape[:2], dtype=np.uint8)
upper_combined = np.zeros(img.shape[:2], dtype=np.uint8)
lower_combined = np.zeros(img.shape[:2], dtype=np.uint8)

expand_ratio = 0.2


def _confident_points(keypoints, indices, min_conf):
    """Return an (n, 2) array of the (x, y) of the listed keypoints scoring above ``min_conf``.

    Indices beyond the end of ``keypoints`` are ignored.
    """
    rows = [keypoints[idx, :2] for idx in indices
            if idx < len(keypoints) and keypoints[idx, 2] > min_conf]
    return np.array(rows).reshape(-1, 2)


def _largest_cluster(points, min_points=10, eps=50, min_samples=2):
    """Keep only the largest DBSCAN cluster; a no-op for ``min_points`` points or fewer.

    When DBSCAN labels every point as noise the input is returned unchanged.
    """
    if len(points) <= min_points:
        return points
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit(points).labels_
    cluster_ids, counts = np.unique(labels[labels != -1], return_counts=True)
    if len(cluster_ids) == 0:
        return points
    return points[labels == cluster_ids[np.argmax(counts)]]


def _fill_expanded_hull(mask, points, expand_ratio):
    """Fill the convex hull of ``points`` into ``mask``, scaled about the mean of its vertices.

    Returns False and leaves ``mask`` untouched when Qhull cannot build a hull
    (for example when all points are collinear); returns True otherwise.
    """
    from scipy.spatial import QhullError

    try:
        hull = ConvexHull(points)
    except QhullError:
        return False
    hull_points = points[hull.vertices]
    center = np.mean(hull_points, axis=0)
    hull_points = center + (hull_points - center) * (1 + expand_ratio)
    height, width = mask.shape[:2]
    hull_points = np.clip(hull_points, [0, 0], [width - 1, height - 1])
    cv2.fillPoly(mask, [hull_points.astype(np.int32)], 255)
    return True


def _fill_circle(mask, points, radius):
    """Fill a disc of ``radius`` centred on the mean of ``points``."""
    center = np.mean(points, axis=0)
    cv2.circle(mask, (int(center[0]), int(center[1])), radius, 255, -1)


def _fill_expanded_bbox(mask, points, expand_ratio):
    """Fill the bounding box of ``points``, grown by ``expand_ratio`` per side and clamped to the mask."""
    x_min, y_min = np.min(points, axis=0).astype(int)
    x_max, y_max = np.max(points, axis=0).astype(int)
    pad_w = int((x_max - x_min) * expand_ratio)
    pad_h = int((y_max - y_min) * expand_ratio)
    height, width = mask.shape[:2]
    top_left = (max(0, int(x_min) - pad_w), max(0, int(y_min) - pad_h))
    bottom_right = (min(width, int(x_max) + pad_w), min(height, int(y_max) + pad_h))
    cv2.rectangle(mask, top_left, bottom_right, 255, -1)


def _part_mask(points, image_shape, expand_ratio, few_points_fallback, degenerate_fallback):
    """Build a 0/255 uint8 mask for one body part from its confident keypoints.

    With 3+ points the largest cluster's expanded convex hull is filled. If the
    cluster has fewer than 3 points, or no hull can be built from it,
    ``degenerate_fallback(mask, points)`` draws instead. With 1-2 input points
    ``few_points_fallback(mask, points)`` draws (pass None to draw nothing).
    An empty input yields an empty mask.
    """
    mask = np.zeros(image_shape[:2], dtype=np.uint8)
    if len(points) == 0:
        return mask
    if len(points) < 3:
        if few_points_fallback is not None:
            few_points_fallback(mask, points)
        return mask
    points = _largest_cluster(points)
    if len(points) < 3 or not _fill_expanded_hull(mask, points, expand_ratio):
        degenerate_fallback(mask, points)
    return mask


# Radius used when a face has only 1-2 confident points: a fraction of the
# smaller image side, never below 35 px. Only height and width are considered;
# taking the minimum over the full shape would pick up the channel count.
face_sparse_radius = max(35, int(min(img.shape[:2]) * 0.06 * (1 + expand_ratio)))


def _face_few_points(mask, points):
    """Face fallback for 1-2 points: disc of the image-relative radius."""
    _fill_circle(mask, points, face_sparse_radius)


def _face_degenerate(mask, points):
    """Face fallback when no hull is available: disc sized from the spread of the points (min 40 px)."""
    _fill_circle(mask, points, max(40, int(np.std(points) * (2 + expand_ratio))))


def _body_degenerate(mask, points):
    """Upper/lower body fallback when no hull is available: expanded bounding box."""
    _fill_expanded_bbox(mask, points, expand_ratio)


for person_id, keypoints in enumerate(valid_keypoints):
    print(f"Creating masks for person {person_id + 1}...")

    # FACE MASK: face keypoints (confidence > 0.2) plus the nose (confidence > 0.3).
    face_points = _confident_points(keypoints, FACE_KEYPOINTS, 0.2)
    if keypoints[0, 2] > 0.3:
        face_points = np.vstack([face_points, keypoints[0, :2]])
    face_mask = _part_mask(face_points, img.shape, expand_ratio,
                           _face_few_points, _face_degenerate)

    # UPPER BODY MASK: nothing is drawn with fewer than 3 confident points.
    upper_points = _confident_points(keypoints, UPPER_BODY_POINTS, 0.3)
    upper_mask = _part_mask(upper_points, img.shape, expand_ratio,
                            None, _body_degenerate)

    # LOWER BODY MASK: same rules as the upper body.
    lower_points = _confident_points(keypoints, LOWER_BODY_POINTS, 0.3)
    lower_mask = _part_mask(lower_points, img.shape, expand_ratio,
                            None, _body_degenerate)

    # Add to combined masks; where people overlap, the later person wins.
    face_combined[face_mask == 255] = person_id + 1
    upper_combined[upper_mask == 255] = person_id + 1
    lower_combined[lower_mask == 255] = person_id + 1

# Create overlays and save results
os.makedirs(output_dir, exist_ok=True)


def _label_map_to_gray(label_map, step=60):
    """Map person labels to gray levels ``label * step``, saturating at 255.

    The product is computed in uint16 so labels of 5 and above do not wrap
    around in uint8 and come out darker than lower labels.
    """
    return np.minimum(label_map.astype(np.uint16) * step, 255).astype(np.uint8)


def _tint_region(image, label_map, color, alpha):
    """Return a new image with ``alpha * color`` added on every labelled (non-zero) pixel.

    All persons share the same colour and their label regions do not overlap,
    so one blend over the whole map equals blending person by person.
    """
    colored_mask = np.zeros_like(image)
    colored_mask[label_map > 0] = color
    return cv2.addWeighted(image, 1, colored_mask, alpha, 0)


# Save individual masks
cv2.imwrite(os.path.join(output_dir, 'face_mask.png'), _label_map_to_gray(face_combined))
cv2.imwrite(os.path.join(output_dir, 'upper_body_mask.png'), _label_map_to_gray(upper_combined))
cv2.imwrite(os.path.join(output_dir, 'lower_body_mask.png'), _label_map_to_gray(lower_combined))

# Create individual overlays (one tinted copy of the image per body part)
face_overlay = _tint_region(img, face_combined, FACE_COLOR, 0.5)
upper_overlay = _tint_region(img, upper_combined, UPPER_BODY_COLOR, 0.5)
lower_overlay = _tint_region(img, lower_combined, LOWER_BODY_COLOR, 0.5)

# The in-memory images are RGB; cv2.imwrite expects BGR.
cv2.imwrite(os.path.join(output_dir, 'face_overlay.jpg'), cv2.cvtColor(face_overlay, cv2.COLOR_RGB2BGR))
cv2.imwrite(os.path.join(output_dir, 'upper_body_overlay.jpg'), cv2.cvtColor(upper_overlay, cv2.COLOR_RGB2BGR))
cv2.imwrite(os.path.join(output_dir, 'lower_body_overlay.jpg'), cv2.cvtColor(lower_overlay, cv2.COLOR_RGB2BGR))

# Create combined overlay: all three parts blended onto one copy, face first.
combined_overlay = img.copy()
for part_labels, part_color in ((face_combined, FACE_COLOR),
                                (upper_combined, UPPER_BODY_COLOR),
                                (lower_combined, LOWER_BODY_COLOR)):
    combined_overlay = _tint_region(combined_overlay, part_labels, part_color, 0.4)

# White legend strip, as wide as the image, stacked underneath the overlay.
legend_height = 60
legend = np.ones((legend_height, img.shape[1], 3), dtype=np.uint8) * 255

legend_items = [
    ("Face", FACE_COLOR),
    ("Upper Body (no face/neck)", UPPER_BODY_COLOR),
    ("Lower Body (Legs)", LOWER_BODY_COLOR)
]

x_offset = 10
for label, color in legend_items:
    # Filled colour swatch with a thin black outline, followed by its label.
    cv2.rectangle(legend, (x_offset, 15), (x_offset + 25, 35), color, -1)
    cv2.rectangle(legend, (x_offset, 15), (x_offset + 25, 35), (0, 0, 0), 1)
    cv2.putText(legend, label, (x_offset + 35, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)
    x_offset += 250

cv2.putText(legend, "3-Part Body Segmentation (Face, Upper Body, Legs)", (10, 50),
            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (100, 100, 100), 1)

final_result = np.vstack([combined_overlay, legend])
cv2.imwrite(os.path.join(output_dir, 'combined_3part_overlay.jpg'), cv2.cvtColor(final_result, cv2.COLOR_RGB2BGR))


pose_vis = img.copy()
# One colour per person, sampled from matplotlib's Set1 colormap and scaled to 0-255.
colors = plt.cm.Set1(np.linspace(0, 1, len(valid_keypoints)))[:, :3] * 255

# Pairs of body keypoint indices joined by a line. Defined once, outside the
# per-person loop, because it does not depend on the person.
body_connections = [
    (0, 1), (0, 2), (1, 3), (2, 4),
    (5, 6), (5, 7), (7, 9), (6, 8), (8, 10),
    (5, 11), (6, 12), (11, 12),
    (11, 13), (13, 15), (12, 14), (14, 16)
]


def _to_pixel(xy):
    """Truncate an (x, y) coordinate pair to a tuple of plain Python ints."""
    return (int(xy[0]), int(xy[1]))


for person_id, keypoints in enumerate(valid_keypoints):
    color = colors[person_id].astype(int).tolist()
    num_kpts = len(keypoints)

    # Body keypoints: larger dots, confidence > 0.3.
    for idx in BODY_KEYPOINTS:
        if idx < num_kpts and keypoints[idx, 2] > 0.3:
            cv2.circle(pose_vis, _to_pixel(keypoints[idx, :2]), 4, color, -1)

    # Face keypoints: small dots, confidence > 0.2. The indices come from the
    # layout constant so the drawing agrees with the face mask.
    for idx in FACE_KEYPOINTS:
        if idx < num_kpts and keypoints[idx, 2] > 0.2:
            cv2.circle(pose_vis, _to_pixel(keypoints[idx, :2]), 1, color, -1)

    # Skeleton: draw an edge only when both endpoints are confident.
    for pt1_idx, pt2_idx in body_connections:
        if (pt1_idx < num_kpts and pt2_idx < num_kpts and
                keypoints[pt1_idx, 2] > 0.3 and keypoints[pt2_idx, 2] > 0.3):
            cv2.line(pose_vis, _to_pixel(keypoints[pt1_idx, :2]),
                     _to_pixel(keypoints[pt2_idx, :2]), color, 2)

cv2.imwrite(os.path.join(output_dir, 'pose_visualization.jpg'), cv2.cvtColor(pose_vis, cv2.COLOR_RGB2BGR))


def _count_confident(keypoints, indices, min_conf):
    """Count the listed keypoints scoring above ``min_conf``, ignoring out-of-range indices."""
    return int(sum(keypoints[idx, 2] > min_conf
                   for idx in indices if idx < len(keypoints)))


# Per-person summary. Index sets and denominators come from the layout
# constants so the report stays in step with the masks and the visualisation.
for i, keypoints in enumerate(valid_keypoints):
    body_kpts = _count_confident(keypoints, BODY_KEYPOINTS, 0.3)
    face_kpts = _count_confident(keypoints, FACE_KEYPOINTS, 0.2)
    upper_kpts = _count_confident(keypoints, UPPER_BODY_POINTS, 0.3)
    print(f"  Person {i+1}: {body_kpts}/{len(BODY_KEYPOINTS)} body, "
          f"{face_kpts}/{len(FACE_KEYPOINTS)} face, "
          f"{upper_kpts}/{len(UPPER_BODY_POINTS)} upper body keypoints")