In [None]:
import cv2
import json
import os
import glob
import numpy as np
import torch
from ultralytics.models.sam import SAM2VideoPredictor


Point-click prompt selection for a single image

In [None]:
# Number of clicks that define one object. The key-handling loop in this cell
# only confirms an object when exactly this many points are pending.
POINTS_PER_OBJECT = 2

# Points clicked for the object currently being defined (not yet confirmed).
current_object_points = []
# Confirmed objects; each entry is a list of [x, y] points.
objects_points = []


def click_event(event, x, y, flags, param):
    """Mouse callback that records left clicks as points of the current object.

    On a left-button press the (x, y) position is appended to the module-level
    ``current_object_points`` list, a marker and its coordinates are drawn
    onto the module-level ``image``, and the "Image" window is refreshed.

    Clicks made once the object already has ``POINTS_PER_OBJECT`` points are
    ignored: there is no undo in this cell, so an extra point would make the
    object impossible to confirm.

    Args:
        event: OpenCV mouse event code.
        x, y: Cursor position in pixels of the displayed image.
        flags, param: Unused; present to match the callback signature.
    """
    if event != cv2.EVENT_LBUTTONDOWN:
        return
    if len(current_object_points) >= POINTS_PER_OBJECT:
        print(f"Object already has {POINTS_PER_OBJECT} points. "
              "Press 'n' to confirm it or 'p' to finish.")
        return
    # Add the point to the current object's point list
    current_object_points.append((x, y))
    # Draw a filled circle at the clicked position
    cv2.circle(image, (x, y), 5, (0, 0, 255), -1)
    # Display the coordinate near the point
    cv2.putText(image, f"({x},{y})", (x + 10, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 1)
    cv2.imshow("Image", image)

# Image to annotate.
# NOTE(review): machine-specific absolute path - point it at your own file.
IMAGE_PATH = r"C:\Users\phann\Pictures\Screenshot 2025-03-03 121716.png"
# click_event redraws into the window named "Image", so this must match.
WINDOW_NAME = "Image"


def _window_is_open(name):
    """Return True if HighGUI reports the named window as visible."""
    try:
        return cv2.getWindowProperty(name, cv2.WND_PROP_VISIBLE) >= 1
    except cv2.error:
        # NOTE(review): some OpenCV builds presumably raise for a window that
        # no longer exists; treat that the same as "not visible".
        return False


# Load image
orig_image = cv2.imread(IMAGE_PATH)
if orig_image is None:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel down.
    raise FileNotFoundError(f"Error loading image: {IMAGE_PATH}")

# Resize image. Clicks are recorded on the resized image, so the saved
# coordinates are in the 640x640 pixel space, not the original resolution.
desired_width = 640
desired_height = 640
image = cv2.resize(orig_image, (desired_width, desired_height))
print(f"Image resized to {desired_width}x{desired_height}")

cv2.imshow(WINDOW_NAME, image)
cv2.setMouseCallback(WINDOW_NAME, click_event)

# Defined up front so the name exists whichever key ends the loop.
labels = []
# Only poll for "window closed" when the backend reports visibility for a
# window that is known to be open; otherwise the check would end the loop at once.
can_detect_close = _window_is_open(WINDOW_NAME)
window_closed = False

try:
    while True:
        key = cv2.waitKey(1) & 0xFF
        if key == ord('n'):
            if len(current_object_points) == 2:
                # Add the object to the list of confirmed objects
                objects_points.append([list(pt) for pt in current_object_points])
                current_object_points.clear()
                print(f"Object {len(objects_points)} confirmed. Click two points for next object.")
            else:
                print("Please select exactly 2 points for the object before pressing 'n'.")
        elif key == ord('p'):
            # If the pending object has its 2 points, add it before finishing
            if len(current_object_points) == 2:
                objects_points.append([list(pt) for pt in current_object_points])
            print("\nFinal object points (each object as a list of 2 points):")
            for idx, obj in enumerate(objects_points, start=1):
                print(f"Object {idx}: {obj}")
            # One positive label per point of every object
            labels = [[1, 1] for _ in objects_points]
            print("\nLabels:", labels)
            break
        elif key == ord('q'):
            print("Exiting without saving object coordinates.")
            objects_points = []
            break
        elif can_detect_close and not _window_is_open(WINDOW_NAME):
            # Without this, closing the window with the mouse leaves the loop
            # spinning forever because no key can be delivered any more.
            print("Window closed. Exiting without saving object coordinates.")
            objects_points = []
            window_closed = True
            break

    if not window_closed:
        # Keep the final picture on screen until the user dismisses it.
        print("Press any key in the image window to close it.")
        cv2.waitKey(0)
finally:
    cv2.destroyAllWindows()


Click-point prompt selection for every video in a folder

In [None]:
def extract_first_frame(video_path):
    """Read and return the first frame of a video.

    Opens ``video_path`` with ``cv2.VideoCapture``, reads a single frame and
    releases the capture again.

    Returns:
        The frame returned by the capture, or ``None`` (after printing a
        message) when the read reports failure or yields no frame.
    """
    capture = cv2.VideoCapture(video_path)
    ok, first = capture.read()
    capture.release()
    if ok and first is not None:
        return first
    print(f"Unable to read first frame from {video_path}.")
    return None

def interactive_point_selection(frame, window_name="Select Points", max_points=3):
    """
    Display the frame for interactive point selection.

    - Left click: add a positive point (label 1)
    - Right click: add a negative point (label 0)
    - Maximum of `max_points` allowed per object.
    - Press 'z' to undo the last point (current object only).
    - Press 'n' to confirm the current object's points and start a new object.
      (This is allowed only when exactly `max_points` points have been added.)
    - Press 'p' when finished. A pending object is confirmed first if it has
      exactly `max_points` points; a partially filled object blocks finishing.
    - Press 'q', or close the window, to quit without saving.

    Args:
        frame: Image to annotate; only a copy of it is drawn on.
        window_name: Title of the OpenCV window used for the selection.
        max_points: Number of points every object must have (>= 1).

    Returns a tuple:
      (objects_points, objects_labels)
      where objects_points is a list of objects (each object is a list of points [x, y])
      and objects_labels is a list of objects (each object is a list of corresponding labels).
      Both lists are empty when the selection was abandoned.

    Raises:
        ValueError: if `max_points` is smaller than 1.
    """
    if max_points < 1:
        # With no points allowed, 'n' would confirm empty objects.
        raise ValueError("max_points must be at least 1")

    original_img = frame.copy()
    # Confirmed objects
    objects_points = []
    objects_labels = []
    # Object currently being drawn. These lists are only mutated in place, so
    # the nested helpers below always see the current state.
    current_points = []
    current_labels = []
    # Points of confirmed objects, kept only for display (drawn in green)
    confirmed_points = []

    def draw_point(canvas, pt, color):
        """Draw a filled marker and its coordinates at `pt`."""
        cv2.circle(canvas, pt, 5, color, -1)
        cv2.putText(canvas, f"{pt}", (pt[0]+10, pt[1]-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

    def redraw():
        """Repaint the window from a clean copy of the frame."""
        temp = original_img.copy()
        for pt in confirmed_points:
            draw_point(temp, pt, (0, 255, 0))
        # Red: label 1 (select the object), blue: label 0 (keep the mask from
        # spreading onto this spot)
        for pt, lab in zip(current_points, current_labels):
            draw_point(temp, pt, (0, 0, 255) if lab == 1 else (255, 0, 0))
        cv2.imshow(window_name, temp)

    def add_point(x, y, label):
        """Append a labelled point to the current object unless it is full."""
        if len(current_points) >= max_points:
            print(f"Maximum {max_points} points allowed per object. Press 'n' to confirm.")
            return
        current_points.append((x, y))
        current_labels.append(label)
        kind = "positive" if label == 1 else "negative"
        print(f"Added {kind} point: ({x}, {y})")
        redraw()

    def mouse_callback(event, x, y, flags, param):
        if event == cv2.EVENT_LBUTTONDOWN:
            add_point(x, y, 1)
        elif event == cv2.EVENT_RBUTTONDOWN:
            add_point(x, y, 0)

    def confirm_current():
        """Move the current object into the confirmed lists and reset it."""
        objects_points.append([list(pt) for pt in current_points])
        objects_labels.append(list(current_labels))
        print(f"Confirmed object {len(objects_points)} with points: {current_points}")
        confirmed_points.extend(current_points)
        current_points.clear()
        current_labels.clear()

    def window_is_open():
        """Return True if HighGUI reports the selection window as visible."""
        try:
            return cv2.getWindowProperty(window_name, cv2.WND_PROP_VISIBLE) >= 1
        except cv2.error:
            # NOTE(review): some OpenCV builds presumably raise for a window
            # that no longer exists; treat that as "not visible".
            return False

    cv2.namedWindow(window_name)
    cv2.imshow(window_name, original_img)
    cv2.setMouseCallback(window_name, mouse_callback)

    print("Instructions:")
    print("  Left click: add positive point (label=1)")
    print("  Right click: add negative point (label=0)")
    print(f"  Maximum {max_points} points allowed per object.")
    print("  Press 'z' to undo last point (current object only)")
    print("  Press 'n' to confirm current object (only when exactly the limit is reached) and start a new object")
    print("  Press 'p' to finish selection")
    print("  Press 'q' to quit without saving")

    # Only poll for "window closed" when the backend reports visibility for a
    # window that is known to be open; otherwise the check would end at once.
    can_detect_close = window_is_open()
    cancelled = False
    try:
        while True:
            # Poll with a short timeout rather than blocking forever, so a
            # window closed with the mouse is noticed instead of hanging.
            key = cv2.waitKey(50) & 0xFF
            if key == ord('n'):
                if len(current_points) < max_points:
                    print(f"Please add {max_points - len(current_points)} more point(s) before confirming.")
                else:
                    confirm_current()
                    redraw()
            elif key == ord('z'):
                if current_points:
                    removed = current_points.pop()
                    removed_label = current_labels.pop()
                    print(f"Undid point: {removed} with label {removed_label}")
                    redraw()
                else:
                    print("No point to undo.")
            elif key == ord('p'):
                # A partially filled object may not be confirmed
                if current_points and len(current_points) != max_points:
                    print(f"Current object has only {len(current_points)} point(s); please reach {max_points} before confirming or undo.")
                    continue
                if current_points:
                    confirm_current()
                break
            elif key == ord('q'):
                cancelled = True
                break
            elif can_detect_close and not window_is_open():
                print("Window closed; quitting without saving.")
                cancelled = True
                break
    finally:
        try:
            cv2.destroyWindow(window_name)
        except cv2.error:
            # Deliberately ignored: the user may already have closed the window.
            pass

    if cancelled:
        return [], []
    return objects_points, objects_labels

# Folder scanned for videos; the prompt files are written next to the videos.
video_folder = r"C:\Users\phann\Documents\job_project\sam2\vid"
video_files = glob.glob(os.path.join(video_folder, "*.mp4"))

for video_path in video_files:
    base_name, _ext = os.path.splitext(os.path.basename(video_path))
    print(f"\nProcessing video: {video_path}")

    first_frame = extract_first_frame(video_path)
    if first_frame is None:
        # Unreadable video: nothing to annotate.
        continue

    # The frame is shown at its native size (no resize), so the clicked
    # coordinates are in the video's own pixel space.
    selection_window = f"Select Points - {base_name}"
    points, labels = interactive_point_selection(
        first_frame, window_name=selection_window, max_points=3
    )
    if not points:
        print(f"No points selected for {base_name}.")
        continue

    # Store the prompts as "<video name>_points.json"
    output_filename = os.path.join(video_folder, f"{base_name}_points.json")
    with open(output_filename, "w") as f:
        json.dump({"points": points, "labels": labels}, f, indent=2)
    print(f"Saved point data for {base_name} to {output_filename}")



Processing video: C:\Users\phann\Documents\job_project\sam2\vid\002.mp4
Instructions:
  Left click: add positive point (label=1)
  Right click: add negative point (label=0)
  Maximum 3 points allowed per object.
  Press 'z' to undo last point (current object only)
  Press 'n' to confirm current object (only when exactly the limit is reached) and start a new object
  Press 'p' to finish selection
  Press 'q' to quit without saving
Added positive point: (210, 365)
Added positive point: (416, 361)
Added negative point: (451, 347)
Confirmed object 1 with points: [(210, 365), (416, 361), (451, 347)]
Added positive point: (475, 378)
Added negative point: (428, 352)
Added positive point: (541, 187)
Confirmed object 2 with points: [(475, 378), (428, 352), (541, 187)]
Added positive point: (598, 321)
Added positive point: (625, 338)
Added positive point: (701, 167)
Confirmed object 3 with points: [(598, 321), (625, 338), (701, 167)]
Added positive point: (757, 205)
Added positive point: (863, 

Run SAM2 video prediction and export COCO annotations

In [None]:
def mask_to_coco_segmentation(mask_tensor, threshold=0.5):
    """
    Turn one mask tensor into COCO-style polygon segmentation.

    The mask is copied to the CPU, binarised with ``> threshold`` into a
    0/255 ``uint8`` image and traced with ``cv2.findContours`` (external
    contours only). Every contour is flattened into a plain list of
    coordinates; contours with fewer than 6 values (i.e. fewer than 3 points)
    are discarded.

    Returns:
        ``(segmentation, area)`` - the list of flattened polygons and the
        number of foreground pixels as a float.
    """
    foreground = mask_tensor.cpu().numpy() > threshold
    mask_np = foreground.astype(np.uint8) * 255
    contours, _ = cv2.findContours(mask_np, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    flattened = (contour.flatten().tolist() for contour in contours)
    # A polygon needs at least 3 points, i.e. 6 coordinate values
    segmentation = [polygon for polygon in flattened if len(polygon) >= 6]
    area = float(np.count_nonzero(mask_np))
    return segmentation, area

# The folder must contain the videos and, for each one, the JSON file holding
# its prompt points and labels ("<video name>_points.json").
video_folder = r"C:\Users\phann\Documents\job_project\sam2\pig_vid"
video_files = glob.glob(os.path.join(video_folder, "*.mp4"))

# Export one frame (image + annotation file) every FRAME_STRIDE frames.
FRAME_STRIDE = 15


def _bbox_from_polygons(polygons):
    """Return the COCO [x, y, width, height] box enclosing flattened polygons.

    Assumes each polygon is a flat list of alternating x, y values, which is
    the shape mask_to_coco_segmentation produces - TODO confirm if that changes.
    """
    xs = [value for polygon in polygons for value in polygon[0::2]]
    ys = [value for polygon in polygons for value in polygon[1::2]]
    return [float(min(xs)), float(min(ys)),
            float(max(xs) - min(xs)), float(max(ys) - min(ys))]


for video_path in video_files:
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    points_filename = os.path.join(video_folder, f"{base_name}_points.json")
    if not os.path.exists(points_filename):
        print(f"Points file {points_filename} not found for video {video_path}. Skipping.")
        continue

    # Load points and labels from JSON file
    with open(points_filename, "r") as f:
        points_data = json.load(f)
    objects_points = points_data["points"]
    labels = points_data["labels"]

    # Setup SAM2VideoPredictor.
    # NOTE(review): a new predictor is built for every video; presumably it
    # keeps per-video tracking state, so confirm before hoisting it out of the loop.
    overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model="sam2.1_t.pt")
    predictor = SAM2VideoPredictor(overrides=overrides)

    # Run inference. stream=True yields one result per frame instead of
    # collecting the results of the whole video in a list first.
    results = predictor(source=video_path, points=objects_points, labels=labels, stream=True)

    # Save in COCO segmentation format
    ann_global_id = 1  # annotation id counter, unique within one video
    for frame_index, result in enumerate(results):
        if frame_index % FRAME_STRIDE != 0:
            continue

        image_filename = f"{base_name}_frame_{frame_index}.jpg"
        # NOTE(review): Results.save writes the plotted image (masks drawn on
        # top). If these frames are meant as training images, the raw frame is
        # probably what should be written instead - confirm.
        result.save(filename=image_filename)

        # Build COCO-format annotations
        annotations = []
        if result.masks is not None:
            mask_tensor = result.masks.data  # shape (N, H, W)
            if result.boxes is not None:
                boxes = result.boxes.xyxy.cpu().numpy()  # [x1, y1, x2, y2]
            else:
                boxes = [None] * len(mask_tensor)
            for mask, box in zip(mask_tensor, boxes):
                segmentation, area = mask_to_coco_segmentation(mask)
                if not segmentation:
                    # No usable polygon (e.g. an empty mask): an annotation
                    # without a segmentation is of no use to COCO consumers.
                    continue
                if box is not None:
                    x1, y1, x2, y2 = box
                    bbox = [float(x1), float(y1), float(x2 - x1), float(y2 - y1)]
                else:
                    # COCO expects four numbers, so derive the box from the polygons.
                    bbox = _bbox_from_polygons(segmentation)
                annotations.append({
                    "id": ann_global_id,
                    "image_id": frame_index,
                    "category_id": 1,
                    "segmentation": segmentation,
                    "area": area,
                    "bbox": bbox,
                    "iscrowd": 0
                })
                ann_global_id += 1

        # COCO image records carry the image size; orig_shape is (height, width).
        height, width = result.orig_shape[:2]
        coco_output = {
            "images": [{
                "id": frame_index,
                "file_name": image_filename,
                "width": int(width),
                "height": int(height)
            }],
            "annotations": annotations,
            "categories": [{"id": 1, "name": "pig"}]
        }
        json_filename = f"{base_name}_frame_{frame_index}.json"
        with open(json_filename, "w") as f:
            json.dump(coco_output, f, indent=2)
        print(f"Saved frame {frame_index} of {base_name} as '{image_filename}' with annotations in '{json_filename}'.")
