In [None]:
# Compute AP and AP 50 (GT vs SAM2)
import json
import numpy as np
import cv2
import matplotlib.pyplot as plt
from pycocotools import mask as mask_utils
from sklearn.metrics import average_precision_score
import os, sys

# Load the ground-truth (GT) annotation file. The rest of this cell reads
# data['videos'] (id, height, width, file_names) and data['annotations']
# (video_id, segmentations) from it.
json_path = "valid.json"  # Replace with your actual path
# JSON is UTF-8 by specification; pin the encoding so the result does not
# depend on the platform's locale default.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Load the SAM2 output data
# sam2_file_path = './input/W1PcSf253cs_161.json' 
# with open(sam2_file_path, 'r') as f:
#     sam2_data = json.load(f)

def compute_ap(gt_masks, pred_masks, iou_threshold=0.5):
    """Return the pixel-wise average precision of a predicted mask.

    Both inputs are flattened and binarised (``> 0``); every pixel is treated
    as one sample, the binarised GT as the label and the binarised prediction
    as the score handed to ``sklearn.metrics.average_precision_score``.

    Args:
        gt_masks: Ground-truth mask (array-like, any shape).
        pred_masks: Predicted mask with the same number of elements.
        iou_threshold: Unused; kept only so the signature stays compatible
            with ``compute_ap50``.

    Returns:
        The average precision as a float, or ``0.0`` when the ground truth
        contains no positive pixel.
    """
    y_true = np.asarray(gt_masks).ravel() > 0
    y_score = np.asarray(pred_masks).ravel() > 0

    # AP is undefined without any positive label. Depending on its version,
    # scikit-learn may warn or yield NaN in that case, and a NaN would poison
    # any mean taken over frames, so return a well-defined 0.0 instead.
    if not y_true.any():
        return 0.0

    return average_precision_score(y_true, y_score)

def compute_iou(gt_mask, pred_mask):
    """Return the intersection-over-union of two masks.

    Non-zero elements count as foreground. When neither mask has any
    foreground (empty union) the result is defined as ``0.0`` rather than
    dividing by zero.
    """
    overlap_area = np.logical_and(gt_mask, pred_mask).sum()
    union_area = np.logical_or(gt_mask, pred_mask).sum()

    # Guard clause expressed inline: an empty union yields IoU 0.
    return overlap_area / union_area if union_area != 0 else 0.0

def compute_ap50(gt_masks, pred_masks, iou_threshold=0.5):
    """Return average precision over mask pairs at an IoU threshold.

    Each (ground truth, prediction) pair is scored with ``compute_iou``. A
    pair whose IoU reaches ``iou_threshold`` is labelled a true positive (1),
    otherwise a false positive (0); the IoU itself is used as the confidence
    score passed to ``sklearn.metrics.average_precision_score``.

    NOTE(review): because the labels are derived from the very scores they
    are ranked by, every positive outranks every negative, so the result is
    effectively 1.0 when at least one pair reaches the threshold and 0.0
    otherwise. Real detection confidences would be needed for a ranking-
    sensitive AP.

    Args:
        gt_masks: Sequence of ground-truth masks (one mask per element).
            Passing a single 2-D array makes the loop pair up image rows.
        pred_masks: Sequence of predicted masks, aligned with ``gt_masks``.
        iou_threshold: Minimum IoU for a pair to count as a true positive.

    Returns:
        The average precision as a float; ``0.0`` when the input is empty or
        no pair reaches the threshold.
    """
    # IoU doubles as the predicted confidence score for each pair.
    predictions = [
        compute_iou(gt_mask, pred_mask)
        for gt_mask, pred_mask in zip(gt_masks, pred_masks)
    ]
    ground_truth = [1 if iou >= iou_threshold else 0 for iou in predictions]

    # Without a single positive label AP is undefined (scikit-learn may warn,
    # yield NaN, or reject empty input); report 0.0 explicitly instead.
    if not any(ground_truth):
        return 0.0

    return average_precision_score(ground_truth, predictions)

# ---------------------------------------------------------------------------
# Evaluate every SAM2 output file in the input directory against the GT data,
# collect one AP / AP50 value per video, and bucket the videos by AP.
# ---------------------------------------------------------------------------
# Named `input_dir` rather than `input` so the builtin input() is not shadowed.
input_dir = "input_box"
path = f'/content/{input_dir}'
# Sorted so the processing order (and the printed lists) are reproducible.
dirs = sorted(os.listdir(path))
ap_list = []
ap50_list = []
from_0_25_list = []
from_25_50_list = []
from_50_75_list = []
from_75_100_list = []

# Group the GT annotations by video once instead of rescanning the full
# annotation list for every SAM2 file.
annotations_by_video = {}
for ann in data["annotations"]:
    annotations_by_video.setdefault(ann["video_id"], []).append(ann)

for file in dirs:
    # Only JSON files are SAM2 outputs; this also skips hidden entries and
    # sub-directories that os.listdir returns.
    file_name, extension = os.path.splitext(file)
    if extension.lower() != ".json" or file_name == "":
        continue
    print(file_name)

    # Open the file from the same directory that was listed above.
    sam2_file_path = os.path.join(path, file)
    with open(sam2_file_path, 'r', encoding='utf-8') as f:
        sam2_data = json.load(f)

    # Search for the GT video whose frame paths contain the file name.
    video = next(
        (
            candidate
            for candidate in data['videos']
            if any(file_name in fname for fname in candidate['file_names'])
        ),
        None,
    )
    if video is None:
        # Without a match there is no frame size to build masks from.
        print(f"skipping {file_name}: no matching video in the GT data")
        continue

    video_id = video['id']
    frame_height = video['height']
    frame_width = video['width']
    # The frame list drives the per-frame loop below.
    frame_files = video['file_names']
    video_annotations = annotations_by_video.get(video_id, [])

    frame_aps = []
    frame_ap50s = []
    for frame_idx, _frame_name in enumerate(frame_files):
        # Merge all GT instances of this frame into one binary mask.
        gt_img = np.zeros((frame_height, frame_width), dtype=np.uint8)
        for annotation in video_annotations:
            segmentation = annotation["segmentations"]
            if not isinstance(segmentation, list) or len(segmentation) <= frame_idx:
                continue
            seg = segmentation[frame_idx]  # Segmentation for the current frame
            if not isinstance(seg, dict) or "counts" not in seg:
                continue  # No usable mask for this instance in this frame
            # Uncompressed RLE (counts given as a list) must be converted
            # first; compressed RLE can be decoded directly.
            if isinstance(seg["counts"], list):
                rle = mask_utils.frPyObjects(seg, frame_height, frame_width)
            else:
                rle = seg
            mask = mask_utils.decode(rle)
            gt_img[mask > 0] = 1  # Set ground truth pixels to 1

        # Rasterise the SAM2 polygons of the first object entry of this frame.
        sam2_polygons = sam2_data[str(frame_idx)][0]['polygons']
        pred_img = np.zeros((frame_height, frame_width), dtype=np.uint8)
        for polygon in sam2_polygons:
            # cv2.fillPoly needs int32 points in (x, y) order; the stored
            # points are presumably (y, x), hence the column swap.
            polygon = np.array(polygon, dtype=np.int32)
            polygon = polygon[:, [1, 0]]
            cv2.fillPoly(pred_img, [polygon], 1)  # Fill the polygon region with 1

        frame_aps.append(compute_ap(gt_img, pred_img))
        # compute_ap50 expects sequences of masks. Passing the bare 2-D
        # images would make it pair up image rows, so wrap each in a list.
        frame_ap50s.append(compute_ap50([gt_img], [pred_img]))

    if not frame_aps:
        # Avoid np.mean([]) (NaN plus a RuntimeWarning) for frame-less videos.
        print(f"skipping {file_name}: video {video_id} has no frames")
        continue

    # Per-video scores are the mean over frames.
    video_ap = np.mean(frame_aps)
    ap_list.append(video_ap)
    video_ap50 = np.mean(frame_ap50s)
    ap50_list.append(video_ap50)

    # Half-open bins [0, .25), [.25, .5), [.5, .75), [.75, 1] so that values
    # exactly on a boundary (including 0) are no longer dropped. A NaN score
    # compares false everywhere and therefore lands in no bin.
    if video_ap < 0.25:
        from_0_25_list.append(file_name)
    elif video_ap < 0.5:
        from_25_50_list.append(file_name)
    elif video_ap < 0.75:
        from_50_75_list.append(file_name)
    elif video_ap >= 0.75:
        from_75_100_list.append(file_name)

# Overlayed histograms of the per-video AP and AP50 values.
plt.hist(ap_list, alpha=0.5, label='ap')
plt.hist(ap50_list, alpha=0.5, label='ap50')
plt.legend(loc='upper right')
plt.show()
print("from 0 to 25: ", from_0_25_list)
print("from 25 to 50: ", from_25_50_list)
print("from 50 to 75: ", from_50_75_list)
print("from 75 to 100: ", from_75_100_list)