I need to count the frequency of every triplet in each of the video sequences.

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

In [None]:
import numpy as np

# Per-class scores, one entry per triplet class. Later cells index this list with the
# 0-based triplet index (mapping key minus one).
# NOTE(review): presumably the per-triplet AP reported by the old ivtmetrics release on the
# CholecT50 challenge split — confirm where these numbers were copied from.
results_cholecT50_chal = [np.nan, np.nan, np.nan, np.nan, 1.42380420e-01, 5.12557663e-04, np.nan, 3.32626728e-01,
          1.94041245e-02, 4.41499286e-03, 1.51810978e-02, 1.05866348e-01, 7.66676512e-01, 
          3.34643930e-01, np.nan, np.nan, np.nan, 3.49844877e-01, 2.36705437e-01, 6.40751279e-01,
          np.nan, np.nan, 6.16255123e-01, 3.82526038e-01, np.nan, np.nan, np.nan, np.nan, np.nan, 
          9.38378809e-01, np.nan, 6.81760326e-01, np.nan, 2.39063315e-01, 6.76337083e-01, np.nan, 
          1.46892996e-01, np.nan, np.nan, np.nan, 2.24427652e-02, np.nan, np.nan, np.nan, np.nan, 
          np.nan, 5.29207639e-03, np.nan, 6.28930818e-03, np.nan, np.nan, np.nan, 8.23852417e-04, 
          np.nan, 4.13746771e-03, 5.78127295e-02, np.nan, 9.06361298e-02, 5.12866891e-01, np.nan, 
          7.72888383e-01, np.nan, 5.74473847e-02, 1.00791122e-03, np.nan, np.nan, np.nan, np.nan, 
          5.51408207e-01, 7.13270129e-01, np.nan, np.nan, 2.06043956e-01, np.nan, np.nan, np.nan, 
          np.nan, 1.03012725e-01, 7.70445042e-01, 6.79152307e-01, np.nan, np.nan, 7.19090642e-02, 
          np.nan, np.nan, np.nan, np.nan, np.nan, 4.20168067e-03, np.nan, 1.05711606e-02, np.nan, 
          np.nan, np.nan]

# Count NaNs: np.isnan yields a boolean mask, and summing it counts the True entries.
nan_count = np.sum(np.isnan(results_cholecT50_chal))
print("Number of NaNs:", nan_count)
# Mean over the non-NaN entries only (np.nanmean ignores NaNs).
print(np.nanmean(results_cholecT50_chal))

Number of NaNs: 55
0.2890225939592308


In [3]:
# 0-based positions of the NaN entries in the per-class results.
nan_indices = np.flatnonzero(np.isnan(results_cholecT50_chal))

print("Indices of NaNs:", nan_indices)

Indices of NaNs: [ 0  1  2  3  6 14 15 16 20 21 24 25 26 27 28 30 32 35 37 38 39 41 42 43
 44 45 47 49 50 51 53 56 59 61 64 65 66 67 70 71 73 74 75 76 80 81 83 84
 85 86 87 89 91 92 93]


In [4]:
from utils.general.dataset_variables import TripletSegmentationVariables
from os.path import join

In [None]:
import json
from collections import Counter
from typing import List, Dict, Tuple

def count_triplets(video_files: List[str],
                   triplet_mapping: Dict[str, str]) -> Tuple[Dict[str, int], List[str]]:
    """
    Counts the frequency of each triplet in a set of CholecT50 videos and identifies missing triplets.

    The triplet ID read from an annotation is shifted by one before it is looked up in
    ``triplet_mapping`` (annotation ID ``0`` is looked up as ``"1"``). Annotations whose
    ID is ``-1`` are skipped. Mapping keys are compared as strings, so both
    ``{"1": ...}`` and ``{1: ...}`` style mappings are accepted.

    Args:
        video_files (List[str]): List of paths to video JSON annotation files. Each file is
            read for an "annotations" object that maps a frame ID to a list of annotations;
            the first element of each annotation is the triplet ID.
        triplet_mapping (Dict[str, str]): Dictionary mapping 1-based triplet IDs
            (string keys; integer keys are tolerated) to triplet names.

    Returns:
        Tuple[Dict[str, int], List[str]]:
            - Dictionary with triplet names as keys and their counts as values.
            - Sorted list of missing triplet names (triplets that do not appear in the dataset).

    Raises:
        ValueError: If an annotated triplet ID has no entry in ``triplet_mapping``.
    """
    # Normalise the keys once so the string lookup below works for int- and str-keyed mappings.
    id_to_name = {str(key): name for key, name in triplet_mapping.items()}
    triplet_counter = Counter()

    # Iterate through each video annotation file
    for file_path in video_files:
        with open(file_path, "r") as f:
            data = json.load(f)

        # A file without an "annotations" entry simply contributes no counts.
        annotations = data.get("annotations", {})

        for frame_id, frame_annotations in annotations.items():
            for annotation in frame_annotations:
                triplet_id = annotation[0]  # First element in list is the triplet ID
                if triplet_id == -1:
                    continue

                triplet_key = str(triplet_id + 1)  # shift the 0-based ID to the 1-based mapping key
                if triplet_key not in id_to_name:
                    raise ValueError(f'cant find triplet_id {triplet_key} for frame_id {frame_id} ')
                triplet_counter[id_to_name[triplet_key]] += 1

    # Identify missing triplets: names in the mapping that were never counted.
    missing_triplets = sorted(set(id_to_name.values()) - set(triplet_counter))

    return dict(triplet_counter), missing_triplets


In [8]:
# Example Usage:
# Count the ground-truth triplets over the annotation files of the listed test videos.

ann_dir = '../../my_rendezvous/data/CholecT50/labels'
test_list =  [92, 96, 103, 110, 111]
test_list_path = [ join(ann_dir, f'VID{test_vid_id}.json')  for test_vid_id in test_list]
# Lookup from triplet ID to triplet name.
# NOTE(review): keys appear to be 1-based ID strings (count_triplets looks up str(id + 1)) — confirm
# against TripletSegmentationVariables.
triplet_mapping = TripletSegmentationVariables.categories['triplet']

print(test_list_path)

triplet_counts, missing_triplets = count_triplets(test_list_path, triplet_mapping)
 
print("Triplet Counts:", triplet_counts)
print("Missing Triplets:", missing_triplets)

['../../my_rendezvous/data/CholecT50/labels\\VID92.json', '../../my_rendezvous/data/CholecT50/labels\\VID96.json', '../../my_rendezvous/data/CholecT50/labels\\VID103.json', '../../my_rendezvous/data/CholecT50/labels\\VID110.json', '../../my_rendezvous/data/CholecT50/labels\\VID111.json']
Triplet Counts: {'grasper,null_verb,null_target': 719, 'grasper,grasp,gallbladder': 2758, 'grasper,retract,gallbladder': 1194, 'hook,null_verb,null_target': 962, 'grasper,grasp,peritoneum': 324, 'hook,dissect,cystic_duct': 884, 'hook,coagulate,blood_vessel': 35, 'hook,dissect,cystic_artery': 240, 'hook,cut,blood_vessel': 13, 'clipper,null_verb,null_target': 37, 'clipper,clip,cystic_duct': 161, 'clipper,clip,cystic_artery': 96, 'scissors,cut,cystic_artery': 44, 'scissors,cut,cystic_duct': 78, 'bipolar,coagulate,blood_vessel': 74, 'bipolar,null_verb,null_target': 57, 'hook,dissect,gallbladder': 2147, 'irrigator,aspirate,fluid': 179, 'grasper,retract,liver': 1218, 'grasper,grasp,specimen_bag': 646, 'grasp

Do the NaNs match up with the missing triplets?

In [9]:
# Translate the missing triplet names back into 0-based class indices
# (mapping keys are 1-based, hence the "- 1").
missing_triplet_ids = sorted(
    int(key) - 1
    for key, triplet_name in triplet_mapping.items()
    if triplet_name in missing_triplets
)
print(missing_triplet_ids)
print(len(missing_triplet_ids))
print(len(triplet_counts))

[0, 1, 2, 3, 6, 14, 15, 16, 20, 21, 24, 25, 26, 27, 28, 30, 32, 35, 37, 38, 39, 41, 42, 43, 44, 45, 47, 49, 50, 51, 53, 56, 59, 61, 64, 65, 66, 67, 70, 71, 73, 74, 75, 76, 80, 81, 83, 84, 85, 86, 87, 89, 91, 92, 93]
55
45


In [10]:
# Element-wise, order-sensitive comparison of the two index lists.
print(
    "The lists are exactly the same (including order)."
    if missing_triplet_ids == nan_indices.tolist()
    else "The lists are different."
)


The lists are exactly the same (including order).


In this example, the NaNs correspond only to the triplets with missing ground truth. However, that means some entries that should be zeros are reported as NaNs.

You should only have NaNs when there is no prediction and no ground_truth. 



1. When there is 0 ground truth and there are some predictions, these should be zero. 
2. When there is 0 prediction and there exists ground_truth, this should also be zero

Let's check these out.

So all the predictions are being penalized. 

### 1. When there is 0 ground truth and there are some predictions, these should be zero. 

In [11]:
import json
from collections import Counter
from typing import Dict, Tuple, List

def count_predicted_triplets(prediction_file: str,
                             triplet_mapping: Dict[str, str],
                             threshold: float = 0.5,
                             verbose: bool = True) -> Tuple[Dict[str, int], List[str]]:
    """
    Counts the frequency of predicted triplets based on a given threshold.

    A triplet counts as predicted in a frame when its score is ``>= threshold``. The
    position of a score in the frame's "triplet" list is its 0-based triplet index,
    which is shifted by one before it is looked up in ``triplet_mapping``. Mapping keys
    are compared as strings, so both ``{"1": ...}`` and ``{1: ...}`` style mappings are
    accepted.

    Args:
        prediction_file (str): Path to the JSON prediction file. The file is read as an
            object mapping a frame ID to an entry whose "triplet" value is the list of
            per-class scores (anything ``float()`` can convert).
        triplet_mapping (Dict[str, str]): Dictionary mapping 1-based triplet IDs
            (string keys; integer keys are tolerated) to triplet names.
        threshold (float): Threshold to determine if a triplet is predicted.
        verbose (bool): When True (the default, matching the previous behaviour), print one
            line per frame and predicted triplet. Pass False to avoid flooding the output.

    Returns:
        Tuple[Dict[str, int], List[str]]:
            - Dictionary with triplet names as keys and their count as values.
            - Sorted list of missing triplets (triplets that are in the mapping but not predicted).

    Raises:
        ValueError: If a predicted index has no entry in ``triplet_mapping``.
    """
    # Normalise the keys once so the string lookup below works for int- and str-keyed mappings.
    id_to_name = {str(key): name for key, name in triplet_mapping.items()}
    triplet_counter = Counter()

    # Load predictions from file
    with open(prediction_file, "r") as f:
        data = json.load(f)

    # Iterate over each frame prediction
    for frame_id, predictions in data.items():
        triplet_scores = predictions["triplet"]

        # 1-based mapping keys of every score that reaches the threshold.
        # The comparison stays ">=" so that NaN scores are never counted.
        predicted_triplet_ids = [
            str(index + 1)
            for index, score in enumerate(triplet_scores)
            if float(score) >= threshold
        ]

        # Map indices to triplet names
        for triplet_id in predicted_triplet_ids:
            if triplet_id not in id_to_name:
                raise ValueError(f"Triplet ID {triplet_id} not found in mapping.")

            triplet_name = id_to_name[triplet_id]
            triplet_counter[triplet_name] += 1
            if verbose:
                print(f'frame_id {frame_id} - Contains {triplet_name}')

    # Identify missing triplets: names in the mapping that were never predicted.
    missing_triplets = sorted(set(id_to_name.values()) - set(triplet_counter))

    return dict(triplet_counter), missing_triplets

In [None]:
# Example Usage:
# NOTE: count_predicted_triplets prints one line per frame and predicted triplet, so this cell
# can produce a very large amount of output.
prediction_file = "../../my_rendezvous/results/cholecT50-challenge-old-ivtmetrics/results_rendezvous_l8_cholectcholect50-challenge_k0_batchnorm_lowres.json"  # Replace with actual file
triplet_mapping = TripletSegmentationVariables.categories['triplet']

# No threshold is passed, so the function's default of 0.5 decides which scores count as predictions.
predicted_counts, missing_predicted_triplets = count_predicted_triplets(prediction_file, triplet_mapping)




In [15]:
# Show how often each triplet was predicted, and which triplets were never predicted.
print("Predicted Triplet Counts:", predicted_counts)
print("Missing Triplets:", missing_predicted_triplets)

Predicted Triplet Counts: {'grasper,grasp,specimen_bag': 588, 'grasper,null_verb,null_target': 638, 'grasper,retract,gallbladder': 3118, 'grasper,retract,omentum': 282, 'grasper,grasp,gallbladder': 126, 'hook,dissect,omentum': 221, 'hook,dissect,gallbladder': 3666, 'hook,dissect,cystic_duct': 152, 'hook,dissect,cystic_plate': 77, 'clipper,null_verb,null_target': 6, 'clipper,clip,cystic_duct': 121, 'scissors,cut,cystic_duct': 47, 'grasper,retract,liver': 1391, 'bipolar,coagulate,liver': 387, 'bipolar,coagulate,cystic_plate': 12, 'bipolar,coagulate,gallbladder': 24, 'grasper,pack,gallbladder': 2, 'bipolar,null_verb,null_target': 13, 'bipolar,dissect,gallbladder': 3, 'bipolar,coagulate,abdominal_wall_cavity': 20, 'grasper,retract,gut': 11, 'hook,null_verb,null_target': 75, 'hook,dissect,cystic_artery': 16, 'bipolar,dissect,cystic_duct': 1, 'clipper,clip,cystic_artery': 28, 'grasper,dissect,gallbladder': 2, 'grasper,retract,cystic_plate': 3, 'irrigator,aspirate,fluid': 7, 'scissors,cut,cys

In [16]:
# Number of triplet names never predicted vs. predicted at least once.
print(len(missing_predicted_triplets))
print(len(predicted_counts))

65
35


In [17]:
# Recompute the 0-based positions of the NaN entries so this section does not
# depend on the earlier cell having been run.
nan_indices = np.flatnonzero(np.isnan(results_cholecT50_chal))

print("Indices of NaNs:", nan_indices)
print(len(nan_indices))

Indices of NaNs: [ 0  1  2  3  6 14 15 16 20 21 24 25 26 27 28 30 32 35 37 38 39 41 42 43
 44 45 47 49 50 51 53 56 59 61 64 65 66 67 70 71 73 74 75 76 80 81 83 84
 85 86 87 89 91 92 93]
55


In [18]:
# 0-based class indices of every triplet that was predicted at least once
# (mapping keys are 1-based, hence the "- 1").
predicted_triplet_ids = sorted(
    int(key) - 1
    for key, triplet_name in triplet_mapping.items()
    if triplet_name in predicted_counts
)

In [19]:
# Number of classes with at least one thresholded prediction.
len(predicted_triplet_ids)

35

In [20]:
nan_set = set(nan_indices)
predicted_set = set(predicted_triplet_ids)

# Classes whose reported score is NaN although they were predicted at least once.
nans_that_were_predicted = predicted_set.intersection(nan_set)
nans_that_were_predicted


{1, 16, 20, 21, 27, 28, 51, 59, 61, 75}

In [21]:
# For every NaN class that nevertheless has thresholded predictions, show its name,
# how often it was predicted, and the reported score.
for pred_id in nans_that_were_predicted:
    pred_name = triplet_mapping[str(pred_id+1)]  # mapping keys are 1-based ID strings
    print(f'pred_id {pred_id} {pred_name} {predicted_counts[pred_name]}')
    print(results_cholecT50_chal[pred_id]) 

pred_id 1 grasper,dissect,gallbladder 2
nan
pred_id 59 hook,dissect,cystic_plate 77
nan
pred_id 75 scissors,dissect,gallbladder 5
nan
pred_id 16 grasper,retract,cystic_plate 3
nan
pred_id 51 hook,coagulate,gallbladder 1
nan
pred_id 20 grasper,retract,omentum 282
nan
pred_id 21 grasper,retract,peritoneum 1
nan
pred_id 27 bipolar,coagulate,cystic_plate 12
nan
pred_id 28 bipolar,coagulate,gallbladder 24
nan
pred_id 61 hook,dissect,omentum 221
nan


There are 10 classes that have predictions but no ground truth, and they are reported as NaN, which is wrong.

### 2. When there is 0 prediction and there exists ground_truth, this should also be zero

For this one, we need to check all the classes that have no predictions and see whether ground truth exists for them. Those classes should score zero.

There are 35 predicted classes but 45 ground-truth classes, hence there must be at least 10 classes that score zero in the ivt results. But looking at the results, there are no zeros, so something is wrong here.

In [22]:
# Element-wise check for scores that are exactly 0 (NaN == 0 evaluates to False, so NaNs never match).
np.array(results_cholecT50_chal) == 0 


array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False])

There is none that is zero so something is fishy here. 

In [23]:
predicted_set = set(predicted_triplet_ids)

# 0-based class indices of every triplet that occurs in the ground truth
# (mapping keys are 1-based, hence the "- 1").
gt_triplet_ids = sorted(
    int(key) - 1
    for key, triplet_name in triplet_mapping.items()
    if triplet_name in triplet_counts
)
gt_set = set(gt_triplet_ids)

In [24]:


# Classes that were predicted but never annotated (possibly none).
extra_in_predicted = predicted_set.difference(gt_set)
# Classes that were annotated but never predicted; at least ten are expected.
extra_in_gt = gt_set.difference(predicted_set)

if extra_in_predicted or extra_in_gt:
    print("Differences found!")
    if extra_in_predicted:
        print("Extra elements in predicted indices:", extra_in_predicted)
    if extra_in_gt:
        print("Extra elements in gt_indices:", extra_in_gt)
else:
    print("The lists contain the same elements.")


Differences found!
Extra elements in predicted indices: {1, 59, 75, 16, 51, 20, 21, 27, 28, 61}
Extra elements in gt_indices: {4, 5, 8, 9, 10, 11, 23, 31, 33, 40, 46, 48, 54, 55, 62, 63, 72, 77, 88, 90}


In [25]:
# Print both sets side by side to compare them by eye.
print(extra_in_predicted)
print(nans_that_were_predicted)

{1, 59, 75, 16, 51, 20, 21, 27, 28, 61}
{1, 59, 75, 16, 51, 20, 21, 27, 28, 61}


That makes sense: the classes that have predictions but no ground truth are exactly the NaNs that have predictions.

This then implies that there are another 20 classes that should have been zeros but are not. They are the classes with ground truth but no predictions, yet somehow they are not scored as zero.

In [None]:
# For every class that has ground truth but no thresholded prediction, print its
# ground-truth frequency, its prediction frequency, and the reported score.
for gt_id_that_should_be_zero in extra_in_gt:
    gt_name = triplet_mapping[str(gt_id_that_should_be_zero+1)]  # mapping keys are 1-based ID strings
    print(f'gt_id_that_should_be_zero {gt_id_that_should_be_zero} {gt_name}')
    print(f'frequency in gt {triplet_counts[gt_name]}')
    # .get(..., 0) because a never-predicted triplet has no entry in predicted_counts.
    print(f'frequency in pred {predicted_counts.get(gt_name, 0)}')
    print(f'is it in missing from pred {gt_name in missing_predicted_triplets}')
    print('result on old ivtmetrics' , results_cholecT50_chal[gt_id_that_should_be_zero]) 
    print('=================================================')
    

They are essentially passing the whole triplet vector in, so they are not doing what we are doing, but they are passing all the triplet classes in!!! That is why we have predictions even when things are missing. 

But there should be a threshold, even when working with logits.

Questions to answer? 

Why does the new version not have any NaN at all? Their method for catching when it is NaN is wrong, likely the output from sklearn is no longer 

In [28]:
from sklearn.metrics import average_precision_score

# Example true labels
y_true = [0, 0, 1, 1]


# Probabilities (e.g., logistic regression)
y_scores_proba = [0.1, 0.2, 0.3, 0.9]  # Probabilities of class 1

# Compute Average Precision Score
# The raw scores are passed in without binarising them. Both positives (0.3, 0.9) are scored
# above both negatives (0.1, 0.2), even though 0.3 is below 0.5.
ap_proba = average_precision_score(y_true, y_scores_proba)

print("AP Score using probabilities:", ap_proba)


AP Score using probabilities: 1.0


It does not use a threshold of 0.5; that is why I am making mistakes.

So my whole analysis is wrong. 

As classification AP is not the same as Detection AP, very different in their calculations

My analysis is very wrong, and I realize I have been looking at things from the point of view of object detection.

The same analysis is required in the detection one.

Under detection rules, what would be the triplet score?

In [29]:
# Shallow copy of the list, so the original results stay untouched by the edits that follow.
results_cholecT50_chal_copy = results_cholecT50_chal[:]

In [30]:
# Convert to a NumPy array so that entries can be assigned through a list of indices.
results_cholecT50_chal_copy = np.array(results_cholecT50_chal_copy)
results_cholecT50_chal_copy

array([           nan,            nan,            nan,            nan,
       1.42380420e-01, 5.12557663e-04,            nan, 3.32626728e-01,
       1.94041245e-02, 4.41499286e-03, 1.51810978e-02, 1.05866348e-01,
       7.66676512e-01, 3.34643930e-01,            nan,            nan,
                  nan, 3.49844877e-01, 2.36705437e-01, 6.40751279e-01,
                  nan,            nan, 6.16255123e-01, 3.82526038e-01,
                  nan,            nan,            nan,            nan,
                  nan, 9.38378809e-01,            nan, 6.81760326e-01,
                  nan, 2.39063315e-01, 6.76337083e-01,            nan,
       1.46892996e-01,            nan,            nan,            nan,
       2.24427652e-02,            nan,            nan,            nan,
                  nan,            nan, 5.29207639e-03,            nan,
       6.28930818e-03,            nan,            nan,            nan,
       8.23852417e-04,            nan, 4.13746771e-03, 5.78127295e-02,
      

`extra_in_predicted` refers to the classes that have predictions but no ground truth; their score should be zero.

In [31]:
# Set the score of every class that was predicted but has no ground truth to 0.
# This modifies results_cholecT50_chal_copy in place.
results_cholecT50_chal_copy[list(extra_in_predicted)] = 0
results_cholecT50_chal_copy

array([           nan, 0.00000000e+00,            nan,            nan,
       1.42380420e-01, 5.12557663e-04,            nan, 3.32626728e-01,
       1.94041245e-02, 4.41499286e-03, 1.51810978e-02, 1.05866348e-01,
       7.66676512e-01, 3.34643930e-01,            nan,            nan,
       0.00000000e+00, 3.49844877e-01, 2.36705437e-01, 6.40751279e-01,
       0.00000000e+00, 0.00000000e+00, 6.16255123e-01, 3.82526038e-01,
                  nan,            nan,            nan, 0.00000000e+00,
       0.00000000e+00, 9.38378809e-01,            nan, 6.81760326e-01,
                  nan, 2.39063315e-01, 6.76337083e-01,            nan,
       1.46892996e-01,            nan,            nan,            nan,
       2.24427652e-02,            nan,            nan,            nan,
                  nan,            nan, 5.29207639e-03,            nan,
       6.28930818e-03,            nan,            nan, 0.00000000e+00,
       8.23852417e-04,            nan, 4.13746771e-03, 5.78127295e-02,
      

`extra_in_gt` refers to the classes that have ground truth but no predictions; all of them are misses (false negatives).

In [32]:
# Set the score of every class that has ground truth but no thresholded prediction to 0.
# NOTE: this overwrites the existing non-NaN scores of those classes, in place.
results_cholecT50_chal_copy[list(extra_in_gt)] = 0
results_cholecT50_chal_copy

array([           nan, 0.00000000e+00,            nan,            nan,
       0.00000000e+00, 0.00000000e+00,            nan, 3.32626728e-01,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       7.66676512e-01, 3.34643930e-01,            nan,            nan,
       0.00000000e+00, 3.49844877e-01, 2.36705437e-01, 6.40751279e-01,
       0.00000000e+00, 0.00000000e+00, 6.16255123e-01, 0.00000000e+00,
                  nan,            nan,            nan, 0.00000000e+00,
       0.00000000e+00, 9.38378809e-01,            nan, 0.00000000e+00,
                  nan, 0.00000000e+00, 6.76337083e-01,            nan,
       1.46892996e-01,            nan,            nan,            nan,
       0.00000000e+00,            nan,            nan,            nan,
                  nan,            nan, 0.00000000e+00,            nan,
       0.00000000e+00,            nan,            nan, 0.00000000e+00,
       8.23852417e-04,            nan, 0.00000000e+00, 0.00000000e+00,
      

In [33]:
# Mean over the non-NaN entries after the manual zeroing in the previous cells.
np.nanmean(results_cholecT50_chal_copy)

0.18780638325340818

Well, this is not actually correct either: the AP values still differ from the plotted ones. But it is getting closer.