In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

In [2]:
from ivtmetrics import Detection as ivtmetricsDetection
# from utils.ivt_metrics_utils.ivt_metrics_utils import run_ivt_metric_object_for_folder

## Get IVT metrics

In [3]:

from os.path import join
import os
import json
import numpy as np
from utils.general.read_files import read_from_json 

from utils.convert_to_coco.convert_dataset_to_instrument_class_coco import get_bbox_info_from_coco_contour_xy
from utils.general.dataset_variables import TripletSegmentationVariables


def get_list_of_info_dict_for_metric_calculation_in_img(json_dict: dict):
    """Build the per-instance records of one image for the IVT detection metric.

    Shapes in ``json_dict['shapes']`` that share the same ``(label, group_id)``
    pair are treated as the contours of a single instrument instance. The
    score and the verb/target names of an instance are taken from its first
    shape; its bounding box is computed from all of its contours.

    Args:
        json_dict: Annotation dict with a ``'shapes'`` list. Every shape must
            provide ``'label'``, ``'group_id'``, ``'points'``, ``'verb'`` and
            ``'target'``; ``'score'`` is optional and defaults to 1.0.

    Returns:
        A list with one dict per instance (in first-seen order) holding:
          * ``'triplet'``: the triplet id minus one, as a string,
          * ``'instrument'``: ``[instrument id minus one (str), score,
            x_min / W, y_min / H, box_width / W, box_height / H]`` where W and
            H are ``TripletSegmentationVariables.width`` / ``.height``,
          * ``'class_name'`` and ``'instance_id'``.

    Raises:
        KeyError: If a shape lacks a required key, if the instrument label is
            unknown, or if neither the triplet name nor its
            ``<label>,null_verb,null_target`` fallback is a known triplet.

    NOTE(review): any ``'verb_score'`` / ``'target_score'`` present on a shape
    is not used by this conversion; only ``'score'`` ends up in the record.
    """
    # Invert the id -> name tables so names can be mapped back to ids.
    instrument_id_by_class = {
        name: class_id
        for class_id, name in TripletSegmentationVariables.categories['instrument'].items()
    }
    triplet_id_by_class = {
        name: class_id
        for class_id, name in TripletSegmentationVariables.categories['triplet'].items()
    }
    img_width = TripletSegmentationVariables.width
    img_height = TripletSegmentationVariables.height

    # Group the shapes by (label, group_id); dict order keeps first-seen order.
    shapes_by_instance = {}
    for shape in json_dict['shapes']:
        instance_key = (shape['label'], shape['group_id'])
        shapes_by_instance.setdefault(instance_key, []).append({
            "contour_pts": shape['points'],
            "score": shape.get('score', 1.0),
            "verb": shape['verb'],
            "target": shape['target'],
        })

    list_of_info_dict_for_metric_calculation_in_img = []
    for (class_name, instance_id), instance_shapes in shapes_by_instance.items():
        # Instance-level attributes come from the first contour of the instance.
        first_shape = instance_shapes[0]
        score = first_shape['score']
        triplet_name = f"{class_name},{first_shape['verb']},{first_shape['target']}"

        # Flatten every contour into one flat coordinate list per contour, which
        # is what the bbox helper is given. Coordinates are cast to int32 first,
        # so fractional parts are dropped.
        polygons_xy = [
            np.array(shape["contour_pts"], dtype=np.int32).flatten().tolist()
            for shape in instance_shapes
        ]

        # get bboxes from contours
        x_min, y_min, box_width, box_height = get_bbox_info_from_coco_contour_xy(polygons_xy)

        # Unknown instrument/verb/target combinations fall back to the null triplet.
        if triplet_name not in triplet_id_by_class:
            fallback_name = f'{class_name},null_verb,null_target'
            if fallback_name not in triplet_id_by_class:
                raise KeyError(
                    f"no triplet id for {triplet_name!r} or its fallback {fallback_name!r}"
                )
            triplet_name = fallback_name

        list_of_info_dict_for_metric_calculation_in_img.append({
            # Ids are shifted by -1 (the original code notes this as "zero init").
            'triplet': str(int(triplet_id_by_class[triplet_name]) - 1),
            'instrument': [
                str(int(instrument_id_by_class[class_name]) - 1),
                score,
                x_min / img_width,
                y_min / img_height,
                box_width / img_width,
                box_height / img_height,
            ],
            'class_name': class_name,
            'instance_id': instance_id,
        })

    return list_of_info_dict_for_metric_calculation_in_img


def run_ivt_metric_object_for_folder(ivt_metric_object,
                                    pred_ann_dir,
                                    gt_ann_dir,
                                    verbose=True):
    """Feed every prediction/ground-truth annotation pair of a folder to a metric object.

    The JSON files in ``pred_ann_dir`` are processed in sorted filename order.
    For each one, the file with the same name in ``gt_ann_dir`` is read as the
    ground truth, both are converted with
    ``get_list_of_info_dict_for_metric_calculation_in_img`` and passed to
    ``ivt_metric_object.update(..., format="dict")``.

    The video name is derived from the filename: the extension and any
    ``'t50_'`` substring are removed and everything before the last ``'_'`` is
    kept (e.g. ``t50_VID14_000000.json`` -> ``VID14``). Whenever the video name
    changes between consecutive files, ``ivt_metric_object.video_end()`` is
    called before the new video's first frame is added.

    Args:
        ivt_metric_object: Object providing ``reset_global()``, ``video_end()``
            and ``update(targets=..., predictions=..., format=...)``.
        pred_ann_dir: Directory holding the predicted annotation JSON files.
        gt_ann_dir: Directory holding ground-truth files with the same names.
        verbose: When True (default) print progress and the converted records
            for every file, as before; pass False to silence the output.

    Returns:
        None. The results are accumulated inside ``ivt_metric_object``.

    NOTE(review): ``video_end()` is only triggered by a change of video name,
    so it is never called here after the last video. Confirm whether the
    metric object needs that call before per-video results are computed.
    """
    ivt_metric_object.reset_global()

    # Only regular *.json files are annotations; skip stray entries such as
    # '.ipynb_checkpoints' that would otherwise break parsing below.
    pred_ann_list = sorted(
        filename for filename in os.listdir(pred_ann_dir)
        if filename.lower().endswith('.json')
        and os.path.isfile(join(pred_ann_dir, filename))
    )

    video_name_tracker = None

    for i, filename in enumerate(pred_ann_list):
        pred_ann_path = join(pred_ann_dir, filename)
        gt_ann_path = join(gt_ann_dir, filename)
        if verbose:
            print(f'currently on {i}, {filename}')

        basename = filename.split('.')[0]
        basename = basename.replace('t50_', '')
        # Keep everything before the last underscore; unlike a strict two-way
        # unpack this does not fail on names with more or fewer underscores.
        video_name = basename.rsplit('_', 1)[0]

        if video_name_tracker is None:
            if verbose:
                print('i = 0 initialize tracker')
            video_name_tracker = video_name
        elif video_name_tracker != video_name:
            if verbose:
                print(f'changing video from {video_name_tracker} to {video_name}')
            ivt_metric_object.video_end()   # A new video
            video_name_tracker = video_name # update tracker

        json_dict_pred = read_from_json(pred_ann_path)
        list_of_info_dict_for_metric_calculation_in_img_pred = get_list_of_info_dict_for_metric_calculation_in_img(json_dict_pred)

        json_dict_gt = read_from_json(gt_ann_path)
        list_of_info_dict_for_metric_calculation_in_img_gt = get_list_of_info_dict_for_metric_calculation_in_img(json_dict_gt)

        if verbose:
            print(f'pred, {list_of_info_dict_for_metric_calculation_in_img_pred}')
            print(f'gt, {list_of_info_dict_for_metric_calculation_in_img_gt}')

        # One image per update call, hence the single-element outer lists.
        ivt_metric_object.update(targets=[list_of_info_dict_for_metric_calculation_in_img_gt],
                    predictions=[list_of_info_dict_for_metric_calculation_in_img_pred],
                    format="dict")
        

In [4]:
# Fresh metric accumulator for this evaluation run.
ivt_metric_detection_object = ivtmetricsDetection()

# Prediction and ground-truth annotation folders; files are paired by filename.
# (Plain strings: these paths contain no placeholders, so no f-prefix is needed.)
pred_ann_dir = '../resnet_model/work_dirs/threetask_resnet_fpn_parallel_decoders/combine_first_and_second_stage_results'
gt_ann_dir = '../../datasets/my_triplet_seg_datasets/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# Alternative run (rendezvous_cholectriplet_seg):
# pred_ann_dir = '../resnet_model/work_dirs/rendezvous_cholectriplet_seg/combine_first_and_second_stage_results'
# gt_ann_dir = '../../datasets/my_triplet_seg_datasets/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# Accumulate every frame of the folder into the metric object.
run_ivt_metric_object_for_folder(ivt_metric_object=ivt_metric_detection_object,
                                 pred_ann_dir=pred_ann_dir,
                                 gt_ann_dir=gt_ann_dir)



# ivt_metric_detection_object = ivtmetricsDetection()

# # pred_ann_dir = f'../results/full_results_mask2former_multihead/mask2former_one_stage_multihead_full_best_coco_segm_mAP_iter_24500_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# # pred_ann_dir = f'../results/full_results_mask2former_multihead/mask2former_one_stage_multihead_full_iter_35000_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# # pred_ann_dir = f'../results/triplet_direct/mask2former_direct_triplet_prediction_best_coco_bbox_mAP_iter_60000_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# # pred_ann_dir = f'../resnet_model/results/combine_first_and_second_stage_results/mask2former_test_triplet_segmentation_v2_dataset_on_full' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# #fpn maskrcnn
# # pred_ann_dir = f'../resnet_model/work_dirs/multitask_resnet_fpn/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'


# # Mask2former standard
# # pred_ann_dir = f'../resnet_model/work_dirs/multitask_resnet_standard/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # # multitask_resnet_fpn_test_on_gt
# # pred_ann_dir = f'../resnet_model/work_dirs/multitask_resnet_fpn_test_on_gt/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # single stage combined
# # pred_ann_dir = f'../resnet_model/work_dirs/singletask_resnet_fpn_for_verbtarget_best/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # pred_ann_dir = f'../resnet_model/work_dirs/singletask_resnet_fpn_verb_and_target_parallel_fc_predict_on_train/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/train/ann_dir'

# # pred_ann_dir = f'../resnet_model/work_dirs/singletask_resnet_fpn_verb_and_target_parallel_fc_predict_on_train/combine_first_and_second_stage_results'
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/train/ann_dir'


# pred_ann_dir = f'../resnet_model/work_dirs/threetask_resnet_fpn_parallel_decoders/combine_first_and_second_stage_results' 
# gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # pred_ann_dir = f'../resnet_model/work_dirs/rendezvous_cholectriplet_seg/combine_first_and_second_stage_results'  
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'
       

currently on 0, t50_VID14_000000.json
i = 0 initialize tracker
pred, []
gt, []
currently on 1, t50_VID14_000001.json
pred, []
gt, []
currently on 2, t50_VID14_000002.json
pred, []
gt, []
currently on 3, t50_VID14_000003.json
pred, []
gt, []
currently on 4, t50_VID14_000004.json
pred, []
gt, []
currently on 5, t50_VID14_000005.json
pred, []
gt, []
currently on 6, t50_VID14_000006.json
pred, []
gt, []
currently on 7, t50_VID14_000007.json
pred, []
gt, []
currently on 8, t50_VID14_000008.json
pred, []
gt, []
currently on 9, t50_VID14_000009.json
pred, []
gt, []
currently on 10, t50_VID14_000010.json
pred, []
gt, []
currently on 11, t50_VID14_000012.json
pred, [{'triplet': '17', 'instrument': ['0', 0.9715732932090759, 0.6838407494145199, 0.14583333333333334, 0.31381733021077285, 0.475], 'class_name': 'grasper', 'instance_id': 1}]
gt, [{'triplet': '17', 'instrument': ['0', 1.0, 0.6791569086651054, 0.11666666666666667, 0.319672131147541, 0.5020833333333333], 'class_name': 'grasper', 'instanc

In [15]:
# Per-video AP for every component code (presumably instrument / verb / target
# and their combinations), reported as a percentage.
video_ap_components = ('i', 'v', 't', 'iv', 'it', 'ivt')
video_ap_results = {}
for component in video_ap_components:
    video_ap_results[component] = ivt_metric_detection_object.compute_video_AP(component)
    print(f"results_{component}, {video_ap_results[component]['mAP']*100}")

# Keep the individual names that later cells read.
results_i, results_v, results_t, results_iv, results_it, results_ivt = (
    video_ap_results[component] for component in video_ap_components
)

results_i, 48.12207317747241
results_v, 32.51468292169706
results_t, 16.202522683315344
results_iv, 14.36179326470501
results_it, 11.391005861049315
results_ivt, 8.711960044299245


In [None]:
# ivt_metric_detection_object = ivtmetricsDetection()

# # pred_ann_dir = f'../results/full_results_mask2former_multihead/mask2former_one_stage_multihead_full_best_coco_segm_mAP_iter_24500_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# # pred_ann_dir = f'../results/full_results_mask2former_multihead/mask2former_one_stage_multihead_full_iter_35000_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# # pred_ann_dir = f'../results/triplet_direct/mask2former_direct_triplet_prediction_best_coco_bbox_mAP_iter_60000_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# # pred_ann_dir = f'../resnet_model/results/combine_first_and_second_stage_results/mask2former_test_triplet_segmentation_v2_dataset_on_full' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2/test/ann_dir'

# #fpn maskrcnn
# # pred_ann_dir = f'../resnet_model/work_dirs/multitask_resnet_fpn/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'


# # Mask2former standard
# # pred_ann_dir = f'../resnet_model/work_dirs/multitask_resnet_standard/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # # multitask_resnet_fpn_test_on_gt
# # pred_ann_dir = f'../resnet_model/work_dirs/multitask_resnet_fpn_test_on_gt/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # single stage combined
# # pred_ann_dir = f'../resnet_model/work_dirs/singletask_resnet_fpn_for_verbtarget_best/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # pred_ann_dir = f'../resnet_model/work_dirs/singletask_resnet_fpn_verb_and_target_parallel_fc_predict_on_train/combine_first_and_second_stage_results' 
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/train/ann_dir'

# # pred_ann_dir = f'../resnet_model/work_dirs/singletask_resnet_fpn_verb_and_target_parallel_fc_predict_on_train/combine_first_and_second_stage_results'
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/train/ann_dir'


# pred_ann_dir = f'../resnet_model/work_dirs/threetask_resnet_fpn_parallel_decoders/combine_first_and_second_stage_results' 
# gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'

# # pred_ann_dir = f'../resnet_model/work_dirs/rendezvous_cholectriplet_seg/combine_first_and_second_stage_results'  
# # gt_ann_dir = f'../data/triplet_segmentation_dataset_v2_second_stage/test/ann_dir'


# run_ivt_metric_object_for_folder(ivt_metric_object=ivt_metric_detection_object,
#                                  pred_ann_dir=pred_ann_dir,
#                                  gt_ann_dir=gt_ann_dir)


       

currently on 0, t50_VID14_000000.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 1, t50_VID14_000001.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 2, t50_VID14_000002.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 3, t50_VID14_000003.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 4, t50_VID14_000004.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 5, t50_VID14_000005.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 6, t50_VID14_000006.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 7, t50_VID14_000007.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 8, t50_VID14_000008.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 9, t50_VID14_000009.json
pred, []
gt, []
original ground_truth []
[]
[[]]
[]
[[]]
currently on 10, t50_VID14_000010.json
pred, []
gt

In [None]:
# AP (via compute_AP) for every component code, reported as a percentage.
# The outer quotes are double so the nested ['mAP'] lookup also parses on
# Python versions older than 3.12.
ap_components = ('i', 'v', 't', 'iv', 'it', 'ivt')
ap_results = {}
for component in ap_components:
    ap_results[component] = ivt_metric_detection_object.compute_AP(component)
    print(f"results_{component}, {ap_results[component]['mAP']*100}")

# Keep the individual names that later cells read.
results_i, results_v, results_t, results_iv, results_it, results_ivt = (
    ap_results[component] for component in ap_components
)

results_i, 74.42303066379678
results_v, 42.56793918912681
results_t, 15.864385877438922
results_iv, 20.650149412295633
results_it, 11.141319621793471
results_ivt, 8.240903120530414


In [6]:
import numpy as np

In [8]:
# Per-class AP values for the 'ivt' component. `results_ivt` is assigned by both
# the compute_video_AP and the compute_AP cells above, so this shows whichever ran last.
print(results_ivt['AP'])

[nan, nan, nan, nan, 0.0, nan, 0.0, 0.0, nan, nan, nan, 0.0, 0.3323326024624726, nan, 0.0, nan, nan, 0.5142305155955997, 0.0, 0.4156828953037484, 0.2181818181818182, 0.0, 0.0, nan, 0.0, 0.0, nan, 0.14545454545454548, 0.0, 0.41837732160312807, nan, nan, 0.0, nan, 0.0, nan, 0.07575757575757576, nan, nan, 0.0, 0.0, nan, nan, 0.0, nan, 0.0, nan, nan, nan, nan, nan, 0.0, 0.0, nan, nan, nan, nan, 0.0, 0.2380308367112467, 0.0, 0.4610338956727119, 0.09090909090909091, 0.0, 0.0, 0.0, nan, nan, nan, 0.0, 0.153170359052712, 0.0, nan, nan, nan, 0.0, 0.0, nan, nan, 0.0, 0.5622358288858232, nan, 0.0, 0.4610540358595872, nan, nan, nan, nan, nan, 0.0, 0.0, 0.0, 0.0, 0.0, nan, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3636363636363637]


In [7]:
# Mean of the per-class 'ivt' AP values, ignoring the nan entries.
np.nanmean(results_ivt['AP'] ) 

0.07761869588458338

In [None]:
# Inspection only: display the metric object's repr.
ivt_metric_detection_object

In [7]:
# Per-class AP of the 't' component as percentages. Double outer quotes keep the
# nested ['AP'] lookup valid on Python versions older than 3.12.
print(f"results_t, {[ap*100 for ap in results_t['AP']]}")

results_t, [55.854473232824766, 0.0, 16.69697355772015, 9.090909090909092, 0.0, nan, 66.80988812650253, 0.0, 41.550301141681864, nan, 9.090909090909092, 0.0, 0.0, 13.527272727272727, 0.0]


In [9]:
# Number of per-class AP entries reported for the 't' component.
len(results_t['AP'])

15