In [1]:
%load_ext autoreload
%autoreload 2



import sys
sys.path.append('..')
import os
import shutil
from os.path import join

from collections import Counter
from utils.general.read_files import read_from_json
from utils.general.save_files import save_to_json

## 0. Prep work: count the frames already annotated in CholecInstanceSeg

In [None]:
# Root directory that is walked below for `*_t50_sparse` / `*_t50_full` folders
# (path is relative to the notebook's working directory).
cholec_instance_seg_path = '../../datasets/my_cholecinstanceseg/final_cholecinstanceseg/'

In [None]:
def file_names_in_cholecinstanceseg(cholec_instance_seg_path):
    """Collect annotation file names from every CholecT50-derived folder.

    Walks ``cholec_instance_seg_path`` recursively. For each directory whose
    name ends with ``_t50_sparse`` or ``_t50_full`` and that contains an
    ``ann_dir`` sub-directory, the regular files directly inside ``ann_dir``
    are recorded (sub-directories of ``ann_dir`` are ignored).

    Args:
        cholec_instance_seg_path (str): Root directory to walk.

    Returns:
        tuple: ``(per_folder, flat)`` where ``per_folder`` maps the matching
        folder name to the list of file names in its ``ann_dir`` and ``flat``
        is the concatenation of all those lists. A folder name that occurs
        more than once in the tree keeps only the last list in ``per_folder``
        while ``flat`` still contains every occurrence.
    """
    t50_suffixes = ("_t50_sparse", "_t50_full")
    per_folder = {}
    flat = []

    for parent, subdirs, _ in os.walk(cholec_instance_seg_path):
        for subdir in subdirs:
            # Only folders derived from CholecT50 are of interest.
            if not subdir.endswith(t50_suffixes):
                continue

            ann_dir = os.path.join(parent, subdir, "ann_dir")
            if not os.path.isdir(ann_dir):
                continue

            names = []
            for entry in os.listdir(ann_dir):
                if os.path.isfile(os.path.join(ann_dir, entry)):
                    names.append(entry)

            per_folder[subdir] = names
            flat.extend(names)

    return per_folder, flat



In [None]:

# Get all annotation files: a per-folder mapping plus one flat list of file names.
all_annotations, all_annotations_in_list  = file_names_in_cholecinstanceseg(cholec_instance_seg_path)


In [None]:
# Total number of annotation files found across all matching folders.
len(all_annotations_in_list)

So far, only about 31k frames from CholecT50 have been annotated.

## 1. Create `new_annotations`: move the files already annotated in CholecInstanceSeg out of the predictions

In [None]:
def filter_annotations(all_predicted_annotations, new_annotations_dir, already_annotated_in_cholecinstanceseg_v1, all_annotations_list):
    """
    Split predicted annotation files into "new" and "already annotated" folders.

    Every regular file in ``all_predicted_annotations`` is first copied into
    ``new_annotations_dir``. Then each name in ``all_annotations_list`` that is
    present in ``new_annotations_dir`` is moved into
    ``already_annotated_in_cholecinstanceseg_v1``. The source directory is only
    read, never modified.

    The function can be re-run on the same folders: files that already exist in
    either destination are overwritten instead of raising.

    Args:
        all_predicted_annotations (str): Source directory containing all predicted annotation files.
        new_annotations_dir (str): Destination for the files that are NOT in ``all_annotations_list``.
        already_annotated_in_cholecinstanceseg_v1 (str): Destination for the files that ARE in
            ``all_annotations_list``.
        all_annotations_list (list): File names (not paths) to separate out.
    """
    # Ensure both destination directories exist
    os.makedirs(new_annotations_dir, exist_ok=True)
    os.makedirs(already_annotated_in_cholecinstanceseg_v1, exist_ok=True)

    # Copy every regular file from the source into new_annotations_dir
    for file_name in os.listdir(all_predicted_annotations):
        full_file_path = os.path.join(all_predicted_annotations, file_name)
        if os.path.isfile(full_file_path):
            shutil.copy(full_file_path, new_annotations_dir)

    # Move the files listed in all_annotations_list out of new_annotations_dir
    for file_name in all_annotations_list:
        file_path_to_move = os.path.join(new_annotations_dir, file_name)
        if os.path.exists(file_path_to_move):
            # Target an explicit file path: shutil.move(src, <existing dir>) raises
            # shutil.Error when the directory already holds a file of that name,
            # which made a second run of this function crash.
            destination_path = os.path.join(already_annotated_in_cholecinstanceseg_v1, file_name)
            shutil.move(file_path_to_move, destination_path)

In [None]:
# Example usage: split the predicted annotations into "new" vs. "already annotated in v1".
from os.path import join 
# Input: directory of predicted annotation files (only read by filter_annotations).
all_predicted_annotations = '../../datasets/cholecinstanceseg_extension/CholecT50_predictions/'
# Output: predicted files whose names are NOT in all_annotations_in_list.
new_annotations_dir = '../../datasets/cholecinstanceseg_extension/new_annotations/'
# Output: predicted files whose names ARE in all_annotations_in_list.
already_annotated_in_cholecinstanceseg_v1 = '../../datasets/cholecinstanceseg_extension/already_annotated_in_cholecinstanceseg_v1/'

filter_annotations(all_predicted_annotations, new_annotations_dir, already_annotated_in_cholecinstanceseg_v1,all_annotations_in_list)

Sanity checks

In [None]:
# NOTE(review): hard-coded counts — presumably (total predicted frames) minus
# (frames already annotated, cf. len(all_annotations_in_list)); confirm both numbers
# against the actual folders rather than relying on these literals.
100863 - 30998

## 2. Next, I get the list of accepted frames and the rejected (error) frames

### Get the inconsistencies between CholecT50 and the CholecInstanceSeg extension, and the accepted frames

In [1]:
import sys
sys.path.append('..')

import os
from os.path import join
from collections import Counter
from utils.general.read_files import read_from_json
from utils.general.save_files import save_to_json

In [2]:
def update_accepted_frames_and_error_dict_per_image(accepted_frames_per_dataset,
                                                    error_dict_between_both_datasets,
                                                    ann_path_cholecinstanceseg_pseudo,
                                                    vid_ann_path_cholect50,
                                                    ann_id_cholecT50,
                                                    vid_name_cholecT50):
    """Classify one frame as accepted, 'conflict' or 'matching' and record the result.

    The instruments listed for the frame in the CholecT50 video label file are
    compared, as a multiset, with the instruments found in the frame's
    segmentation annotation (one instrument per distinct ``(label, group_id)``
    pair among its ``shapes``).

    * multisets differ                      -> rejected with reason ``'conflict'``
    * equal, but some class occurs > 1 time -> rejected with reason ``'matching'``
    * equal and every class occurs once     -> frame name appended to the accepted list

    Both ``accepted_frames_per_dataset[vid_name_cholecT50]`` (a list) and
    ``error_dict_between_both_datasets[vid_name_cholecT50]`` (a dict) must
    already exist; they are updated in place. Nothing is returned.

    Args:
        accepted_frames_per_dataset (dict): video name -> list of accepted annotation file names.
        error_dict_between_both_datasets (dict): video name -> {annotation file name -> rejection info}.
        ann_path_cholecinstanceseg_pseudo (str): Path of the frame's segmentation annotation JSON.
        vid_ann_path_cholect50 (str): Path of the CholecT50 label JSON of the video.
        ann_id_cholecT50 (int): Frame id used (as a string) to index the video's ``annotations``.
        vid_name_cholecT50 (str): Key of the video in both result dictionaries.
    """
    frame_file = os.path.basename(ann_path_cholecinstanceseg_pseudo)

    # ---- CholecT50 side ----
    t50_video = read_from_json(vid_ann_path_cholect50)
    t50_rows = t50_video['annotations'][str(ann_id_cholecT50)]
    instrument_names = t50_video['categories']['instrument']
    triplet_names = t50_video['categories']['triplet']

    # Rows whose instrument id (index 1) is -1 are ignored for both lists;
    # index 0 of a row is its triplet id.
    rows_with_instrument = [row for row in t50_rows if row[1] != -1]
    t50_instruments = [instrument_names[str(row[1])] for row in rows_with_instrument]
    t50_triplets = [triplet_names[str(row[0])] for row in rows_with_instrument]

    # ---- segmentation side ----
    seg_frame = read_from_json(ann_path_cholecinstanceseg_pseudo)
    instances_by_class = {}
    for shape in seg_frame['shapes']:
        instances_by_class.setdefault(shape['label'], set()).add(shape['group_id'])

    # One list entry per distinct instance; both lists are index-aligned.
    seg_instruments = []
    seg_instance_ids = []
    for class_name, instance_ids in instances_by_class.items():
        for instance_id in instance_ids:
            seg_instruments.append(class_name)
            seg_instance_ids.append(instance_id)

    seg_counts = Counter(seg_instruments)
    t50_counts = Counter(t50_instruments)
    video_errors = error_dict_between_both_datasets[vid_name_cholecT50]

    # Different instrument multisets: the two datasets disagree.
    if seg_counts != t50_counts:
        video_errors[frame_file] = {
            'reason': 'conflict',
            'cholecinstanceseg_classes': seg_instruments,
            'cholect50_classes': t50_instruments,
            'triplet_classes': t50_triplets,
        }
        print(f'evaluating {frame_file} - rejected conflict.  cholecinstanceseg {seg_instruments}, cholect50 {t50_instruments}')
        return

    # Same multiset, but a class appears several times: instances of that class
    # cannot be assigned to a triplet by class name alone.
    if any(count > 1 for count in seg_counts.values()):
        video_errors[frame_file] = {
            'reason': 'matching',
            'cholecinstanceseg_classes': seg_instruments,
            'cholect50_classes': t50_instruments,
            'triplet_classes': t50_triplets,
            'instance_ids': seg_instance_ids,
        }
        print(f'evaluating {frame_file} - rejected matching.  cholecinstanceseg {seg_instruments}, cholect50 {t50_instruments}')
        return

    accepted_frames_per_dataset[vid_name_cholecT50].append(frame_file)
    print(f'evaluating {frame_file} - accepted')

In [3]:
def generate_accepted_frames_and_error_dict(cholecinstanceseg_pseudo_dataset_removed_gt_ann_dir, cholect50_dataset_labels_dir):
    """Compare every annotation file in a folder against the CholecT50 labels.

    Annotation files are expected to be named ``<prefix>_<video>_<frame>.json``
    (e.g. ``t50_VID01_000000.json``): the second ``_``-separated token is the
    video name, the last one (without extension) is the integer frame id. Each
    file is handed to ``update_accepted_frames_and_error_dict_per_image``
    together with ``<cholect50_dataset_labels_dir>/<video>.json``.

    Files are processed in sorted name order so the result does not depend on
    the arbitrary order of ``os.listdir``. Entries that are not regular
    ``.json`` files (sub-directories, ``.DS_Store``, checkpoint folders, ...)
    are skipped.

    Args:
        cholecinstanceseg_pseudo_dataset_removed_gt_ann_dir (str): Directory holding the per-frame
            segmentation annotation JSON files.
        cholect50_dataset_labels_dir (str): Directory holding one CholecT50 label JSON per video.

    Returns:
        tuple: ``(accepted_frames_per_dataset, error_dict_between_both_datasets)`` — video name ->
        list of accepted annotation file names, and video name -> {file name -> rejection info}.

    Raises:
        AssertionError: If either argument is not an existing directory.
    """
    assert os.path.isdir(cholect50_dataset_labels_dir), 'the cholect50_dataset_labels_dir dataset path is not a directory' 
    assert os.path.isdir(cholecinstanceseg_pseudo_dataset_removed_gt_ann_dir), 'the cholecinstanceseg_pseudo_dataset_removed_gt_ann_dir dataset path is not a directory' 
    error_dict_between_both_datasets = {}
    accepted_frames_per_dataset = {}

    for ann_name in sorted(os.listdir(cholecinstanceseg_pseudo_dataset_removed_gt_ann_dir)):
        ann_path_cholecinstanceseg_pseudo = os.path.join(cholecinstanceseg_pseudo_dataset_removed_gt_ann_dir, ann_name)

        # Ignore anything that is not an annotation file; its name could not be parsed below.
        if not (ann_name.endswith('.json') and os.path.isfile(ann_path_cholecinstanceseg_pseudo)):
            continue

        vid_name_cholecT50 = ann_name.split('_')[1]
        vid_ann_path_cholect50 = os.path.join(cholect50_dataset_labels_dir, f'{vid_name_cholecT50}.json')

        # Every video seen gets an entry in both results, even if it stays empty.
        accepted_frames_per_dataset.setdefault(vid_name_cholecT50, [])
        error_dict_between_both_datasets.setdefault(vid_name_cholecT50, {})

        print(ann_name)
        ann_id_cholecT50 = int(ann_name.split('.')[0].split('_')[-1])

        update_accepted_frames_and_error_dict_per_image(accepted_frames_per_dataset,
                                                    error_dict_between_both_datasets,
                                                    ann_path_cholecinstanceseg_pseudo,
                                                    vid_ann_path_cholect50,
                                                    ann_id_cholecT50,
                                                    vid_name_cholecT50)

    return  accepted_frames_per_dataset, error_dict_between_both_datasets           

In [6]:
# Folder of per-frame segmentation annotations to compare against CholecT50.
# NOTE(review): this folder name ends in "from CholecT50" (with a space), while the dataset-creation
# cell further down uses ".../1_all_cholecinstanceseg_annotations_from_CholecT50" (underscore).
# Confirm which spelling exists on disk and use it in both places.
new_annotations_path = '../../datasets/matching_and_conflict_for_current_cholecinstanceseg/1_all_cholecinstanceseg_annotations_from CholecT50'
# Folder with one CholecT50 label JSON per video.
cholect50_dataset = '../../datasets/Rendezvous/CholecT50/labels'


# Alternative input (extension predictions), kept for reference:
# new_annotations_path = '../../datasets/cholecinstanceseg_extension/new_annotations'
# cholect50_dataset = '../../datasets/Rendezvous/CholecT50/labels'

In [7]:
# Run the comparison over every annotation file; prints one or two lines per frame.
accepted_frames_per_dataset, error_dict_between_both_datasets  = generate_accepted_frames_and_error_dict(new_annotations_path, cholect50_dataset)

t50_VID01_000000.json
evaluating t50_VID01_000000.json - accepted
t50_VID01_000001.json
evaluating t50_VID01_000001.json - accepted
t50_VID01_000002.json
evaluating t50_VID01_000002.json - accepted
t50_VID01_000003.json
evaluating t50_VID01_000003.json - accepted
t50_VID01_000004.json
evaluating t50_VID01_000004.json - rejected conflict.  cholecinstanceseg [], cholect50 ['grasper']
t50_VID01_000005.json
evaluating t50_VID01_000005.json - rejected conflict.  cholecinstanceseg ['grasper'], cholect50 []
t50_VID01_000006.json
evaluating t50_VID01_000006.json - rejected conflict.  cholecinstanceseg ['grasper'], cholect50 []
t50_VID01_000007.json
evaluating t50_VID01_000007.json - rejected conflict.  cholecinstanceseg ['grasper'], cholect50 []
t50_VID01_000008.json
evaluating t50_VID01_000008.json - rejected conflict.  cholecinstanceseg ['grasper'], cholect50 []
t50_VID01_000009.json
evaluating t50_VID01_000009.json - rejected conflict.  cholecinstanceseg ['grasper'], cholect50 []
t50_VID01_

In [None]:
# Inspect the collected rejections.
# NOTE(review): this renders the whole nested dict (one entry per rejected frame), which may be very large.
error_dict_between_both_datasets

In [8]:
# Output paths used for the extension run, kept for reference:
# accepted_frames_per_dataset_save_path = 'outputs/accepted_frames_extension.json'
# error_dict_between_both_datasets_save_path = 'outputs/error_dict_between_extension_and_cholecT50_datasets.json'

# Output paths for the current run (relative to the notebook's working directory).
accepted_frames_per_dataset_save_path = 'outputs/accepted_frames_tripletsegmentation_v1.json'
error_dict_between_both_datasets_save_path = 'outputs/error_dict_between_tripletsegmentation_v1.json'


In [9]:
# Persist both results so the dataset-creation step can be run from the JSON files.
# NOTE(review): save_to_json is a project helper not shown here — confirm it creates the
# 'outputs/' directory if it is missing.
save_to_json(data=accepted_frames_per_dataset, json_file_path=accepted_frames_per_dataset_save_path)
save_to_json(data=error_dict_between_both_datasets, json_file_path=error_dict_between_both_datasets_save_path)


In [10]:
# Total number of accepted frames, summed over all videos.
total_accepted_dataset_length = sum(
    len(frame_names) for frame_names in accepted_frames_per_dataset.values()
)

In [11]:
# Display the number of accepted frames.
total_accepted_dataset_length

21443

In [12]:
total_length_of_conflict_errors = 0
total_length_of_matching_errors = 0
total_length_of_removed_frames = 0

# Tally the rejected frames of every video, overall and per rejection reason.
for rejected_frames_of_video in error_dict_between_both_datasets.values():
    reasons_of_video = [frame_info['reason'] for frame_info in rejected_frames_of_video.values()]

    total_length_of_removed_frames += len(rejected_frames_of_video)
    total_length_of_conflict_errors += reasons_of_video.count('conflict')
    total_length_of_matching_errors += reasons_of_video.count('matching')
    
    

In [13]:
# Printed in this order: conflict rejections, matching rejections, all rejections.
print(total_length_of_conflict_errors)
print(total_length_of_matching_errors)
print(total_length_of_removed_frames)

7194
2361
9555


## 3. Finally, I create the dataset parts

In [2]:
def create_triplet_segmentation_per_image(ann_path_cholecinstanceseg,
                                          img_path,
                                          vid_ann_path_cholect50,
                                          ann_id_cholecT50,
                                          store_accepted_datasets_images_dir,
                                          store_accepted_datasets_ann_dir,
                                          video_name,
                                          img_name):
    """Add verb/target labels to one frame's segmentation annotation and export it.

    The frame's CholecT50 rows are turned into an ``instrument name ->
    {'verb', 'target'}`` mapping (rows whose instrument id at index 1 is -1 are
    skipped; the verb id is read from index 7 and the target id from index 8).
    Every shape of the segmentation annotation then receives the ``verb`` and
    ``target`` of its ``label``. The enriched annotation is saved under its
    original file name in ``store_accepted_datasets_ann_dir`` and the image is
    copied to ``store_accepted_datasets_images_dir`` as
    ``t50_<video_name>_<img_name>``.

    The lookup is keyed by instrument class only, so it relies on a frame
    containing at most one instance per instrument class. A shape whose label
    has no CholecT50 row raises ``KeyError``.

    Args:
        ann_path_cholecinstanceseg (str): Path of the frame's segmentation annotation JSON.
        img_path (str): Path of the frame image to copy.
        vid_ann_path_cholect50 (str): Path of the CholecT50 label JSON of the video.
        ann_id_cholecT50 (int): Frame id used (as a string) to index the video's ``annotations``.
        store_accepted_datasets_images_dir (str): Existing output directory for images.
        store_accepted_datasets_ann_dir (str): Existing output directory for annotations.
        video_name (str): Video name used in the copied image's file name.
        img_name (str): Image file name used in the copied image's file name.
    """
    t50_video = read_from_json(vid_ann_path_cholect50)
    seg_frame = read_from_json(ann_path_cholecinstanceseg)
    ann_name = os.path.basename(ann_path_cholecinstanceseg)

    t50_rows = t50_video['annotations'][str(ann_id_cholecT50)]
    verb_names = t50_video['categories']['verb']
    target_names = t50_video['categories']['target']
    instrument_names = t50_video['categories']['instrument']

    # instrument name -> verb/target of the triplet it takes part in
    ivt_by_instrument = {}
    for row in t50_rows:
        if row[1] == -1:
            continue
        instrument = instrument_names[str(row[1])]
        ivt_by_instrument[instrument] = {
            'verb': verb_names[str(row[7])],
            'target': target_names[str(row[8])],
        }

    print(f'{ann_name} - ivt_info_in_cholecT50_img : {ivt_by_instrument} ')

    # Valid only because an accepted frame has at most one instance per class.
    for shape in seg_frame['shapes']:
        triplet_parts = ivt_by_instrument[shape['label']]
        shape['verb'] = triplet_parts['verb']
        shape['target'] = triplet_parts['target']

    save_to_json(data=seg_frame, json_file_path=join(store_accepted_datasets_ann_dir, ann_name))

    # Copy the image next to the annotation, prefixed like the annotation files.
    exported_img_path = join(store_accepted_datasets_images_dir, f't50_{video_name}_{img_name}')
    shutil.copy(img_path, exported_img_path)
    
    


In [3]:
def generate_the_accepted_dataset(CholecT50_img_dir,
                                  new_annotations_dir,
                                  store_accepted_datasets_images_dir,
                                  store_accepted_datasets_ann_dir,
                                  cholectT50_labels_dir,
                                  accepted_frames_json_path,
                                  ):
    """Export every accepted frame (image + verb/target-enriched annotation).

    Reads the ``video key -> [annotation file names]`` mapping from
    ``accepted_frames_json_path`` and calls
    ``create_triplet_segmentation_per_image`` once per listed file. The video
    name is the part of the key before its first ``_``; the image is looked up
    at ``<CholecT50_img_dir>/<video>/<frame>.png`` and the labels at
    ``<cholectT50_labels_dir>/<video>.json``. Both output directories are
    created if missing.

    Args:
        CholecT50_img_dir (str): Directory with one sub-directory of frame images per video.
        new_annotations_dir (str): Directory with the per-frame segmentation annotation JSON files.
        store_accepted_datasets_images_dir (str): Output directory for the copied images.
        store_accepted_datasets_ann_dir (str): Output directory for the enriched annotations.
        cholectT50_labels_dir (str): Directory with one CholecT50 label JSON per video.
        accepted_frames_json_path (str): JSON file holding the accepted frames per video.

    Raises:
        AssertionError: If one of the three input directories does not exist.
    """
    required_dirs = (
        (cholectT50_labels_dir, 'the cholecT50 dataset path is not a directory'),
        (new_annotations_dir, 'the cholecinstanceseg pseudo new_annotations_dir dataset path is not a directory'),
        (CholecT50_img_dir, 'the cholecinstanceseg pseudo img_dir_all dataset path is not a directory'),
    )
    for required_dir, message in required_dirs:
        assert os.path.isdir(required_dir), message

    accepted_frames = read_from_json(accepted_frames_json_path)
    for output_dir in (store_accepted_datasets_images_dir, store_accepted_datasets_ann_dir):
        os.makedirs(output_dir, exist_ok=True)

    for video_key, frame_files in accepted_frames.items():
        video = video_key.split('_')[0]
        video_labels_path = join(cholectT50_labels_dir, f'{video}.json')

        for ann_name in frame_files:
            # "<prefix>_<video>_<frame>.json" -> integer frame id
            frame_id = int(ann_name.split('.', 1)[0].rsplit('_', 1)[-1])

            # Strip the "t50_" prefix and the video name to get the image's file name.
            img_name = ann_name.replace('json', 'png').replace('t50_', '').replace(f'{video}_', '')

            create_triplet_segmentation_per_image(join(new_annotations_dir, ann_name),
                                                  join(CholecT50_img_dir, video, img_name),
                                                  video_labels_path,
                                                  frame_id,
                                                  store_accepted_datasets_images_dir,
                                                  store_accepted_datasets_ann_dir,
                                                  video,
                                                  img_name)

In [6]:

# Inputs: CholecT50 frame images (one sub-folder per video) and per-video label JSONs.
CholecT50_img_dir = '../../datasets/Rendezvous/CholecT50/videos'
cholectT50_labels_dir = '../../datasets/Rendezvous/CholecT50/labels'
# Outputs: images and enriched annotations of the accepted frames.
store_accepted_datasets_images_dir = '../../datasets/matching_and_conflict_for_current_cholecinstanceseg/3_accepted_annotations/img_dir' 
store_accepted_datasets_ann_dir = '../../datasets/matching_and_conflict_for_current_cholecinstanceseg/3_accepted_annotations/ann_dir' 
# Input: per-frame segmentation annotations.
# NOTE(review): this folder name ends in "from_CholecT50" (underscore), while the comparison step
# earlier in the notebook reads ".../1_all_cholecinstanceseg_annotations_from CholecT50" (space).
# Confirm both cells point at the same folder.
new_annotations_dir = '../../datasets/matching_and_conflict_for_current_cholecinstanceseg/1_all_cholecinstanceseg_annotations_from_CholecT50' 
# Input: accepted-frame list written by the comparison step.
accepted_frames_json_path = 'outputs/accepted_frames_tripletsegmentation_v1.json'

In [7]:
# Build the accepted dataset; prints one line per exported frame.
generate_the_accepted_dataset(CholecT50_img_dir, 
                            new_annotations_dir,
                            store_accepted_datasets_images_dir, 
                            store_accepted_datasets_ann_dir, 
                            cholectT50_labels_dir, 
                            accepted_frames_json_path, 
                                )

t50_VID01_000000.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000001.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000002.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000003.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000010.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000011.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000012.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000013.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000014.json - ivt_info_in_cholecT50_img : {'grasper': {'verb': 'grasp', 'target': 'gallbladder'}} 
t50_VID01_000015.js

In [None]:
def generate_the_disagrement_and_matching_issue_datasets(CholecT50_img_dir,
                                        new_annotations_dir,
                                        disagreemnet_dataset_dir,
                                        matching_issue_dataset_dir,
                                        error_dict_between_extension_and_cholecT50_datasets_json_path,
                                        ):
    """Copy every rejected frame into the dataset matching its rejection reason.

    Reads the ``video name -> {annotation file name -> info}`` mapping from the
    error-dict JSON. Frames whose ``info['reason']`` is ``'conflict'`` go to
    ``disagreemnet_dataset_dir``, those with ``'matching'`` go to
    ``matching_issue_dataset_dir``. In the chosen dataset the image is copied
    to ``img_dir/t50_<video>_<frame>.png`` and the annotation file into
    ``ann_dir``. Both sub-directories of both datasets are created up front.

    Args:
        CholecT50_img_dir (str): Directory with one sub-directory of frame images per video.
        new_annotations_dir (str): Directory with the per-frame segmentation annotation JSON files.
        disagreemnet_dataset_dir (str): Output root for frames rejected with reason 'conflict'.
        matching_issue_dataset_dir (str): Output root for frames rejected with reason 'matching'.
        error_dict_between_extension_and_cholecT50_datasets_json_path (str): Path of the error-dict JSON.

    Raises:
        AssertionError: If ``new_annotations_dir`` or ``CholecT50_img_dir`` is not a directory.
        ValueError: If a frame has a reason other than 'conflict' or 'matching'.
    """
    assert os.path.isdir(new_annotations_dir), 'the cholecinstanceseg pseudo new_annotations_dir dataset path is not a directory'
    assert os.path.isdir(CholecT50_img_dir), 'the cholecinstanceseg pseudo img_dir_all dataset path is not a directory'

    error_dict = read_from_json(error_dict_between_extension_and_cholecT50_datasets_json_path)

    # img_dir and ann_dir for the disagreement dataset, then for the matching-issue dataset
    for dataset_dir in (disagreemnet_dataset_dir, matching_issue_dataset_dir):
        for sub_dir in ('img_dir', 'ann_dir'):
            os.makedirs(join(dataset_dir, sub_dir), exist_ok=True)

    for video_name, video_error_dict in error_dict.items():
        for ann_name, frame_info in video_error_dict.items():
            # Strip the "t50_" prefix and the video name to get the image's file name.
            img_name = ann_name.replace('json', 'png').replace('t50_', '').replace(f'{video_name}_', '')
            img_path = join(CholecT50_img_dir, video_name, img_name)
            ann_path = join(new_annotations_dir, ann_name)

            reason = frame_info['reason']
            if reason == 'conflict':
                target_dataset_dir = disagreemnet_dataset_dir
            elif reason == 'matching':
                target_dataset_dir = matching_issue_dataset_dir
            else:
                raise ValueError('where is my stuff')

            shutil.copy(img_path, join(target_dataset_dir, 'img_dir', f't50_{video_name}_{img_name}'))
            shutil.copy(ann_path, join(target_dataset_dir, 'ann_dir'))


In [None]:

# Paths used for the extension run, kept for reference:
# CholecT50_img_dir = '../../datasets/Rendezvous/CholecT50/videos'
# cholectT50_labels_dir = '../../datasets/Rendezvous/CholecT50/labels'
# disagreemnet_dataset_dir = '../../datasets/cholecinstanceseg_extension/1_disagreement_between_datasets' 
# matching_issue_dataset_dir = '../../datasets/cholecinstanceseg_extension/2_matching_issue' 
# new_annotations_dir = '../../datasets/cholecinstanceseg_extension/new_annotations' 
# error_dict_between_extension_and_cholecT50_datasets_json_path = 'outputs/error_dict_between_extension_and_cholecT50_datasets.json'



# Inputs: CholecT50 frame images and per-video label JSONs.
CholecT50_img_dir = '../../datasets/Rendezvous/CholecT50/videos'
cholectT50_labels_dir = '../../datasets/Rendezvous/CholecT50/labels'
# Outputs: one dataset per rejection reason ('conflict' -> disagreement, 'matching' -> matching issues).
disagreemnet_dataset_dir = '../../datasets/matching_and_conflict_for_current_cholecinstanceseg/2_disagreement_issues' 
matching_issue_dataset_dir = '../../datasets/matching_and_conflict_for_current_cholecinstanceseg/1_matching_issues' 
# NOTE(review): this rebinds new_annotations_dir to ".../ann_dir", a different folder from the one
# used when building the accepted dataset — confirm this is the folder the error dict was computed from.
new_annotations_dir = '../../datasets/matching_and_conflict_for_current_cholecinstanceseg/ann_dir' 
# NOTE(review): the comparison step saves its error dict to
# 'outputs/error_dict_between_tripletsegmentation_v1.json'; the file name below is different.
# Confirm this file exists or point to the saved one.
error_dict_between_extension_and_cholecT50_datasets_json_path = 'outputs/error_dict_between_extension_and_cholecT50_datasets_tripletsegmentation_v1.json'

In [None]:
# Copy every rejected frame (image + annotation) into the dataset for its rejection reason.
generate_the_disagrement_and_matching_issue_datasets(CholecT50_img_dir, 
                                        new_annotations_dir,
                                        disagreemnet_dataset_dir, 
                                        matching_issue_dataset_dir,
                                        error_dict_between_extension_and_cholecT50_datasets_json_path, 
                                        )