In [1]:
%load_ext autoreload
%autoreload 2

import sys
# Make modules in the parent directory and in its `resnet_model` folder
# importable from this notebook.
for _extra_path in ('../', '../resnet_model'):
    # Guard against duplicates so that re-running this cell does not keep
    # growing sys.path.
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

In [2]:
import cv2
import matplotlib.pyplot as plt
import numpy as np

from utils.general.read_files import read_from_json
from utils.general.dataset_variables import CholecSeg8kVariables

# Video-sequence -> split lookup. It is read as a module-level global by
# split_tissue_instrument_dataset further down, so this cell must run first.
# The key/value schema is defined in CholecSeg8kVariables (not shown here);
# presumably keys look like "VID1"/"VID01" and values name a split folder —
# verify against that class.
seq_to_split_dict = CholecSeg8kVariables.seq_to_split_dict

In [3]:
import os
import cv2
import numpy as np
from shutil import copyfile


def convert_cholecseg8k_tissue_instrument(input_dir, output_dir):
    """Flatten a CholecSeg8k archive into image / annotation / visualization folders.

    Walks ``input_dir/<video>/<clip>/`` and, for every ``frame_<id>_endo.png``
    that has a sibling ``frame_<id>_endo_watershed_mask.png``:

    * copies the frame to ``<output_dir>/img_dir/VID_<tag>_<id>.png``;
    * remaps the mask values (read as a single-channel image) through
      ``CholecSeg8kVariables.conversion_from_cholecseg8k_to_tissue_segmentation``
      and writes the result to ``<output_dir>/ann_dir`` under the same name;
    * writes a colorized version of the remapped mask to
      ``<output_dir>/vis_dir`` under the same name.

    ``<tag>`` is the clip directory name up to its first underscore with the
    text ``video`` removed. Mask values that are missing from the conversion
    table are mapped to class 0 and drawn with color ``(0, 0, 0)``.

    Frames without a mask file are skipped silently. Frames whose mask file
    exists but cannot be decoded are skipped with a printed warning, and no
    image is copied for them, so ``img_dir`` and ``ann_dir`` stay in sync.

    Args:
        input_dir: Directory containing one sub-directory per video, each of
            which contains one sub-directory per clip.
        output_dir: Destination root; ``img_dir``, ``ann_dir`` and ``vis_dir``
            are created inside it if missing.

    Returns:
        None. Results are written to disk and a completion message is printed.
    """
    os.makedirs(output_dir, exist_ok=True)
    img_dir = os.path.join(output_dir, "img_dir")
    ann_dir = os.path.join(output_dir, "ann_dir")
    vis_dir = os.path.join(output_dir, "vis_dir")
    for folder in (img_dir, ann_dir, vis_dir):
        os.makedirs(folder, exist_ok=True)

    id_map = CholecSeg8kVariables.conversion_from_cholecseg8k_to_tissue_segmentation
    old_to_new_id = {k: v['id'] for k, v in id_map.items()}
    new_id_to_color = {v['id']: v['color'] for v in id_map.values()}

    # Collect every clip directory first. Listings are sorted because
    # os.listdir returns entries in arbitrary order; sorting makes the
    # processing order reproducible.
    all_clip_dirs = []
    for video_dir in sorted(os.listdir(input_dir)):
        video_path = os.path.join(input_dir, video_dir)
        if not os.path.isdir(video_path):
            continue
        for clip in sorted(os.listdir(video_path)):
            clip_path = os.path.join(video_path, clip)
            if os.path.isdir(clip_path):
                all_clip_dirs.append(clip_path)

    for clip_path in all_clip_dirs:
        # The tag depends only on the clip directory, so compute it once per clip.
        video_tag = os.path.basename(clip_path).split('_')[0].replace('video', '')

        for fname in sorted(os.listdir(clip_path)):
            # Only raw frames: skip the color / mask companions of each frame.
            if not fname.endswith("_endo.png") or "color" in fname or "mask" in fname:
                continue

            frame_id = fname.replace("_endo.png", "").replace("frame_", "")
            new_base = f"VID_{video_tag}_{frame_id}.png"

            img_path = os.path.join(clip_path, fname)
            mask_path = os.path.join(clip_path, f"frame_{frame_id}_endo_watershed_mask.png")
            if not os.path.exists(mask_path):
                continue

            # cv2.imread signals failure by returning None instead of raising.
            # Read the mask BEFORE copying the image so an unreadable mask
            # does not leave an image without annotation behind.
            raw_mask = cv2.imread(mask_path, 0)
            if raw_mask is None:
                print(f"⚠️ Skipping {img_path}: could not read mask {mask_path}")
                continue

            # Copy image
            copyfile(img_path, os.path.join(img_dir, new_base))

            # Convert mask
            converted_mask = np.zeros_like(raw_mask, dtype=np.uint8)
            color_mask = np.zeros((*raw_mask.shape, 3), dtype=np.uint8)

            for old_id in np.unique(raw_mask):
                region = raw_mask == old_id  # computed once, used for both outputs
                new_id = old_to_new_id.get(int(old_id), 0)
                converted_mask[region] = new_id
                color_mask[region] = new_id_to_color.get(new_id, (0, 0, 0))

            cv2.imwrite(os.path.join(ann_dir, new_base), converted_mask)
            # NOTE(review): cv2.imwrite stores 3-channel arrays in BGR order.
            # If the colors in the conversion table are RGB tuples, the saved
            # visualization has red and blue swapped — confirm the intended
            # channel order.
            cv2.imwrite(os.path.join(vis_dir, new_base), color_mask)

    print("✅ All images and masks converted for tissue segmentation.")

# Example usage: `input_dir` is the folder containing video_01, video_02, etc.;
# the flattened img_dir / ann_dir / vis_dir folders are written to `output_dir`.
convert_cholecseg8k_tissue_instrument(
    input_dir='../../datasets/cholecseg8k/archive',
    output_dir='../../datasets/cholecseg8k/tissue_instrument',
)


✅ All images and masks converted for tissue segmentation.


In [5]:
import os
from shutil import copyfile
from tqdm import tqdm

def split_tissue_instrument_dataset(input_dir, output_dir, seq_to_split=None):
    """Copy a flattened dataset into ``train`` / ``val`` / ``test`` sub-folders.

    Every file listed in ``<input_dir>/img_dir`` is assigned to a split based
    on the second underscore-separated field of its name
    (e.g. ``VID_01_16396.png`` -> ``'01'``). The key ``VID<field>`` is looked
    up first, then ``VID<field without leading zeros>``. The file, together
    with its same-named counterparts in ``ann_dir`` and ``vis_dir``, is then
    copied to ``<output_dir>/<split>/<subfolder>/``. Counterparts that do not
    exist are skipped silently.

    Args:
        input_dir: Directory containing ``img_dir``, ``ann_dir`` and ``vis_dir``.
        output_dir: Destination root. ``train``, ``val`` and ``test`` folders,
            each with the three sub-folders, are created inside it.
        seq_to_split: Optional mapping from video key (``"VID<seq>"``) to split
            folder name. Defaults to the notebook-level ``seq_to_split_dict``.
            Only the ``train`` / ``val`` / ``test`` folders are created up
            front, so values are expected to be one of those names.

    Raises:
        ValueError: If neither key form of a file's sequence id is in the mapping.
        IndexError: If a file name in ``img_dir`` contains no underscore.
    """
    if seq_to_split is None:
        # Fall back to the notebook-level mapping so existing calls keep working.
        seq_to_split = seq_to_split_dict

    subfolders = ('img_dir', 'ann_dir', 'vis_dir')

    for split_name in ('train', 'val', 'test'):
        for subfolder in subfolders:
            os.makedirs(os.path.join(output_dir, split_name, subfolder), exist_ok=True)

    image_dir = os.path.join(input_dir, 'img_dir')
    all_images = sorted(os.listdir(image_dir))

    for filename in tqdm(all_images, desc="Splitting into train/val/test"):
        seq_id = filename.split('_')[1]  # e.g., VID_01_16396.png -> '01'

        # Try the zero-padded key first (VID01), then the unpadded one (VID1).
        for vid_key in (f"VID{seq_id}", f"VID{seq_id.lstrip('0')}"):
            if vid_key in seq_to_split:
                split = seq_to_split[vid_key]
                break
        else:
            raise ValueError(f'{seq_id} cannot be found')

        for subfolder in subfolders:
            src = os.path.join(input_dir, subfolder, filename)
            dst = os.path.join(output_dir, split, subfolder, filename)
            if os.path.exists(src):
                copyfile(src, dst)

    print("✅ Dataset successfully split.")


In [6]:
# Example usage: split the flattened dataset produced by the conversion step
# into train / val / test folders.
split_tissue_instrument_dataset(
    input_dir='../../datasets/cholecseg8k/tissue_instrument',
    output_dir='../../datasets/cholecseg8k/tissue_instrument_mmseg_formatted',
)

Splitting into train/val/test: 100%|██████████| 8080/8080 [03:10<00:00, 42.31it/s]

✅ Dataset successfully split.





## Generate the mmdetection version