In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import shutil
import random

In [28]:
def sample_images_and_masks(images_dir, masks_dir, output_images_dir, output_masks_dir, size_limit_mbs):
    # Convert size limit from MB to bytes
    size_limit_bytes = size_limit_mbs * 1024 * 1024

    # Create output directories if they don't exist
    os.makedirs(output_images_dir, exist_ok=True)
    os.makedirs(output_masks_dir, exist_ok=True)

    # Get list of image filenames
    image_filenames = os.listdir(images_dir)
    random.shuffle(image_filenames)

    # Function to get size of a directory
    def get_directory_size(directory):
        total_size = 0
        for dirpath, _, filenames in os.walk(directory):
            for filename in filenames:
                filepath = os.path.join(dirpath, filename)
                total_size += os.path.getsize(filepath)
        return total_size

    # Initialize total size
    total_size = get_directory_size(output_images_dir)

    # Sample files until the total size exceeds the size limit
    for image_filename in image_filenames:
        if total_size >= size_limit_bytes:
            break

        # Construct paths for image and mask
        image_path = os.path.join(images_dir, image_filename)
        mask_path = os.path.join(masks_dir, image_filename)

        # Check if image path exists
        if not os.path.isfile(image_path):
            print(f"Image not found: {image_path}")
            continue

        # Check if mask path exists
        if not os.path.isfile(mask_path):
            print(f"Mask not found: {mask_path}")
            continue

        # Get size of image and mask
        image_size = os.path.getsize(image_path)
        mask_size = os.path.getsize(mask_path)

        # Check if adding the image and mask would exceed the size limit
        if total_size + image_size + mask_size > size_limit_bytes:
            break

        # Copy image and mask to output directories
        shutil.copy(image_path, output_images_dir)
        shutil.copy(mask_path, output_masks_dir)

        # Update total size
        total_size += image_size + mask_size

    print("Sampling completed.")

In [7]:
!pwd

/notebooks/continual-learning


In [35]:
sample_images_and_masks(
    images_dir="../data/CaDISv2/train",
    masks_dir="../data/final_cadisv2_cataract1k_jsons/cadv2-masks/train",
    output_images_dir="../data/cadis_random64MB/train",
    output_masks_dir="../data/cadis_random64MB/train_masks",
    size_limit_mbs=64  # 32MB size limit
)

Sampling completed.


In [2]:
from utils.coco_utils import COCOUtils

# Copy images wrt coco-json files


## Copy Train

In [9]:
from utils.coco_utils import COCOUtils
COCOUtils.copy_images_from_coco("./data/catract-1k/cataract1k-updated-jsons/train/train_cataract1k_annotations.json", 
                                "./data/catract-1k/train", 
                                "./data/catract-1k/pictures-combined")



loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
Images copied successfully.


## Copy Val

In [10]:
from utils.coco_utils import COCOUtils
COCOUtils.copy_images_from_coco("./data/catract-1k/cataract1k-updated-jsons/val/val_cataract1k_annotations.json", 
                                "./data/catract-1k/val", 
                                "./data/catract-1k/pictures-combined")

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Images copied successfully.


## Copy Test

In [11]:
from utils.coco_utils import COCOUtils
COCOUtils.copy_images_from_coco("./data/catract-1k/cataract1k-updated-jsons/test/test_cataract1k_annotations.json", 
                                "./data/catract-1k/test", 
                                "./data/catract-1k/pictures-combined")

loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
Images copied successfully.


In [7]:
original_annotation_file = 'data/cadis/training.json'
output_directory = 'sampled_images'
cocoutils = COCOUtils(original_annotation_file, output_directory)

target_size_mb = 10  # Example target size in MB

# Call the random_sample method to perform random sampling
cocoutils.random_sample(target_size_mb, "data/cadis/training")

loading annotations into memory...
Done (t=0.20s)
creating index...
index created!
Total size of sampled images: 9.831022262573242 MB
Number of sampled images: 16


In [2]:
import json
import os
import shutil

In [1]:
with open("./data/cadis/validation.json", "r") as file:
    data = json.load(file)

NameError: name 'json' is not defined

In [5]:
COCOUtils.merge_coco_jsons("/Users/anar/Desktop/TUM/summer24/di-lab/code/continual-learning/sampled_images/coco1.json",
                           "/Users/anar/Desktop/TUM/summer24/di-lab/code/continual-learning/sampled_images/coco2.json",
                           "/Users/anar/Desktop/TUM/summer24/di-lab/code/continual-learning/test-merging")

loading annotations into memory...


FileNotFoundError: [Errno 2] No such file or directory: '/Users/anar/Desktop/TUM/summer24/di-lab/code/continual-learning/sampled_images/coco1.json'

In [20]:
with open("/Users/anar/Desktop/TUM/summer24/di-lab/code/continual-learning/test-merging/merged_coco.json", "r") as file:
    data1 = json.load(file)

In [36]:
[1]+[1]

[1, 1]

In [21]:
data1["categories"]

[{'id': 3, 'name': 'Eye Retractors'},
 {'id': 8, 'name': 'Hydro. Cannula'},
 {'id': 9, 'name': 'Visco. Cannula'},
 {'id': 10, 'name': 'Cap. Cystotome'},
 {'id': 11, 'name': 'Rycroft Cannula'},
 {'id': 12, 'name': 'Bonn Forceps'},
 {'id': 13, 'name': 'Primary Knife'},
 {'id': 14, 'name': 'Phaco. Handpiece'},
 {'id': 15, 'name': 'Lens Injector'},
 {'id': 16, 'name': 'A/I Handpiece'},
 {'id': 17, 'name': 'Secondary Knife'},
 {'id': 18, 'name': 'Micromanipulator'},
 {'id': 20, 'name': 'Cap. Forceps'},
 {'id': 26, 'name': 'Water Sprayer'},
 {'id': 27, 'name': 'Suture Needle'},
 {'id': 28, 'name': 'Needle Holder'},
 {'id': 29, 'name': 'Charleux Cannula'},
 {'id': 30, 'name': 'Vannas Scissors'},
 {'id': 32, 'name': 'Viter. Handpiece'},
 {'id': 33, 'name': 'Mendez Ring'},
 {'id': 34, 'name': 'Biomarker'},
 {'id': 35, 'name': 'Marker'}]

In [26]:
data["categories"]

[{'id': 3, 'name': 'Eye Retractors'},
 {'id': 8, 'name': 'Hydro. Cannula'},
 {'id': 9, 'name': 'Visco. Cannula'},
 {'id': 10, 'name': 'Cap. Cystotome'},
 {'id': 11, 'name': 'Rycroft Cannula'},
 {'id': 12, 'name': 'Bonn Forceps'},
 {'id': 13, 'name': 'Primary Knife'},
 {'id': 14, 'name': 'Phaco. Handpiece'},
 {'id': 15, 'name': 'Lens Injector'},
 {'id': 16, 'name': 'A/I Handpiece'},
 {'id': 17, 'name': 'Secondary Knife'},
 {'id': 18, 'name': 'Micromanipulator'},
 {'id': 20, 'name': 'Cap. Forceps'},
 {'id': 26, 'name': 'Water Sprayer'},
 {'id': 27, 'name': 'Suture Needle'},
 {'id': 28, 'name': 'Needle Holder'},
 {'id': 29, 'name': 'Charleux Cannula'},
 {'id': 30, 'name': 'Vannas Scissors'},
 {'id': 32, 'name': 'Viter. Handpiece'},
 {'id': 33, 'name': 'Mendez Ring'},
 {'id': 34, 'name': 'Biomarker'},
 {'id': 35, 'name': 'Marker'}]

In [37]:
data["annotations"][0]["category_id"]

18

In [16]:
data["licenses"]

[{'url': 'http://creativecommons.org/licenses/by/2.0/',
  'id': 1,
  'name': 'Attribution License'}]

In [21]:
data1["licenses"]

[{'url': 'http://creativecommons.org/licenses/by/2.0/',
  'id': 1,
  'name': 'Attribution License'}]

In [31]:
data1["categories"]

[{'id': 3, 'name': 'Eye Retractors'},
 {'id': 8, 'name': 'Hydro. Cannula'},
 {'id': 9, 'name': 'Visco. Cannula'},
 {'id': 10, 'name': 'Cap. Cystotome'},
 {'id': 11, 'name': 'Rycroft Cannula'},
 {'id': 12, 'name': 'Bonn Forceps'},
 {'id': 13, 'name': 'Primary Knife'},
 {'id': 14, 'name': 'Phaco. Handpiece'},
 {'id': 15, 'name': 'Lens Injector'},
 {'id': 16, 'name': 'A/I Handpiece'},
 {'id': 17, 'name': 'Secondary Knife'},
 {'id': 18, 'name': 'Micromanipulator'},
 {'id': 20, 'name': 'Cap. Forceps'},
 {'id': 26, 'name': 'Water Sprayer'},
 {'id': 27, 'name': 'Suture Needle'},
 {'id': 28, 'name': 'Needle Holder'},
 {'id': 29, 'name': 'Charleux Cannula'},
 {'id': 30, 'name': 'Vannas Scissors'},
 {'id': 32, 'name': 'Viter. Handpiece'},
 {'id': 33, 'name': 'Mendez Ring'},
 {'id': 34, 'name': 'Biomarker'},
 {'id': 35, 'name': 'Marker'}]

In [15]:
print(len(data["annotations"]))
print(len(data["images"]))

6920
3584


In [2]:
from pycocotools.coco import COCO
import random

In [3]:
coco_annotation_file = './data/cadis/training.json'
coco = COCO(coco_annotation_file)

loading annotations into memory...
Done (t=0.17s)
creating index...
index created!


In [41]:
# Get all image IDs from the dataset
image_ids = coco.getImgIds()

# Set the number of images you want to sample
num_images_to_sample = 10

# Randomly select image IDs
sampled_image_ids = random.sample(image_ids, num_images_to_sample)

# Retrieve image file paths or URLs
for image_id in sampled_image_ids:
    image_info = coco.loadImgs(image_id)[0]
    image_file_path = image_info['file_name']  # Or 'coco_url' for URLs
    print("Sampled image file path:", image_file_path)

Sampled image file path: Video9_frame010680.png
Sampled image file path: Video13_frame000630.png
Sampled image file path: Video20_frame016010.png
Sampled image file path: Video17_frame000510.png
Sampled image file path: Video15_frame003960.png
Sampled image file path: Video3_frame003540.png
Sampled image file path: Video11_frame007720.png
Sampled image file path: Video13_frame008140.png
Sampled image file path: Video17_frame009720.png
Sampled image file path: Video25_frame007830.png


In [43]:
filtered_annotations = []
filtered_images = []
for image_id in sampled_image_ids:
    # Get annotations for the image
    annotations_ids = coco.getAnnIds(imgIds=image_id)
    annotations = coco.loadAnns(annotations_ids)
    filtered_annotations.extend(annotations)
    
    # Get image information
    image_info = coco.loadImgs(image_id)[0]
    filtered_images.append(image_info)

    original_image_path = os.path.join('data', 'cadis','training', image_info['file_name'])
    image_path = os.path.join('sampled_images', image_info['file_name'])
    shutil.copy(original_image_path, image_path)

# Create a new COCO-style JSON object containing only the filtered information
filtered_coco_data = {
    "info": coco.dataset["info"],
    "licenses": coco.dataset["licenses"],
    "images": filtered_images,
    "annotations": filtered_annotations,
    "categories": coco.dataset["categories"]
}

# Save the filtered COCO-style JSON object to a new file
filtered_coco_annotation_file = 'sampled_images/sampled-coco.json'
with open(filtered_coco_annotation_file, 'w') as f:
    json.dump(filtered_coco_data, f)

In [1]:
from utils.sampler import COCOSampler

In [4]:
# Initialize COCOSampler with the original annotation file and output directory
original_annotation_file = 'data/cadis/training.json'
output_directory = 'sampled_images'
sampler = COCOSampler(original_annotation_file, output_directory)

# Specify the number of samples you want to randomly select
target_size_mb = 1  # Example target size in MB

# Call the random_sample method to perform random sampling
sampler.random_sample(target_size_mb)

loading annotations into memory...
Done (t=0.21s)
creating index...
index created!
Total size of sampled images: 0.5904836654663086 MB
Number of sampled images: 1
