# Create ground-truth (gt) data for the InsDet dataset from
https://github.com/insdet/instance-detection

1. Put the images in the folder "images".
2. Put the corresponding gt XML files in the folder "xml".
3. Define the objects you want to create gt for (`target_category_names` below).
4. Execute the cells below.

In [7]:
# Size (in pixels) of every crop that is cut out of the source images.
cropped_image_width = 2048
# NOTE: the misspelling "heigth" is kept on purpose; the cropping cell passes
# this exact name to main().
cropped_image_heigth = 1536

# Cleaned category names (no leading "<number>_" prefix) that the filter step
# keeps in the final ground truth.
target_category_names = [
    "thermos_flask_purple",
    "banana_milk_drink",
    "ousa_grated_cheese",
    "korean_ssamjang",
    "hellmanns_mayonnaise",
    "pasta_sauce_black_pepper",
    "illy_blend",
    "lindor_salted_caramel",
    "truffettes",
    "nabati_cheese_wafer",
    "tragata_olive_oil",
    "tulip_luncheon_meat",
    "costa_caramel",
]

# Make crops from images and create gt file

In [8]:
import os
import xml.etree.ElementTree as ET
import cv2
import random
import json
import re

# Function to clean category names by removing leading numbers and underscores
def clean_category_name(name):
    """Return name without one leading "<digits>_" prefix and without outer whitespace.

    Only a prefix at the very start of the string is removed; the whitespace
    is stripped afterwards, so a name that starts with a blank keeps its
    numeric prefix.
    """
    without_prefix = re.sub(r'^\d+_', '', name)
    return without_prefix.strip()

# Parse the XML annotation file
def parse_xml(xml_file):
    """Read one XML annotation file and return its objects.

    Every <object> element becomes a dict with the keys 'name' (text of the
    <name> child) and 'bbox' ([xmin, ymin, xmax, ymax] as ints, read from the
    <bndbox> child). The objects are returned in document order.
    """
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    annotation_root = ET.parse(xml_file).getroot()
    return [
        {
            'name': node.find('name').text,
            'bbox': [int(node.find(f'bndbox/{tag}').text) for tag in corner_tags],
        }
        for node in annotation_root.findall('object')
    ]

# Function to check if a bounding box is at least partially within a given crop
def is_bbox_partially_within_crop(bbox, crop):
    """Return True if bbox overlaps the crop in at least one pixel row and column.

    Args:
        bbox: [xmin, ymin, xmax, ymax] in image coordinates.
        crop: (crop_x, crop_y, crop_w, crop_h); the crop covers the pixel
            columns crop_x .. crop_x + crop_w - 1 and the pixel rows
            crop_y .. crop_y + crop_h - 1.

    Returns:
        bool: False when the box lies completely left of, right of, above or
        below the crop, True otherwise.
    """
    crop_x, crop_y, crop_w, crop_h = crop
    xmin, ymin, xmax, ymax = bbox
    # The right/bottom comparisons use >= because crop_x + crop_w and
    # crop_y + crop_h are the first pixels *outside* the crop; a box starting
    # there would otherwise be clipped to a zero-width/zero-height box.
    return not (
        xmax < crop_x
        or xmin >= crop_x + crop_w
        or ymax < crop_y
        or ymin >= crop_y + crop_h
    )

# Process file to get unique categories
def process_file(filepath, category_mapping):
    """Register every category name found in one XML file.

    category_mapping is modified in place: each non-empty <name> text of an
    <object> element that is not yet a key is added with the value None (the
    ID is assigned later). Existing entries are left untouched. Returns None.
    """
    annotation_root = ET.parse(filepath).getroot()
    for node in annotation_root.findall('object'):
        label = node.find('name').text
        if not label:
            # Missing or empty name text: nothing to register.
            continue
        # setdefault keeps the value of a category that is already known.
        category_mapping.setdefault(label, None)

# Generate category mapping
def generate_category_mapping(directory):
    """Build a {category name: ID} dict from all '.xml' files in directory.

    The names are collected with process_file(); afterwards the IDs 1, 2, ...
    are handed out in alphabetical order of the names, so the result does not
    depend on the order in which the files are listed.
    """
    category_mapping = {}

    # Collect the category names of every XML annotation file.
    xml_entries = (entry for entry in os.listdir(directory) if entry.endswith('.xml'))
    for entry in xml_entries:
        process_file(os.path.join(directory, entry), category_mapping)

    # sorted() returns a new list, so overwriting the values while looping is safe.
    for category_id, category_name in enumerate(sorted(category_mapping), start=1):
        category_mapping[category_name] = category_id

    return category_mapping

# Main function to process images and annotations
def main(xml_directory, image_directory, output_json, crop_width, crop_height):
    """Cut object-centred crops out of the annotated images and write a COCO-style gt file.

    Every image in image_directory ('.jpg', '.jpeg', '.png') that has an XML
    file with the same base name in xml_directory is processed. Its objects
    are visited from left to right (by xmin). For each object whose category
    is not yet covered by an earlier crop of the same image, a randomly
    shifted crop containing the object is cut out, written as
    'crop_<image_id>_<x>_<y>_<image name>.jpg' into the current working
    directory, and registered together with the clipped boxes of all objects
    that overlap the crop.

    Args:
        xml_directory: Folder with the XML annotation files.
        image_directory: Folder with the source images.
        output_json: Path of the JSON file to write.
        crop_width: Requested crop width in pixels.
        crop_height: Requested crop height in pixels.

    Notes:
        * If an image is smaller than the requested crop, the crop is limited
          to the image size and that actual size is stored in the JSON.
        * Boxes without a positive overlap with the crop are not annotated.
        * Unreadable images, and objects for which no valid crop was found,
          are skipped with a message.
    """
    max_attempts = 2000

    # Generate category mapping from XML files
    category_mapping = generate_category_mapping(xml_directory)

    # Create the final data structure. The raw names are cleaned exactly once;
    # cleaning twice would also strip a numeric prefix that belongs to the name.
    data = {
        "images": [],
        "annotations": [],
        "categories": [
            {"id": category_id, "name": clean_category_name(raw_name), "supercategory": "object"}
            for raw_name, category_id in category_mapping.items()
        ]
    }

    annotation_id = 0
    image_id = 0  # Unique ID per written crop

    # Process each image and its corresponding annotation
    for image_filename in os.listdir(image_directory):
        if not image_filename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        image_path = os.path.join(image_directory, image_filename)
        image_stem = os.path.splitext(image_filename)[0]
        annotation_path = os.path.join(xml_directory, image_stem + '.xml')

        if not os.path.exists(annotation_path):
            print(f"Annotation file {annotation_path} does not exist. Skipping {image_filename}.")
            continue

        # cv2.imread returns None instead of raising when the file cannot be decoded.
        image = cv2.imread(image_path)
        if image is None:
            print(f"Image {image_path} could not be read. Skipping {image_filename}.")
            continue
        img_height, img_width = image.shape[:2]

        # A crop can never be larger than the image it is taken from.
        crop_size = (min(crop_width, img_width), min(crop_height, img_height))

        # Parse XML annotation and visit the objects from left to right
        objects = parse_xml(annotation_path)
        objects.sort(key=lambda obj: obj['bbox'][0])

        # Category names that already appear in a crop of this image
        used_objects = set()
        for obj in objects:
            # Objects without a registered category cannot be annotated.
            if obj['name'] in used_objects or obj['name'] not in category_mapping:
                continue

            crop_objects = []
            for _ in range(max_attempts):
                x, y = _random_crop_origin(obj['bbox'], crop_size, (img_width, img_height))
                crop_objects = _clip_objects_to_crop(objects, (x, y) + crop_size, category_mapping)
                if crop_objects:
                    break

            if not crop_objects:
                print(f"No valid crop found for object {obj['name']} in {image_filename}. Skipping.")
                continue

            # Save crop
            crop_image = image[y:y + crop_size[1], x:x + crop_size[0]]
            crop_filename = f'crop_{image_id}_{x}_{y}_{image_stem}.jpg'
            cv2.imwrite(crop_filename, crop_image)

            # Add to data structure
            data['images'].append({
                "id": image_id,
                "file_name": crop_filename,
                "width": crop_size[0],
                "height": crop_size[1]
            })
            for name, annotation in crop_objects:
                annotation['id'] = annotation_id
                annotation['image_id'] = image_id
                annotation_id += 1
                data['annotations'].append(annotation)
                used_objects.add(name)

            image_id += 1  # Increment image ID for the next crop

    # Save the final JSON structure
    with open(output_json, 'w') as json_file:
        json.dump(data, json_file, indent=4)

    print(f"Prozess abgeschlossen. Das Ergebnis wurde in {output_json} gespeichert.")


def _random_crop_origin(bbox, crop_size, image_size):
    """Return a random top-left corner (x, y) of a crop that contains bbox and stays inside the image."""
    xmin, ymin, xmax, ymax = bbox
    crop_w, crop_h = crop_size
    img_w, img_h = image_size
    # Free space left around the box inside the crop; clamped to 0 because
    # random.randint raises ValueError for a negative upper bound.
    max_shift_x = max(0, crop_w - (xmax - xmin + 1))
    max_shift_y = max(0, crop_h - (ymax - ymin + 1))
    x = max(0, xmin - random.randint(0, max_shift_x))
    y = max(0, ymin - random.randint(0, max_shift_y))
    # Pull the crop back if it would reach past the right/bottom image border.
    x = min(x, img_w - crop_w)
    y = min(y, img_h - crop_h)
    return x, y


def _clip_objects_to_crop(objects, crop, category_mapping):
    """Return (name, annotation) pairs for all objects that overlap crop with a positive area."""
    crop_x, crop_y, crop_w, crop_h = crop
    clipped = []
    for candidate in objects:
        category_id = category_mapping.get(candidate['name'])
        if category_id is None:
            continue
        if not is_bbox_partially_within_crop(candidate['bbox'], crop):
            continue
        # Bounding box relative to the crop, clipped to the crop borders
        xmin, ymin, xmax, ymax = candidate['bbox']
        adj_xmin = max(xmin - crop_x, 0)
        adj_ymin = max(ymin - crop_y, 0)
        adj_width = min(xmax - crop_x, crop_w) - adj_xmin
        adj_height = min(ymax - crop_y, crop_h) - adj_ymin
        if adj_width <= 0 or adj_height <= 0:
            # The box only touches the crop border; nothing of it is visible.
            continue
        clipped.append((candidate['name'], {
            'category_id': category_id,
            'bbox': [adj_xmin, adj_ymin, adj_width, adj_height],
            'area': adj_width * adj_height,
            'segmentation': [],
            'iscrowd': 0
        }))
    return clipped

if __name__ == "__main__":
    # Input folders and output file; update these paths as needed.
    xml_directory = 'xml'
    image_directory = 'images'
    output_json = 'final_crops_info.json'

    # The crop size comes from the configuration cell at the top.
    main(
        xml_directory,
        image_directory,
        output_json,
        crop_width=cropped_image_width,
        crop_height=cropped_image_heigth,
    )


Prozess abgeschlossen. Das Ergebnis wurde in final_crops_info.json gespeichert.


# Filter the gt for the target objects

In [9]:
import json

json_file = 'final_crops_info.json'

# Load the complete ground truth written by the cropping step.
with open(json_file, 'r') as file:
    data = json.load(file)

# IDs of all categories whose name is listed in target_category_names.
target_category_ids = [
    category['id']
    for category in data['categories']
    if category['name'] in target_category_names
]

# Keep only the categories and annotations that refer to a target ID;
# the image list is left unchanged.
data.update(
    categories=[
        category for category in data['categories']
        if category['id'] in target_category_ids
    ],
    annotations=[
        annotation for annotation in data['annotations']
        if annotation['category_id'] in target_category_ids
    ],
)

with open('filtered_crops.json', 'w') as output_file:
    json.dump(data, output_file, indent=4)

Bereinigung abgeschlossen. Ergebnis wurde in 'bereinigte_daten.json' gespeichert.
