In [1]:
import os
import json
from collections import defaultdict

In [2]:
# Root of the dataset checkout, relative to the notebook's working directory.
# Every other location below is derived from it.
dataset_base_path = '../ham_concept_dataset/'

# The two annotation trees live side by side under <base>/Datasets:
# 'ground_truth_annotations' and 'study_annotations'.
annotator_annotations_path, participant_annotations_path = (
    os.path.join(dataset_base_path, 'Datasets', subfolder)
    for subfolder in ('ground_truth_annotations', 'study_annotations')
)

# Destination of the generated per-image summary, written at the dataset root.
output_json_path = os.path.join(dataset_base_path, 'annotation_summary.json')

In [3]:
# Directory trees that are walked recursively to discover annotation JSON files.
# These are the same two locations configured in the path cell above, so reuse
# those names instead of rebuilding the paths.
json_parent_folders = [annotator_annotations_path, participant_annotations_path]

# Directories probed, in order, when looking up the image for an ISIC ID.
image_folders_paths = [os.path.join(dataset_base_path, 'ISIC2018_Task3_Training_Input')]

In [4]:
# Collect the extension-less name of every *.json file found anywhere below the
# configured parent folders. A set is used so that a name occurring in several
# folders is recorded only once.
json_basenames = set()
print("Scanning for JSON files in specified 'Datasets' subfolders:")
for walk_root in json_parent_folders:
    print(f"  - Scanning under: {os.path.abspath(walk_root)}")

    # isdir() is False for a missing path too, so it covers both checks.
    if not os.path.isdir(walk_root):
        print(f"Warning: JSON parent folder not found at {walk_root}")
        continue

    for _dirpath, _dirnames, filenames in os.walk(walk_root):
        json_basenames.update(
            os.path.splitext(filename)[0]  # e.g., "ISIC_0033928"
            for filename in filenames
            if filename.endswith('.json')
        )

Scanning for JSON files in specified 'Datasets' subfolders:
  - Scanning under: /home/nqmtien/THESIS/REIT4841/ham-concept/ham_concept_dataset/Datasets/ground_truth_annotations
  - Scanning under: /home/nqmtien/THESIS/REIT4841/ham-concept/ham_concept_dataset/Datasets/study_annotations


In [5]:
def list_subfolders(parent_dir, label):
    """Return the full paths of the immediate sub-directories of ``parent_dir``.

    Args:
        parent_dir: Directory whose direct children are inspected.
        label: Word placed at the start of the warning message
            (e.g. ``"Annotator"``) when ``parent_dir`` is unusable.

    Returns:
        A list of paths (``parent_dir`` joined with each child name) for the
        children that are directories. If ``parent_dir`` is not an existing
        directory, a warning is printed and an empty list is returned.
    """
    if not os.path.isdir(parent_dir):
        print(f"Warning: {label} annotations path not found: {parent_dir}")
        return []
    candidates = (os.path.join(parent_dir, name) for name in os.listdir(parent_dir))
    return [path for path in candidates if os.path.isdir(path)]


def find_annotation_files(subfolders, json_filename, base_path):
    """Locate ``json_filename`` inside each folder of ``subfolders``.

    Args:
        subfolders: Iterable of directory paths to probe.
        json_filename: File name (with extension) to look for in each folder.
        base_path: Directory the returned paths are made relative to.

    Returns:
        Sorted list of paths, relative to ``base_path``, one per folder that
        contains ``json_filename``. Empty if no folder contains it.
    """
    matches = []
    for folder in subfolders:
        candidate = os.path.join(folder, json_filename)
        if os.path.exists(candidate):
            matches.append(os.path.relpath(candidate, base_path))
    # Sorted so the output does not depend on directory listing order.
    return sorted(matches)


# One record per ISIC ID: image location plus the annotation files found for it.
isic_annotation_data = []

# The globals() checks guard against running this cell before the cells that
# define its inputs.
if 'json_basenames' not in globals() or not json_basenames:
    print("Error: 'json_basenames' is not defined or is empty. Please run the previous cells to populate it.")
elif 'image_folders_paths' not in globals() or not image_folders_paths:
    print("Error: 'image_folders_paths' is not defined. Please run the cell '80294ab9' to define it.")
else:
    print(f"Processing {len(json_basenames)} unique ISIC IDs...")

    # List the per-annotator / per-participant folders once, up front. The
    # listing does not depend on the ISIC ID, so doing it inside the loop would
    # repeat the same directory scan (and the same missing-path warning) for
    # every ID.
    annotator_folders = list_subfolders(annotator_annotations_path, "Annotator")
    participant_folders = list_subfolders(participant_annotations_path, "Participant")

    for isic_id in sorted(json_basenames):  # Sort for consistent output order
        expected_json_filename = isic_id + ".json"
        expected_image_filename = isic_id + ".jpg"

        # First image folder containing the file wins; the stored path is
        # relative to 'dataset_base_path'.
        image_dir = None
        for img_folder in image_folders_paths:
            potential_image_path = os.path.join(img_folder, expected_image_filename)
            if os.path.exists(potential_image_path):
                image_dir = os.path.relpath(potential_image_path, dataset_base_path)
                break

        if image_dir is None:
            print(f"Warning: Image file {expected_image_filename} not found for ISIC ID {isic_id} in provided image folders.")

        # Key order is kept as-is because it determines the layout of the
        # JSON objects written below.
        isic_annotation_data.append({
            "isic_id": isic_id,
            "image_dir": image_dir,
            "annotator_list": find_annotation_files(
                annotator_folders, expected_json_filename, dataset_base_path
            ),
            "participant_list": find_annotation_files(
                participant_folders, expected_json_filename, dataset_base_path
            ),
        })

    # Write the data to a JSON file
    try:
        with open(output_json_path, 'w') as f:
            json.dump(isic_annotation_data, f, indent=4)
        print(f"\nSuccessfully created JSON file: {os.path.abspath(output_json_path)}")
        if isic_annotation_data:
             print(f"First entry example: {json.dumps(isic_annotation_data[0], indent=4)}")
    except IOError as e:
        print(f"Error writing JSON file: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

# Final status line; stays silent when the missing-input error was already
# reported above.
if not isic_annotation_data and 'json_basenames' in globals() and json_basenames:
    print("No annotation data was compiled, though ISIC IDs were found. Check paths and file structures.")
elif 'json_basenames' not in globals() or not json_basenames:
    pass # Error already printed
else:
    print("Processing complete.")

Processing 3611 unique ISIC IDs...

Successfully created JSON file: /home/nqmtien/THESIS/REIT4841/ham-concept/ham_concept_dataset/annotation_summary.json
First entry example: {
    "isic_id": "ISIC_0024310",
    "image_dir": "ISIC2018_Task3_Training_Input/ISIC_0024310.jpg",
    "annotator_list": [
        "Datasets/ground_truth_annotations/annotator1/ISIC_0024310.json",
        "Datasets/ground_truth_annotations/annotator2/ISIC_0024310.json"
    ],
    "participant_list": []
}
Processing complete.
