In [1]:
import json
import os
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
ANNOTATION_SUMMARY_PATH = "../ham_concept_dataset/annotation_summary.json"


def load_annotation_summary(path):
    """Read the annotation summary JSON at ``path`` and return its entries.

    Problems are reported with ``print`` rather than raised, so the notebook
    keeps running; every failure path returns an empty list, which the
    downstream ``if data:`` guards treat as "nothing loaded".

    Args:
        path: Location of the annotation summary JSON file.

    Returns:
        list: The decoded top-level JSON list, or ``[]`` when the file is
        missing, unreadable, not decodable as JSON, or when its top-level
        value is not a list.
    """
    if not os.path.exists(path):
        print(f"Error: Annotation summary file not found at {path}")
        return []

    try:
        # JSON text is UTF-8; do not depend on the platform's default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            entries = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        print(f"Error decoding JSON: {e}")
        return []
    except OSError as e:
        # The file can vanish or be unreadable after the existence check.
        print(f"Error reading {path}: {e}")
        return []

    # Later cells slice and iterate the result, so anything but a list
    # (e.g. a top-level JSON object) would fail further down.
    if not isinstance(entries, list):
        print(f"Error: expected a JSON list in {path}, got {type(entries).__name__}")
        return []

    print(f"Successfully loaded {len(entries)} entries from {path}")
    return entries


data = load_annotation_summary(ANNOTATION_SUMMARY_PATH)

# Display the first few entries to verify
if data:
    print("\nFirst 3 entries:")
    for i, entry in enumerate(data[:3], start=1):
        print(f"Entry {i}: {entry}")

Successfully loaded 3611 entries from ../ham_concept_dataset/annotation_summary.json

First 3 entries:
Entry 1: {'isic_id': 'ISIC_0024310', 'image_dir': 'ISIC2018_Task3_Training_Input/ISIC_0024310.jpg', 'annotator_list': ['Datasets/ground_truth_annotations/annotator1/ISIC_0024310.json', 'Datasets/ground_truth_annotations/annotator2/ISIC_0024310.json'], 'participant_list': []}
Entry 2: {'isic_id': 'ISIC_0024313', 'image_dir': 'ISIC2018_Task3_Training_Input/ISIC_0024313.jpg', 'annotator_list': ['Datasets/ground_truth_annotations/annotator1/ISIC_0024313.json', 'Datasets/ground_truth_annotations/annotator3/ISIC_0024313.json'], 'participant_list': []}
Entry 3: {'isic_id': 'ISIC_0024314', 'image_dir': 'ISIC2018_Task3_Training_Input/ISIC_0024314.jpg', 'annotator_list': ['Datasets/ground_truth_annotations/annotator1/ISIC_0024314.json', 'Datasets/ground_truth_annotations/annotator3/ISIC_0024314.json'], 'participant_list': []}


In [3]:
# Per-entry boolean flags, aligned index-for-index with `data`.
y_ann = []  # True where 'annotator_list' is a non-empty list
y_pas = []  # True where 'participant_list' is a non-empty list

if not data:
    print("No data loaded to prepare labels.")
else:
    # A key counts as present only when its value is a list with at least one element.
    y_ann = [
        isinstance(record.get('annotator_list'), list) and bool(record['annotator_list'])
        for record in data
    ]
    y_pas = [
        isinstance(record.get('participant_list'), list) and bool(record['participant_list'])
        for record in data
    ]

    n_with_annotator = sum(y_ann)
    n_with_participant = sum(y_pas)
    print(f"Processed {len(y_ann)} items for confusion matrix.")
    print(f"Number of items with annotator annotations : {n_with_annotator}")
    print(f"Number of items with participant annotations : {n_with_participant}")

Processed 3611 items for confusion matrix.
Number of items with annotator annotations : 3611
Number of items with participant annotations : 196


In [4]:
PARTICIPANT_ONLY_JSON_PATH = "../ham_concept_dataset/participant_only.json"

# Subset of `data` whose 'participant_list' is a non-empty list; stays empty
# when nothing was loaded.
participant_only_data = []
if data:
    participant_only_data = [
        item for item in data
        if isinstance(item.get('participant_list'), list) and len(item['participant_list']) > 0
    ]
    print(f"Found {len(participant_only_data)} entries with non-empty participant_list.")

    # Save the filtered data
    try:
        with open(PARTICIPANT_ONLY_JSON_PATH, 'w', encoding='utf-8') as f:
            json.dump(participant_only_data, f, indent=4)
    except (OSError, TypeError, ValueError) as e:
        # OSError: path missing / not writable; TypeError / ValueError: the
        # payload could not be serialised. Anything else is a programming
        # error and should surface instead of being printed and ignored.
        print(f"An unexpected error occurred: {e}")
    else:
        print(f"Successfully saved participant-only data to: {os.path.abspath(PARTICIPANT_ONLY_JSON_PATH)}")

        # Preview what was written so the cell reproduces its recorded output.
        print("\nFirst 3 entries in participant_only.json:")
        for i, entry in enumerate(participant_only_data[:3], start=1):
            print(f"Entry {i}: {entry}")
else:
    print("No data loaded from annotation_summary.json to filter.")

Found 196 entries with non-empty participant_list.
Successfully saved participant-only data to: /home/nqmtien/THESIS/REIT4841/ham-concept/ham_concept_dataset/participant_only.json

First 3 entries in participant_only.json:
Entry 1: {'isic_id': 'ISIC_0024319', 'image_dir': 'ISIC2018_Task3_Training_Input/ISIC_0024319.jpg', 'annotator_list': ['Datasets/ground_truth_annotations/annotator1/ISIC_0024319.json', 'Datasets/ground_truth_annotations/annotator7/ISIC_0024319.json'], 'participant_list': ['Datasets/study_annotations/participant12/ISIC_0024319.json', 'Datasets/study_annotations/participant46/ISIC_0024319.json', 'Datasets/study_annotations/participant62/ISIC_0024319.json', 'Datasets/study_annotations/participant65/ISIC_0024319.json', 'Datasets/study_annotations/participant7/ISIC_0024319.json', 'Datasets/study_annotations/participant74/ISIC_0024319.json', 'Datasets/study_annotations/participant86/ISIC_0024319.json']}
Entry 2: {'isic_id': 'ISIC_0024428', 'image_dir': 'ISIC2018_Task3_Trai