In [2]:
import os
import pandas as pd
import numpy as np
import json
from skimage import draw
from skimage import io
from skimage import transform
from tqdm.auto import tqdm

# Side length (in pixels) of the square masks returned by polygon2mask.
image_size = 224

# Feature abbreviations; each one has a matching '<label>_annotation' column
# in the metadata tables that the export loops read.
char_class_labels = [
    'TRBL', 'ESA', 'BDG', 'GP', 'PV', 'PRL',
    'WLSA', 'PLR', 'PES', 'PIF', 'OPC', 'SPC',
    'MVP', 'PRLC', 'PLF', 'PDES', 'APC', 'MS',
]

# Maps the annotator identifiers found in the raw data to consecutive
# numbers (kept as strings) that are used in the exported folder names.
annotator_map = {
    '1': '1',
    'gt1': '2',
    'gt2': '3',
    'gt3': '4',
    'gt4': '5',
    'gt7': '6',
    'gt10': '7',
    'gt12': '8',
    'gt13': '9',
    'gt14': '10',
    'gt15': '11',
    'gt16': '12',
    'gt17': '13',
    'gt19': '14',
}

def polygon2mask(polygon, shape=(450, 600)):
    """
    Create a binary image mask from polygon coordinates.

    The polygon is rasterised on a canvas of size ``shape`` and the result
    is resized to ``(image_size, image_size)``.

    Parameters
    ----------
    polygon : array-like of shape (N, 2)
        Polygon vertices. Column 0 is used as the column coordinate and
        column 1 as the row coordinate.
    shape : tuple of int, optional
        ``(rows, cols)`` of the canvas the polygon is drawn on.
        Defaults to ``(450, 600)``.

    Returns
    -------
    numpy.ndarray
        ``int16`` array of shape ``(image_size, image_size)`` containing
        only 0 and 1.
    """
    # Accept plain (nested) lists as well as NumPy arrays.
    polygon = np.asarray(polygon)
    vertex_row_coords, vertex_col_coords = polygon[:, 1], polygon[:, 0]

    fill_row_coords, fill_col_coords = draw.polygon(vertex_row_coords, vertex_col_coords, shape)
    mask = np.zeros(shape, dtype=float)
    mask[fill_row_coords, fill_col_coords] = 1.
    mask = transform.resize(mask, (image_size, image_size))
    # resize() interpolates, so the resized mask holds fractional values.
    # Casting straight to an integer type truncates everything below 1.0 to
    # 0 and erodes the mask, so binarise with an explicit 0.5 threshold.
    return (mask >= 0.5).astype(np.int16)

def process_annotations(y_annotations):
    """
    Merge several polygon annotations into a single mask.

    Each entry of ``y_annotations`` is turned into a mask with
    ``polygon2mask``; the masks are then combined with an element-wise
    bitwise OR.
    """
    per_polygon_masks = list(map(polygon2mask, y_annotations))
    return np.bitwise_or.reduce(per_polygon_masks)

# Ground Truth

In [25]:
# Folder that receives the exported per-annotator JSON files.
save_path = "/home/kti01/Documents/My Files/Projects/Overlap/data/ground_truth/for_uploading/ground_truth_annotations"

# Consolidated ground-truth table (metadata plus '<feature>_annotation' columns).
metadata_gt = pd.read_pickle("/home/kti01/Documents/My Files/Projects/Overlap/data/ground_truth/metadata_gt_consolidated.pkl")

# Replace the raw annotator identifiers by their numbers from annotator_map.
metadata_gt['annotator'] = metadata_gt.annotator.replace(to_replace=annotator_map)

# Independent copy restricted to the columns that go into the metadata CSV.
metadata = metadata_gt.loc[:, [
    'lesion_id', 'image_id', 'dx', 'dx_type', 'age', 'sex', 'localization',
    'dataset', 'benign_malignant',
    'APC', 'BDG', 'ESA', 'GP', 'MS', 'MVP', 'None', 'OPC',
    'PDES', 'PES', 'PIF', 'PLF', 'PLR', 'PRL', 'PRLC', 'PV', 'SPC', 'TRBL',
    'WLSA',
    'annotator',
]].copy()

In [68]:
# Export one JSON file per (annotator, image) pair. Each file holds the
# annotator's polygons for every feature that was annotated on that image.
for annotator in tqdm(metadata_gt.annotator.unique()):
    # Rows belonging to the current annotator
    annotator_df = metadata_gt[metadata_gt.annotator == annotator]

    # open() does not create missing folders, so make sure the per-annotator
    # output folder exists before any file is written into it.
    annotator_dir = os.path.join(save_path, 'annotator'+annotator)
    os.makedirs(annotator_dir, exist_ok=True)

    # Iterate over each unique image_id associated with the current annotator
    for image_id in tqdm(annotator_df.image_id.unique(), position=0, leave=True):
        # The last matching row is used as the record for this image
        # (the original author notes there is only one row per image here).
        row = annotator_df[annotator_df.image_id == image_id].iloc[-1]

        # Feature name -> list of polygons as plain nested lists
        annotations = {}
        for feature in char_class_labels:
            ann = row[feature+'_annotation']
            # -1 marks a feature without annotation
            if ann != -1:
                # Convert every NumPy array of the annotation to a list so
                # that it can be serialised to JSON
                annotations[feature] = [polygon.tolist() for polygon in ann]

        # NOTE(review): the payload is serialised twice (json.dumps here,
        # json.dump below), so each file contains a JSON *string* whose
        # content is the JSON document; readers need
        # json.loads(json.load(f)). Kept as is so that the on-disk format
        # does not change -- confirm whether this is intended.
        annotations = json.dumps(annotations)
        # Save the JSON file with the annotations for the current image and annotator
        with open(os.path.join(annotator_dir, image_id+'.json'), 'w') as f:
            json.dump(annotations, f)


100%|██████████████████████████████████████| 3611/3611 [00:13<00:00, 262.87it/s]
100%|████████████████████████████████████████| 438/438 [00:01<00:00, 303.85it/s]
100%|████████████████████████████████████████| 215/215 [00:00<00:00, 352.97it/s]
100%|████████████████████████████████████████| 163/163 [00:00<00:00, 356.10it/s]
100%|████████████████████████████████████████| 196/196 [00:00<00:00, 289.68it/s]
100%|████████████████████████████████████████| 222/222 [00:00<00:00, 271.00it/s]
100%|████████████████████████████████████████| 210/210 [00:00<00:00, 268.07it/s]
100%|████████████████████████████████████████| 222/222 [00:00<00:00, 364.46it/s]
100%|████████████████████████████████████████| 199/199 [00:00<00:00, 366.12it/s]
100%|████████████████████████████████████████| 217/217 [00:00<00:00, 318.75it/s]
100%|████████████████████████████████████████| 186/186 [00:00<00:00, 272.04it/s]
100%|████████████████████████████████████████| 199/199 [00:00<00:00, 270.79it/s]
100%|███████████████████████

In [65]:
# Spot check: try to read back one exported annotation file. Any failure is
# only printed, so this cell never raises.
# NOTE(review): this path points into '.../for_uploading/annotations/', whereas
# the ground-truth export writes below save_path
# ('.../for_uploading/ground_truth_annotations') -- confirm which is current.
try:
    with open("/home/kti01/Documents/My Files/Projects/Overlap/data/ground_truth/for_uploading/annotations/annotator1/ISIC_0033712.json", 'rb') as annotation_file:
        annotation = json.load(annotation_file)
except Exception as load_error:
    print(load_error)

In [71]:
# Write the current `metadata` table to CSV without the index column. In
# top-to-bottom order this is the ground-truth column subset built above; the
# name `metadata` is re-bound in the Phase 1 section further down.
metadata.to_csv(
    path_or_buf="/home/kti01/Documents/My Files/Projects/Overlap/data/ground_truth/for_uploading/metadata/metadata.csv",
    index=False,
)

# Phase 1

In [46]:
# Folder that receives the exported per-participant JSON files.
save_path = "/home/kti01/Documents/My Files/Projects/Overlap/data/ground_truth/for_uploading/study_annotations"

# Phase-1 study table: missing values are replaced by -1 and the 'mask' and
# 'language' columns are removed.
metadata = (
    pd.read_pickle("/home/kti01/Documents/My Files/Projects/Overlap/data/phase1/metadata_phase1.pkl")
    .fillna(-1)
    .drop(columns=['mask', 'language'])
)

# Replace the participant identifiers by 1-based running numbers stored as
# strings (numbered in order of first appearance).
metadata['participant'] = (metadata['participant'].factorize()[0] + 1).astype(str)

In [49]:
# Export one JSON file per (participant, image) pair. Each file holds the
# participant's polygons for every feature that was annotated on that image.
for participant in tqdm(metadata.participant.unique(), position=0, leave=True):
    # Rows belonging to the current participant
    participant_df = metadata[metadata.participant == participant]

    # Create the participant folder once per participant. makedirs also
    # creates missing parent folders and does not fail if the folder exists.
    participant_dir = os.path.join(save_path, 'participant'+participant)
    os.makedirs(participant_dir, exist_ok=True)

    for image_id in participant_df.image_id.unique():
        # The last matching row is used as the record for this image
        # (the original author notes there is only one row per image here).
        row = participant_df[participant_df.image_id == image_id].iloc[-1]

        # Feature name -> list of polygons as plain nested lists
        annotations = {}
        for feature in char_class_labels:
            ann = row[feature+'_annotation']
            # -1 marks a feature without annotation
            if ann != -1:
                # Convert every NumPy array of the annotation to a list so
                # that it can be serialised to JSON
                annotations[feature] = [polygon.tolist() for polygon in ann]

        # NOTE(review): the payload is serialised twice (json.dumps here,
        # json.dump below), so each file contains a JSON *string* whose
        # content is the JSON document; readers need
        # json.loads(json.load(f)). Kept as is so that the on-disk format
        # does not change -- confirm whether this is intended.
        annotations = json.dumps(annotations)

        # Save the JSON file with the annotations for the current image and participant
        with open(os.path.join(participant_dir, image_id+'.json'), 'w') as f:
            json.dump(annotations, f)

        

100%|█████████████████████████████████████████| 113/113 [00:05<00:00, 20.64it/s]


In [53]:
# Write the current `metadata` table to CSV without the index column. In
# top-to-bottom order this is the Phase 1 study table loaded above.
metadata.to_csv(
    path_or_buf="/home/kti01/Documents/My Files/Projects/Overlap/data/ground_truth/for_uploading/metadata/metadata_study.csv",
    index=False,
)