In [None]:
# imports
import os
import numpy
from isicarchive.api import IsicApi

# ISIC account used for the API login -- please change accordingly!
username = 'weberj3@mskcc.org'

# root folder for all ISIC related data (trailing separator included)
doc_folder = 'Z:\\10.Imaging Informatics\\'

# cache folder: <doc_folder>ISIC<sep>cache
cache_folder = doc_folder + os.sep.join(['ISIC', 'cache'])

# show URL requests (for debugging purposes only!)
debug = False

# instantiate the API object that all later cells go through
api = IsicApi(
    username,
    cache_folder=cache_folder,
    debug=debug,
)

In [None]:
# function for mean and sample STD
def mean_std(a:list, is_sample:bool=True):
    """Return the mean and standard deviation of the values in ``a``.

    Parameters
    ----------
    a : list
        Numeric values (anything ``numpy.asarray`` accepts).
    is_sample : bool, optional
        If True (default) compute the sample standard deviation (ddof=1),
        otherwise the population standard deviation (ddof=0).

    Returns
    -------
    tuple
        ``(mean, std)``. The mean is NaN for empty input; the standard
        deviation is NaN whenever there are not more values than ``ddof``
        (empty input, or a single value with ``is_sample=True``).
    """
    ddof = 1 if is_sample else 0
    values = numpy.asarray(a)
    nan = numpy.float64(numpy.nan)

    # numpy would return NaN here as well, but only after emitting
    # RuntimeWarnings; handle the degenerate cases explicitly instead
    if values.size == 0:
        return (nan, nan)
    if values.size <= ddof:
        return (numpy.mean(values), nan)
    return (numpy.mean(values), numpy.std(values, ddof=ddof))

In [None]:
# study folder: <doc_folder>EASY<sep>PILOT<sep>
study_folder = doc_folder + os.sep.join(['EASY', 'PILOT', ''])

# look up the study by name, then fetch its image data and annotations
study = api.study('ISIC Annotation Study - All Features')
study.cache_image_data()
study.load_annotations()

In [None]:
# load the per-image meta data (diagnosis and exemplar feature) from the
# CSV file published in the isicarchive repository
meta_data_url = 'https://raw.githubusercontent.com/neuroelf/isicarchive/master/data/EASY_pilot_diagnoses.csv'
study.load_meta_data(
    meta_data_url,
    list_to_dict=True,
    dict_key='name',
    extract_key=['diagnosis', 'exemplar'],
)

In [None]:
# keep only the users whose completion count equals the full study
completion = 140
users = [
    user
    for (user, completed) in study.user_completion.items()
    if completed == completion
]

In [None]:
# create heatmaps (written to study_folder) for the selected users,
# with the rendering settings spelled out here
mix_colors = False
underlay_gray = 0.8
study_stats = study.image_heatmaps(
    study_folder,
    users=users,
    mix_colors=mix_colors,
    underlay_gray=underlay_gray,
)

In [None]:
# group image names by diagnosis: {diagnosis: [image name, ...]}
diag_images = {}
for (name, diag) in study.meta_data['diagnosis'].items():
    diag_images.setdefault(diag, []).append(name)

In [None]:
# group image names by exemplar feature: {exemplar: [image name, ...]};
# images without an exemplar (empty value) are left out
exem_images = {}
for (name, exemplar) in study.meta_data['exemplar'].items():
    if exemplar:
        exem_images.setdefault(exemplar, []).append(name)

In [None]:
# per diagnosis: mean and STD of the number of features marked in each
# selected annotation
print('On average, images with diagnosis ...')
for diagnosis in sorted(diag_images):
    study.select_annotations(images=diag_images[diagnosis], users=users)
    ao = list(study.annotation_selection.values())
    # one count per annotation object
    fn = [len(a.features) for a in ao]
    (m, s) = mean_std(fn)
    print(' - "{0:s}" have {1:.2f} ± {2:.2f} annotations.'.format(diagnosis, m, s))

In [None]:
# for every image that has an exemplar feature: in how many annotations
# was that exemplar marked fully, by category, or by specific term?
for exemplar in sorted(exem_images):
    images = exem_images[exemplar]
    print('Exemplar "{0:s}" with {1:d} images:'.format(exemplar, len(images)))
    for image in images:
        imag_diag = study.meta_data['diagnosis'][image]
        study.select_annotations(images=[image], users=users)
        ao = list(study.annotation_selection.values())
        found_direct = found_category = found_specific = 0
        for a in ao:
            # an exact match counts towards all three tallies
            if exemplar in a.features:
                found_direct += 1
                found_category += 1
                found_specific += 1
                continue
            # otherwise compare the first (' : '-separated) part against the
            # start of the exemplar and look for the last part inside it
            # NOTE(review): scanning stops at the first feature with any hit,
            # so a later feature matching the other criterion is not counted
            # -- confirm this is intended
            for feature in a.features:
                parts = feature.split(' : ')
                category_hit = exemplar.startswith(parts[0])
                specific_hit = parts[-1] in exemplar
                if category_hit:
                    found_category += 1
                if specific_hit:
                    found_specific += 1
                if category_hit or specific_hit:
                    break
        print((' - {0:s} ({1:s}) has {2:d} annotations; {3:d}, {4:d}, and {5:d}'
               ' with the full, category, and specific exemplar').format(
            image, imag_diag, len(ao), found_direct, found_category, found_specific))

In [None]:
# for each image, test whether all selected raters agreed on one feature
# (a feature counts as shared if a rater marked it or one of its synonyms)
image_agreed = [False] * len(study.images)
image_agreed_features = [[] for _ in range(len(study.images))]
for (idx, image) in enumerate(study.images):
    image_id = image['_id']
    study.select_annotations(images=[image_id], users=users)
    ao = [a for a in study.annotation_selection.values()]
    # no annotation selected for this image: nothing to agree on
    # (and ao[0] below would raise an IndexError)
    if not ao:
        continue
    # a feature shared by all raters must (up to synonyms) be among the
    # first rater's features, so only those are tested as candidates
    for feature in ao[0].features.keys():
        feature_syns = api.feature_synonyms(feature)
        if all(any(f in a.features for f in feature_syns) for a in ao):
            image_agreed[idx] = True
            image_agreed_features[idx].append(feature)
print(('There are {0:d} images where all raters agreed on ' +
       'at least one specific feature being present:').format(numpy.sum(image_agreed)))
total_agreements = 0
for (image, a, af) in zip(study.images, image_agreed, image_agreed_features):
    if a:
        total_agreements += len(af)
        image_name = image['name']
        # only a missing meta data entry is expected here; any other error
        # should surface instead of being reported as a missing diagnosis
        try:
            image_diag = study.meta_data['diagnosis'][image_name]
        except KeyError:
            image_diag = 'missing diagnosis'
        print(' - {0:s} ({1:s}): {2:s}'.format(image_name,
            image_diag, ', '.join(af)))
print('This means a total of {0:d} agreements.'.format(total_agreements))

In [None]:
# count orphan features: a feature marked by one rater for which no other
# rater of the same image marked the feature or one of its synonyms
orphans = 0
orphan_image_features = []
orphan_features = {}
for image in study.images:
    image_name = image['name']
    study.select_annotations(images=[image['_id']], users=users)
    ao = list(study.annotation_selection.values())
    for (aidx, a) in enumerate(ao):
        # annotations of all the *other* raters of this image
        others = [a2 for (aidx2, a2) in enumerate(ao) if aidx2 != aidx]
        for feature in a.features:
            feature_syns = api.feature_synonyms(feature)
            shared = any(
                feature2 in feature_syns
                for a2 in others
                for feature2 in a2.features)
            if shared:
                continue
            orphans += 1
            orphan_image_features.append(feature + ' in ' + image_name + ' by ' + a.user['name'])
            # tally under the first synonym so that synonyms collapse
            syn_key = feature_syns[0]
            orphan_features[syn_key] = orphan_features.get(syn_key, 0) + 1
print('{0:d} orphan features were selected:'.format(orphans))
for orphaned in orphan_image_features:
    print(' - ' + orphaned)
print('Presenting by list of features (collapsing synonyms):')
for (feature, count) in sorted(orphan_features.items()):
    print(' - {0:-2d} times "{1:s}"'.format(count, feature))

In [None]:
# show the first image of the study
from matplotlib import pyplot
%matplotlib inline

image = api.image(study.images[0])
image.load_image_data()
rimage = api.resample_image(image.data, (1200,1600))
api.set_text_in_image(rimage, 'Image: ' + image.name, fsize=0.04, min_alpha=1.0, fcolor=[0,0,0], bcolor=[255,255,255])

pyplot.figure(figsize=(16,12))
pyplot.imshow(rimage)
pyplot.show()