# EASY pilot study: Confusion Matrix
This notebook demonstrates how the mask images downloaded for
one annotation study are processed using both the
[Dice coefficient](https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient)
(see in particular its Applications section) and a
cross-correlation measure operating on the smoothed masks.

In [7]:
# settings
# folder (relative to the working directory) that is searched for mask PNG files
annotationsFolder = 'Annotations'
# study name; used as the filename prefix when globbing for and parsing mask images
studyName = 'EasyPilot'

In [8]:
# imports, etc.
import glob
import os
import pandas as pd
import re

In [29]:
# find annotation mask images
# (sorted: glob returns files in file-system dependent order, and the row
# order of annotDF below follows the order of this list)
annotationImages = sorted(glob.glob('./%s/%s_*.png' % (annotationsFolder, studyName)))
print('A total of %s images were found for study %s:' % (len(annotationImages), studyName))

# and determine the (unique) images, annotators, and features
# expected filename: <studyName>_ISIC_<digits>_<annotator>_<feature>.png
# where neither <annotator> nor <feature> may contain an underscore;
# re.escape keeps any regex metacharacters in studyName literal
reXPattern = re.compile(r'^.*' + re.escape(studyName) + r'_ISIC_(\d+)_([^_]+)_([^_]+)\.png$')
imageNr = []
imageSet = set()
annotator = []
annotatorSet = set()
feature = []
featureSet = set()
unmatchedImages = []
for imageFilename in annotationImages:
    reSearch = reXPattern.search(imageFilename)
    if reSearch:
        reGroups = reSearch.groups()
        imageNr.append(reGroups[0])
        imageSet.add(reGroups[0])
        annotator.append(reGroups[1])
        annotatorSet.add(reGroups[1])
        feature.append(reGroups[2])
        featureSet.add(reGroups[2])
    else:
        # keep a blank placeholder row so that the lists (and annotDF) stay
        # aligned one-to-one with annotationImages, but remember the file
        unmatchedImages.append(imageFilename)
        imageNr.append('')
        annotator.append('')
        feature.append('')
print(' - a total of %s ISIC images were annotated' % (len(imageSet)))
print(' - by %s annotators' % (len(annotatorSet)))
print(' - using %s features' % (len(featureSet)))
if unmatchedImages:
    # make blank rows in annotDF visible instead of passing them on silently
    print(' - %s files did not match the expected naming scheme' % (len(unmatchedImages)))

# one row per entry of annotationImages (blank strings for unmatched files)
annotDF = pd.DataFrame.from_dict({'imageNr': imageNr, 'annotator': annotator, 'feature': feature})

A total of 1925 images were found for study EasyPilot:
 - a total of 140 ISIC images were annotated
 - by 5 annotators
 - using 80 features
