In [36]:
# Choose the directory for annotations
import json
import os

# Name of the annotation set; every path used in this notebook starts with it.
annotation_set = "describe-bird-attributes-2"

# Folder holding the consolidation requests. The empty last segment produces
# the trailing slash, which is needed because file names are appended to this
# string directly.
annotations_dir = "/".join(
    [
        annotation_set,
        "annotations",
        "consolidated-annotation",
        "consolidation-request",
        "iteration-1",
        "",
    ]
)


In [38]:
# Create list of all birds and their individual annotations
# Structure: nested dictionaries keyed by bird ID. Each value holds the bird's
# file name and the list of labels given to it:
#
# {
#     BirdID: {
#         "bird_filename": <file name>,
#         "labels": [<label>, ...]
#     }
# }

# Fixed character offsets used to slice the raw strings below.
# NOTE(review): they depend on the exact S3 prefix and annotation payload
# layout of this job -- re-check them if either changes.
S3_URI_FILENAME_START = 32  # index in "s3Uri" where the file name starts
LABEL_START = 36            # index in the annotation content where the label starts
LABEL_END = -3              # drops the three trailing characters ("}})

bird_annotations = {}

# Listing with a str path yields str names, so no encode/decode round trip is needed.
for file in os.listdir(annotations_dir):
    filename = annotations_dir + file
    # The with-block closes the file on exit; no explicit close() is required.
    with open(filename, "r") as f:
        data = json.load(f)

    for bird in data:
        # Build the whole record first and store it once, so a malformed entry
        # never leaves an empty placeholder behind in bird_annotations.
        bird_annotations[bird["datasetObjectId"]] = {
            "bird_filename": bird["dataObject"]["s3Uri"][S3_URI_FILENAME_START:],
            "labels": [
                annotation["annotationData"]["content"][LABEL_START:LABEL_END]
                for annotation in bird["annotations"]
            ],
        }


In [45]:
# Attach the aggregate label and its confidence score to every bird record
annotations_dir = annotation_set + "/annotations/consolidated-annotation/consolidation-response/iteration-1/"
directory = os.fsencode(annotations_dir)

# The consolidated result is stored under a key derived from the annotation set name.
metadata_key = annotation_set + "-metadata"

for raw_name in os.listdir(directory):
    response_path = annotations_dir + os.fsdecode(raw_name)
    with open(response_path, "r") as handle:
        responses = json.load(handle)

    for response in responses:
        metadata = response["consolidatedAnnotation"]["content"][metadata_key]
        # The record must already exist: it is created by the cell that reads
        # the consolidation requests.
        record = bird_annotations[response["datasetObjectId"]]
        record["aggregated_label"] = metadata["class-name"]
        record["confidence"] = metadata["confidence"]

In [58]:
# Add average time per annotation
annotations_dir = annotation_set + "/annotations/worker-response/iteration-1/"

# Layout read here: one sub-folder per bird ID, each containing JSON response files.
for bird_id in os.listdir(annotations_dir):
    bird_path = annotations_dir + bird_id + "/"
    for file in os.listdir(bird_path):
        # os.listdir on a str path already returns str names.
        filename = bird_path + file
        # The with-block closes the file on exit; no explicit close() is required.
        with open(filename, "r") as f:
            data = json.load(f)

        answers = data["answers"]
        if not answers:
            # Nothing to average: leave avg_time unset instead of dividing by zero.
            continue

        # Use the builtin sum() and keep it unshadowed -- a variable named `sum`
        # would replace the builtin for every cell run afterwards.
        total_time = sum(answer["timeSpentInSeconds"] for answer in answers)

        # NOTE(review): when a bird folder holds several files, each one
        # overwrites avg_time, so the last file listed wins -- confirm that
        # there is a single response file per bird.
        bird_annotations[bird_id]["avg_time"] = total_time / len(answers)
    

In [59]:
# Show every bird record ordered by bird ID. IDs are strings, so the order is
# lexicographic ('10' comes before '2').
{bird_id: bird_annotations[bird_id] for bird_id in sorted(bird_annotations)}

{'0': {'bird_filename': 'American_Goldfinch_0022_32111.jpg',
  'labels': ['Multi-colored', 'Spotted', 'Multi-colored'],
  'aggregated_label': 'Spotted',
  'confidence': 0.69,
  'avg_time': 133.0},
 '1': {'bird_filename': 'Blue_Grosbeak_0066_36632.jpg',
  'labels': ['Multi-colored', 'Striped', 'Spotted'],
  'aggregated_label': 'Solid',
  'confidence': 0.45,
  'avg_time': 85.66666666666667},
 '10': {'bird_filename': 'Purple_Finch_0036_27641.jpg',
  'labels': ['Spotted', 'Solid', 'Spotted'],
  'aggregated_label': 'Spotted',
  'confidence': 0.81,
  'avg_time': 198.66666666666666},
 '11': {'bird_filename': 'Song_Sparrow_0118_121905.jpg',
  'labels': ['Spotted', 'Striped', 'Striped'],
  'aggregated_label': 'Striped',
  'confidence': 0.81,
  'avg_time': 86.33333333333333},
 '12': {'bird_filename': 'Swainson_Warbler_0026_794884.jpg',
  'labels': ['Solid', 'Spotted', 'Solid'],
  'aggregated_label': 'Solid',
  'confidence': 0.95,
  'avg_time': 134.66666666666666},
 '13': {'bird_filename': 'Tree_