In [46]:
from pprint import pprint as pp

# Path of the .m2 annotation file whose agreement score is computed below.
FILENAME = "official-2014.combined-withalt.m2"
# Number of annotator slots in every annotation row. Annotator numbers read
# from the file are used as indices into that row, so a number
# >= NUM_ANNOTATORS raises IndexError.
NUM_ANNOTATORS = 5


def agreement(row):
    """Return the fraction of annotators that sit on the majority side.

    It does not matter whether the majority decision is 0 or 1: the result
    is the size of the larger group divided by the number of annotators.

    Only rows made of 0s and 1s are supported. An empty row raises
    ZeroDivisionError.
    """
    ones = sum(row)
    total = len(row)
    # For a 0/1 row this is the number of zeros.
    zeros = abs(ones - total)
    return max(ones, zeros) / total


def calc_iaa_from_input(filename, ignore_type=False):
    """Compute the mean inter-annotator agreement for an annotation file.

    Only agreement is calculated, so the actual content of the sentences is
    irrelevant; what matters is how many annotators made a certain revision
    and how many did not.

    The function therefore builds an alignment matrix with one row per
    distinct annotation and one column per annotator:

        Num | A1 | A2 | An
        1   | 0  | 1  | 0
        2   | 1  | 1  | 1
        3   | 0  | 1  | 0

    Where:

    Num     - number of the annotation
    A1...An - annotators
    0 or 1  - whether annotator Ax made annotation #y

    The matrix is kept as a dict whose key identifies the annotation
    ``(sentence_index, (start, end), type, word)`` and whose value is the
    row of annotators, e.g. ``[1, 1, 0, 0, 1]`` means that annotators 1, 2
    and 5 made the same annotation.

    Args:
        filename: Path of the file to read. Lines starting with ``S`` are
            skipped, lines starting with ``A`` are parsed as annotations
            with ``|||``-separated fields (``A <start> <end>``, type, word,
            ..., annotator number), and every other line (including blank
            lines) advances the sentence index.
        ignore_type: If true, annotations with the same span and word are
            considered one annotation regardless of their type.

    Returns:
        The arithmetic mean of ``agreement(row)`` over all annotation rows.

    Raises:
        ZeroDivisionError: If the file contains no annotation lines.
        IndexError: If an annotator number is >= NUM_ANNOTATORS.
    """
    annotations = {}
    sentence_index = 0

    # Read the file the caller asked for (not the module-level default) and
    # make sure the handle is closed even if parsing fails.
    with open(filename, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()

            if line.startswith('S'):
                # Sentence text is not needed to measure agreement.
                continue

            if not line.startswith('A'):
                # Any non-S, non-A line separates sentences.
                sentence_index += 1
                continue

            parts = line.split('|||')
            annotator_number = int(parts[-1])
            # parts[0] looks like "A <start> <end>"; drop the "A " prefix.
            start, end = parts[0][2:].split()
            span = (int(start), int(end))
            annotation_type = 'ignore' if ignore_type else parts[1]
            word = parts[2]

            key = (sentence_index, span, annotation_type, word)
            # Create an all-zero row on first sight, then mark this annotator.
            row = annotations.setdefault(key, [0] * NUM_ANNOTATORS)
            row[annotator_number] = 1

    scores = [agreement(row) for row in annotations.values()]
    return sum(scores) / len(scores)
        
        
# Report the mean agreement for the configured file, keeping annotation
# types distinct.
print(calc_iaa_from_input(filename=FILENAME, ignore_type=False))

0.7519972091401207


In [16]:
# Hand-made rows for sanity-checking agreement(); each comment gives the
# size of the majority group and the resulting score.
r1 = [0, 0, 0, 1] # 3 of 4 agree -> 0.75
r2 = [1, 1, 1, 1] # 4 of 4 agree -> 1.0
r3 = [1, 1, 0, 0] # 2 of 4 agree -> 0.5
r4 = [0, 0, 0, 0] # 4 of 4 agree -> 1.0
r5 = [0, 1, 1, 1] # 3 of 4 agree -> 0.75
    
# Bare expression so the notebook displays the result.
agreement(r5)

0.75