### NUCLE Error Corpus

In [1]:
import re
from collections import Counter

In [2]:
with open('official-2014.combined-withalt.m2') as fr:
    lines = fr.readlines()

In [3]:
corr_type_re = re.compile(r'(?<=\|\|\|).+?(?=\|\|\|)')

max_annotators = 5

def zero_matrix():
    return [[0.0000001 for x in range(max_annotators)] for y in range(max_annotators)]

def print_matrix(matrix, sign=''):
    print('\n'.join(['\t'.join([str(y)+sign for y in x]) for x in matrix]))

In [4]:
def compute_stat(correction_type=None):
    
    agreed_annotations = zero_matrix()
    combined_annotations = zero_matrix()
    sentence_total_annotations:int = 0
    sentence_annotations = []
    
    def calculate_stat_for_sentence():
        annotators_counter = Counter()
        # calculate number of identical annotations
        for a in sentence_annotations:
            annotators_counter[a[0]] += 1
            for b in sentence_annotations:
                if a[1] == b[1]: # annotations are identical
                    agreed_annotations[a[0]][b[0]] += 1

        # calculate average combined no of annotations
        for a_id, a_annotations in annotators_counter.items():
            for b_id, b_annotations in annotators_counter.items():
                combined_annotations[a_id][b_id] += (a_annotations + b_annotations)/2
    
    for l in lines:
        if l.startswith('S'):
            # calculate statistics for the previous sentence
            calculate_stat_for_sentence()
            sentence_annotations.clear()

        elif l.startswith('A'):
            annotator = int(l[-2:-1])
            annotation:str = l[2:-5]
            corr_type = corr_type_re.search(l).group(0)
            if not correction_type or correction_type == corr_type:
                sentence_annotations.append((annotator, annotation))
            # print(f'Annotation: "{annotation}" by {annotator}')

    # calculate statistics for the last sentence
    calculate_stat_for_sentence()
    
    combined_percentage = [[int(agreed_annotations[a][b]/combined_annotations[a][b]*100) for a in range(max_annotators)] for b in range(max_annotators)]
    
    numerator = 0
    denominator = 0
    for a in range(max_annotators - 1):
        for b in range(a + 1, max_annotators):
            numerator += combined_percentage[a][b] * combined_annotations[a][b]
            denominator += combined_annotations[a][b]

    return numerator / denominator

In [5]:
# https://github.com/andabi/deep-text-corrector/tree/master/data/conll14st-test-data
corrections = {
    'Vt':	     'Verb tense',
    'Vm':	     'Verb modal',
    'V0':	     'Missing verb',
    'Vform':	     'Verb form',
    'SVA':	     'Subject-verb-agreement',
    'ArtOrDet':     'Article or Determiner',
    'Nn':	     'Noun number',
    'Npos':	     'Noun possesive',
    'Pform':	     'Pronoun form',
    'Pref':	     'Pronoun reference',
    'Prep':         'Preposition',
    'Wci':	     'Wrong collocation/idiom',
    'Wa':	     'Acronyms',
    'Wform':	     'Word form',
    'Wtone':	     'Tone',
    'Srun':	     'Runons, comma splice',
    'Smod':	     'Dangling modifier',
    'Spar':	     'Parallelism',
    'Sfrag':	     'Fragment',
    'Ssub':	     'Subordinate clause',
    'WOinc':	     'Incorrect sentence form',
    'WOadv':	     'Adverb/adjective position',
    'Trans':	     'Link word/phrases',
    'Mec':	     'Punctuation, capitalization, spelling, typos',
    'Rloc-':	     'Local redundancy',
    'Cit':	     'Citation',
    'Others':	     'Other errors',
    'Um':	     'Unclear meaning (cannot be corrected)'
}

#### General inter-annotator agreement

In [6]:
print(f'Weighted average of inter-annotator agreement: {compute_stat():.3}%')

Weighted average of inter-annotator agreement: 35.1%


#### Specific inter-annotator agreement

In [7]:
for i, corr_type in enumerate(corrections):
    print(f'{corrections[corr_type]}:\t{compute_stat(corr_type):.4}%')

Verb tense:	76.46%
Verb modal:	58.58%
Missing verb:	85.29%
Verb form:	77.21%
Subject-verb-agreement:	84.3%
Article or Determiner:	62.74%
Noun number:	79.98%
Noun possesive:	78.39%
Pronoun form:	87.59%
Pronoun reference:	67.99%
Preposition:	70.96%
Wrong collocation/idiom:	47.31%
Acronyms:	100.0%
Word form:	72.72%
Tone:	83.33%
Runons, comma splice:	78.09%
Dangling modifier:	100.0%
Parallelism:	100.0%
Fragment:	100.0%
Subordinate clause:	76.33%
Incorrect sentence form:	77.8%
Adverb/adjective position:	88.89%
Link word/phrases:	60.26%
Punctuation, capitalization, spelling, typos:	67.71%
Local redundancy:	66.96%
Citation:	100.0%
Other errors:	67.37%
Unclear meaning (cannot be corrected):	63.03%
