# Lex Rosetta: Inter-Annotator Agreement

In [1]:
import os
import pandas as pd
from pathlib import Path

In [2]:
# --- Data locations -------------------------------------------------------
# Each dataset sits in its own sub-directory of ./data (relative to the
# working directory) and provides one agreement file named IA_FILE_NAME.
PWD = Path('.')
DATASETS_DIR = PWD.joinpath('data')
DATASET_NAMES = {
    'Canada-EN-1',
    'Czech_Republic-CZ-1',
    'France-FR-1',
    'United_States-EN-1',
}
IA_FILE_NAME = 'ia_agreement.csv'

# --- Annotation labels ----------------------------------------------------
# Full label strings; the notebook builds count-column keys as
# '<label>-<label>' from pairs of these.
L0_OoS = 'L0 Out of Scope'
L0_H = 'L0 Heading'
L1_BG = 'L1 Background'
L1_A = 'L1 Analysis'
L2_IS = 'L2 Introductory Summary'
L2_OUT = 'L2 Outcome'
NM = 'Not Marked'

# Fixed label order used for the rows/columns of every printed table.
TYPES = (L0_OoS, L0_H, L1_BG, L1_A, L2_IS, L2_OUT, NM)

# Full label -> short label used in the printed table headers and row names.
# The short names are listed in the same order as TYPES.
RW_DICT = dict(zip(
    TYPES,
    ('OoS', 'Head', 'Back', 'Anl', 'Int.S.', 'Out', 'NM'),
))

## Create Pandas Data Frame

In [3]:
# Read every dataset's agreement CSV and tag each row with the dataset it
# came from, then stack everything into one frame.
#
# DATASET_NAMES is a set, whose iteration order for strings can change from
# one interpreter run to the next. Iterating it sorted keeps the row order of
# `data_df` (and therefore the preview shown below) reproducible.
dataset_dfs = []
for dataset_name in sorted(DATASET_NAMES):
    dataset_df = pd.read_csv(DATASETS_DIR / dataset_name / IA_FILE_NAME)
    # A scalar is broadcast to every row; no need to build a list by hand.
    dataset_df['Dataset'] = dataset_name
    dataset_dfs.append(dataset_df)

# Row indices are kept as read from each file (they repeat across datasets).
data_df = pd.concat(dataset_dfs)

In [4]:
# Preview the first rows of the combined per-document table.
data_df.head()

Unnamed: 0,Document,L0 Out of Scope-L0 Out of Scope,L0 Out of Scope-L0 Heading,L0 Out of Scope-L1 Background,L0 Out of Scope-L1 Analysis,L0 Out of Scope-L2 Introductory Summary,L0 Out of Scope-L2 Outcome,L0 Out of Scope-Not Marked,L0 Heading-L0 Out of Scope,L0 Heading-L0 Heading,...,L2 Outcome-L2 Outcome,L2 Outcome-Not Marked,Not Marked-L0 Out of Scope,Not Marked-L0 Heading,Not Marked-L1 Background,Not Marked-L1 Analysis,Not Marked-L2 Introductory Summary,Not Marked-L2 Outcome,Not Marked-Not Marked,Dataset
0,Canada-EN-1-1.txt,755,0,0,0,0,0,0,0,0,...,71,0,0,0,0,0,0,0,4,Canada-EN-1
1,Canada-EN-1-2.txt,669,0,0,0,0,0,0,0,0,...,179,1,0,0,0,0,0,0,7,Canada-EN-1
2,Canada-EN-1-3.txt,402,0,0,0,0,0,1,0,0,...,88,0,3,0,0,0,0,0,7,Canada-EN-1
3,Canada-EN-1-4.txt,591,0,0,0,0,0,3,0,0,...,199,0,0,0,2,0,0,0,4,Canada-EN-1
4,Canada-EN-1-5.txt,249,0,0,0,0,0,2,0,0,...,41,0,0,0,0,1,0,0,8,Canada-EN-1


## Confusion Matrices

In [5]:
# Total the per-document counts within each dataset (one row per dataset).
# numeric_only=True restricts the sum to the count columns: without it,
# pandas >= 2.0 also "sums" (concatenates) the string `Document` column, which
# then breaks the numeric totals computed from this frame further down.
sums_df = data_df.groupby('Dataset').sum(numeric_only=True)

In [6]:
# Display the summed counts, one row per dataset.
sums_df

Unnamed: 0_level_0,L0 Out of Scope-L0 Out of Scope,L0 Out of Scope-L0 Heading,L0 Out of Scope-L1 Background,L0 Out of Scope-L1 Analysis,L0 Out of Scope-L2 Introductory Summary,L0 Out of Scope-L2 Outcome,L0 Out of Scope-Not Marked,L0 Heading-L0 Out of Scope,L0 Heading-L0 Heading,L0 Heading-L1 Background,...,L2 Outcome-L2 Introductory Summary,L2 Outcome-L2 Outcome,L2 Outcome-Not Marked,Not Marked-L0 Out of Scope,Not Marked-L0 Heading,Not Marked-L1 Background,Not Marked-L1 Analysis,Not Marked-L2 Introductory Summary,Not Marked-L2 Outcome,Not Marked-Not Marked
Dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Canada-EN-1,96478,136,4,0,0,2001,344,0,9197,466,...,0,44430,49,157,0,57,44,0,3,647
Czech_Republic-CZ-1,157039,72,0,0,0,0,4578,1025,7468,1396,...,0,31629,128,513,5,95,37,0,0,781
France-FR-1,950468,0,6102,82,0,0,58810,0,8876,126,...,0,42321,3,41,1,26,122,0,1,588
United_States-EN-1,70586,6,244,298,1346,1013,81,2601,34888,514,...,0,64381,36,461,28,418,527,34,278,775


In [7]:
# Nested dict for direct lookup: {dataset name: {column name: summed count}}.
sums_dict = sums_df.to_dict(orient='index')

In [8]:
# Print, per dataset, a confusion matrix as LaTeX table rows: a header row of
# short label names, then one row per label with each cell expressed as a
# percentage of all counts in the matrix (one decimal place).
for dataset, counts in sums_dict.items():
    print(dataset)
    print(' & ' + ' & '.join(RW_DICT[t] for t in TYPES) + r'\\')

    # Collect the label-pair cells first; column keys are '<row label>-<column label>'.
    matrix = [[counts[f'{t1}-{t2}'] for t2 in TYPES] for t1 in TYPES]

    # Normalise by the cells that are actually printed rather than by every
    # column present for the dataset, so an extra (non label-pair) column can
    # neither skew the percentages nor break the sum.
    all_count = sum(sum(row) for row in matrix)

    for t1, stat_line in zip(TYPES, matrix):
        cells = ' & '.join('%.1f' % (s * 100 / all_count) for s in stat_line)
        print(f'{RW_DICT[t1]} & {cells}' + r'\\')

Canada-EN-1
 & OoS & Head & Back & Anl & Int.S. & Out & NM\\
OoS & 5.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.1 & 0.0\\
Head & 0.0 & 0.5 & 0.0 & 0.1 & 0.0 & 0.1 & 0.0\\
Back & 0.0 & 0.0 & 24.5 & 2.3 & 0.0 & 0.0 & 0.0\\
Anl & 0.0 & 0.0 & 2.4 & 61.9 & 0.0 & 0.2 & 0.0\\
Int.S. & 0.0 & 0.0 & 0.1 & 0.0 & 0.1 & 0.0 & 0.0\\
Out & 0.0 & 0.0 & 0.0 & 0.3 & 0.0 & 2.3 & 0.0\\
NM & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
Czech_Republic-CZ-1
 & OoS & Head & Back & Anl & Int.S. & Out & NM\\
OoS & 6.4 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.2\\
Head & 0.0 & 0.3 & 0.1 & 0.1 & 0.0 & 0.0 & 0.0\\
Back & 0.9 & 0.0 & 30.7 & 0.7 & 0.0 & 0.0 & 0.0\\
Anl & 0.3 & 0.0 & 0.7 & 56.6 & 0.0 & 0.2 & 0.0\\
Int.S. & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
Out & 0.0 & 0.0 & 0.0 & 1.3 & 0.0 & 1.3 & 0.0\\
NM & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
France-FR-1
 & OoS & Head & Back & Anl & Int.S. & Out & NM\\
OoS & 67.5 & 0.0 & 0.4 & 0.0 & 0.0 & 0.0 & 4.2\\
Head & 0.0 & 0.6 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0\\
Back & 0.1 & 0.0 & 4.4 & 5.

## Agreement Stats

In [9]:
# Per-dataset agreement for each label, printed as one LaTeX table row.
# For a label `t` the score is 100 * count['t-t'] divided by the sum of every
# column whose name contains `t`; -1 marks a label with a zero denominator.
# NOTE(review): assuming all columns are '<label>-<label>' pairs, the
# denominator covers every cell in which the label appears on either side.
for dataset, counts in sums_dict.items():
    print(dataset)
    agreements = []
    for t in TYPES:
        denominator = 0
        for column, count in counts.items():
            if t in column:
                denominator += count
        if denominator:
            agreement = 100 * counts[f'{t}-{t}'] / denominator
        else:
            agreement = -1
        agreements.append(agreement)
    row = " & ".join("%.1f" % a for a in agreements)
    print(row, r'\\')

Canada-EN-1
97.2 & 68.2 & 83.3 & 92.2 & 44.0 & 79.9 & 43.4 \\
Czech_Republic-CZ-1
80.3 & 54.6 & 92.6 & 94.5 & 0.0 & 46.9 & 10.0 \\
France-FR-1
93.5 & 92.5 & 43.0 & 72.2 & -1.0 & 99.1 & 1.0 \\
United_States-EN-1
90.8 & 71.0 & 78.4 & 93.7 & 74.2 & 91.1 & 18.4 \\


In [16]:
# Same agreement score as the per-dataset cell, computed on the counts pooled
# across all datasets (column-wise sum of `sums_df`).
dataset = sums_df.sum().to_dict()

agreements = []
for t in TYPES:
    # Counts of every column whose name mentions label `t`.
    matching_counts = [count for column, count in dataset.items() if t in column]
    denominator = sum(matching_counts)
    # -1 is the sentinel for "label has a zero denominator".
    agreement = 100 * dataset[f'{t}-{t}'] / denominator if denominator else -1
    agreements.append(agreement)

row = " & ".join("%.1f" % a for a in agreements)
print(row, r'\\')

91.8 & 70.4 & 82.1 & 92.6 & 72.1 & 77.3 & 3.8 \\
