# IAA (Krippendorff alpha-u) results

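Computes unitizing Krippendorff's alpha (alpha-u) with `include_background=True` for the JSON files in `annotations/final_annotations_iaa_set`, per document and overall, and then reports per-category F1 scores (micro-averaged over annotator pairs) for the same files.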

In [1]:
from pathlib import Path
import sys


def find_repo_root(start: Path) -> Path:
    """Return the nearest directory, at or above ``start``, that looks like the repo root.

    A directory qualifies when it contains both an ``annotations`` and an
    ``iaa_scores`` sub-directory. ``start`` itself is checked first, then each
    of its parents from the closest to the filesystem root.

    Args:
        start: Directory from which the upward search begins.

    Returns:
        The first qualifying directory encountered.

    Raises:
        RuntimeError: If neither ``start`` nor any of its parents qualifies.
    """
    marker_dirs = ("annotations", "iaa_scores")
    for directory in (start, *start.parents):
        # Both markers must exist as directories; a plain file of that name does not count.
        if all((directory / marker).is_dir() for marker in marker_dirs):
            return directory
    raise RuntimeError(f"Could not find repo root starting from {start}")


# Anchor all paths on the repository root, located by walking up from the
# kernel's current working directory.
repo_root = find_repo_root(Path.cwd().resolve())

# Put the repository root at the front of the import path (only once, so
# re-running the cell does not add duplicates) to make the repository's
# `iaa_scores` directory importable.
if str(repo_root) not in sys.path:
    sys.path.insert(0, str(repo_root))

input_dir = repo_root.joinpath("annotations", "final_annotations_iaa_set")
input_dir

PosixPath('/home/lbrenap/Documents/projects/detecting_arguments/annotations/final_annotations_iaa_set')

In [2]:
from iaa_scores.corpus import build_corpus


# Build the corpus object from the files under `input_dir`.
# NOTE(review): `min_annotators=2` presumably excludes documents labelled by
# fewer than two annotators — confirm against `iaa_scores.corpus.build_corpus`.
corpus = build_corpus(input_dir, min_annotators=2)
# Quick sanity check of what was built: file count, document count, category list.
print(f"Found {len(corpus.files)} files; included {len(corpus.doc_spans)} documents")
print("Categories:", corpus.categories)

Found 20 files; included 10 documents
Categories: ['Analysis', 'Background Facts', 'Conclusion', 'Procedural History', 'Rule']


In [3]:
from iaa_scores.alpha import compute_alpha_u_overall, compute_alpha_u_per_doc


# Alpha-u computed separately for every document, keyed by ref_id and built
# in sorted ref_id order.
per_doc_alpha_u = {
    ref_id: compute_alpha_u_per_doc(
        offsets,
        corpus.categories,
        # .get() passes None when no length is recorded for this document.
        continuum_len=corpus.doc_lengths.get(ref_id),
        include_background=True,
    )
    for ref_id, offsets in sorted(corpus.doc_offsets.items())
}

# Alpha-u over all documents at once, with the same background setting as above.
overall_alpha_u = compute_alpha_u_overall(
    corpus.doc_offsets,
    corpus.categories,
    doc_lengths=corpus.doc_lengths,
    include_background=True,
)

In [4]:
# Per-document report: documents in sorted ref_id order, categories in the
# order given by `corpus.categories`.
print("Per-document alpha_u by category:")
for ref_id in sorted(per_doc_alpha_u):
    # Annotator count is read from doc_spans; a ref_id missing there is reported as 0.
    annotators = len(corpus.doc_spans.get(ref_id, {}))
    print(f"\n-- ref_id: {ref_id} (annotators: {annotators}) --")
    for cat in corpus.categories:
        # .get() prints None instead of raising when the category is absent from the result.
        print(f"  {cat}: {per_doc_alpha_u[ref_id].get(cat)}")

# Same per-category layout for the scores computed over all documents.
print("\nOverall alpha_u by category:")
for cat in corpus.categories:
    print(f"  {cat}: {overall_alpha_u.get(cat)}")

Per-document alpha_u by category:

-- ref_id: 4 (annotators: 2) --
  Analysis: 0.5037963454127594
  Background Facts: 0.9989852189593874
  Conclusion: 1.0
  Procedural History: -0.010531914893616934
  Rule: 0.6841713123325418

-- ref_id: 5 (annotators: 2) --
  Analysis: 0.6167070001572549
  Background Facts: 0.6749841145713855
  Conclusion: 1.0
  Procedural History: -0.05287123106686176
  Rule: 0.5711367034791868

-- ref_id: 6 (annotators: 2) --
  Analysis: 0.17282843731593722
  Background Facts: 0.972803590793567
  Conclusion: 1.0
  Procedural History: 0.667112925241009
  Rule: 0.4496890659346584

-- ref_id: 7 (annotators: 2) --
  Analysis: 0.6615328958404703
  Background Facts: 0.4907807592107821
  Conclusion: 1.0
  Procedural History: 0.21959484244080307
  Rule: 0.9122341302397033

-- ref_id: 8 (annotators: 2) --
  Analysis: 0.7751092886518551
  Background Facts: 0.9397916495669545
  Conclusion: -0.01615932739035797
  Procedural History: 0.8334901708804204
  Rule: 0.714047517037129


In [5]:
from iaa_scores.f1_scores import compute_f1_for_document, micro_average_f1


# Settings forwarded unchanged to compute_f1_for_document.
# NOTE(review): the meaning of "yujianbo" and of the 0.1 threshold is defined
# in iaa_scores.f1_scores — confirm there before changing either value.
metric = "yujianbo"
min_sim = 0.1

# Per-document F1, keyed by ref_id and computed in sorted ref_id order.
# NOTE: this is the slow step; in the saved run each document took between
# roughly 5 s and 108 s.
per_doc_f1 = {
    ref_id: compute_f1_for_document(
        annotator_spans,
        corpus.categories,
        metric=metric,
        min_sim=min_sim,
    )
    for ref_id, annotator_spans in sorted(corpus.doc_spans.items())
}
# n * (n - 1) // 2 = number of unordered pairs among a document's n annotators;
# handed to micro_average_f1 together with the per-document scores.
pair_counts = {
    ref_id: len(annotator_spans) * (len(annotator_spans) - 1) // 2
    for ref_id, annotator_spans in corpus.doc_spans.items()
}
overall_f1 = micro_average_f1(per_doc_f1, pair_counts, corpus.categories)

# Per-document report, then the corpus-level figures in the same layout.
print("Per-document F1 by category:")
for ref_id in sorted(per_doc_f1):
    annotators = len(corpus.doc_spans.get(ref_id, {}))
    print(f"\n-- ref_id: {ref_id} (annotators: {annotators}) --")
    for cat in corpus.categories:
        print(f"  {cat}: {per_doc_f1[ref_id].get(cat)}")

print("\nOverall F1 by category (micro average over annotator pairs):")
for cat in corpus.categories:
    print(f"  {cat}: {overall_f1.get(cat)}")


Annotator pairs: 100%|██████████| 1/1 [00:15<00:00, 15.98s/it]
Annotator pairs: 100%|██████████| 1/1 [00:29<00:00, 29.44s/it]
Annotator pairs: 100%|██████████| 1/1 [00:40<00:00, 40.35s/it]
Annotator pairs: 100%|██████████| 1/1 [00:21<00:00, 21.23s/it]
Annotator pairs: 100%|██████████| 1/1 [00:56<00:00, 56.66s/it]
Annotator pairs: 100%|██████████| 1/1 [00:31<00:00, 31.44s/it]
Annotator pairs: 100%|██████████| 1/1 [00:05<00:00,  5.29s/it]
Annotator pairs: 100%|██████████| 1/1 [00:17<00:00, 17.23s/it]
Annotator pairs: 100%|██████████| 1/1 [00:25<00:00, 25.50s/it]
Annotator pairs: 100%|██████████| 1/1 [01:48<00:00, 108.02s/it]

Per-document F1 by category:

-- ref_id: 4 (annotators: 2) --
  Analysis: 0.14298645843177238
  Background Facts: 0.525328330206379
  Conclusion: 1.0
  Procedural History: 0.0
  Rule: 0.491186856883474

-- ref_id: 5 (annotators: 2) --
  Analysis: 0.6482355277885301
  Background Facts: 0.5964262717321314
  Conclusion: 1.0
  Procedural History: 0.3126463700234192
  Rule: 0.40091171301613593

-- ref_id: 6 (annotators: 2) --
  Analysis: 0.27023778170637847
  Background Facts: 0.9652669777086573
  Conclusion: 1.0
  Procedural History: 0.5258547008547008
  Rule: 0.4682695253810574

-- ref_id: 7 (annotators: 2) --
  Analysis: 0.4070731707317073
  Background Facts: 0.5228494623655914
  Conclusion: 1.0
  Procedural History: 0.09749144811858607
  Rule: 0.8

-- ref_id: 8 (annotators: 2) --
  Analysis: 0.31247155536140286
  Background Facts: 0.38763433224447286
  Conclusion: 0.4441489361702128
  Procedural History: 0.7883720930232558
  Rule: 0.3406286521324152

-- ref_id: 9 (annotators: 2) --
  An


