# Example Annotator Agreements and Disagreements

Extract all annotators' labels on the three descriptions chosen for the participatory data and model evaluation workshop with the Heritage Collections team (held April 2023) for visualization:
* **Example 1:** "Broadcast of the service comemorating the centenary of the death of Henry Duncan, at which John Baillie was preacher.  He gave a biographical talk on Duncan's life and work." (description_id: 376)
* **Example 2:** "Mixture of press cuttings covering many subjects including articles on Deitrich Bonhoeffer and Reinhold Niebuhr, Second World War, housekeeping tips and matters of general interest to Florence Jewel Baillie." (description_id: 444)
* **Example 3:** "Correspondence and related items, relating to the attendence of John Baillie and his wife at the coronation of Elizabeth II." (description_id: 407)

In [1]:
import pandas as pd

In [11]:
# Load the aggregated annotation table; the first CSV column becomes the index.
df_ann = pd.read_csv(
    "../data/aggregated_with_annotator_eadid_note_cols.csv",
    index_col=0,
)
print(df_ann.shape)
df_ann.head()

(76543, 11)


Unnamed: 0,annotator,file,entity,offsets,text,id,label,category,note,eadid,associated_genders
39447,2,AA5_00100.ann,T0,"(789, 791)",He,0,Gendered-Pronoun,Linguistic,,AA5,Masculine
39448,2,AA5_00100.ann,T1,"(871, 873)",he,1,Gendered-Pronoun,Linguistic,,AA5,Masculine
39449,2,AA5_00100.ann,T2,"(913, 916)",his,2,Gendered-Pronoun,Linguistic,,AA5,Masculine
39450,2,AA5_00100.ann,T3,"(928, 930)",he,3,Gendered-Pronoun,Linguistic,,AA5,Masculine
39451,2,AA5_00100.ann,T4,"(1217, 1219)",he,4,Gendered-Pronoun,Linguistic,,AA5,Masculine


In [13]:
# Parse the "(start, end)" strings of the `offsets` column into two integer
# columns. Both numbers are extracted in a single vectorized pass, so no
# temporary string columns or intermediate Python lists are needed.
offset_bounds = (
    df_ann["offsets"]
    .str.extract(r"\((-?\d+),\s*(-?\d+)\)")
    .astype("int64")  # fails loudly if a value does not look like "(start, end)"
)

# Remove the columns if an earlier run of this cell already added them, so the
# cell can be re-run without `insert` complaining about duplicate names.
df_ann = df_ann.drop(columns=["ann_start_offset", "ann_end_offset"], errors="ignore")

# Insert the integer offsets at positions 3 and 4 of the frame.
# `.to_numpy()` passes plain values so no index alignment is attempted.
df_ann.insert(3, "ann_start_offset", offset_bounds[0].to_numpy())
df_ann.insert(4, "ann_end_offset", offset_bounds[1].to_numpy())

df_ann.head()

Unnamed: 0,annotator,file,entity,ann_start_offset,ann_end_offset,offsets,text,id,label,category,note,eadid,associated_genders
39447,2,AA5_00100.ann,T0,789,791,"(789, 791)",He,0,Gendered-Pronoun,Linguistic,,AA5,Masculine
39448,2,AA5_00100.ann,T1,871,873,"(871, 873)",he,1,Gendered-Pronoun,Linguistic,,AA5,Masculine
39449,2,AA5_00100.ann,T2,913,916,"(913, 916)",his,2,Gendered-Pronoun,Linguistic,,AA5,Masculine
39450,2,AA5_00100.ann,T3,928,930,"(928, 930)",he,3,Gendered-Pronoun,Linguistic,,AA5,Masculine
39451,2,AA5_00100.ann,T4,1217,1219,"(1217, 1219)",he,4,Gendered-Pronoun,Linguistic,,AA5,Masculine


In [14]:
# Load the description sentences together with their start/end offsets per file.
# Commented-out alternative kept from the original cell: "../data/annot_descs.csv"
df_desc = pd.read_csv(
    "../data/description_sentences.csv",
    index_col=0,
)
print(df_desc.shape)
df_desc.head()

(42030, 6)


Unnamed: 0,description_id,file,desc_start_offset,desc_end_offset,field,sentences
0,0,AA5_00100.txt,0,16,Identifier,Identifier: AA5
1,1,AA5_00100.txt,17,76,Title,Title:\nPapers of The Very Rev Prof James Whyt...
2,2,AA5_00100.txt,77,633,Scope and Contents,"Scope and Contents:\nSermons and addresses, 19..."
3,3,AA5_00100.txt,634,1725,Biographical / Historical,Biographical / Historical:\nProfessor James Ai...
3,3,AA5_00100.txt,634,1725,Biographical / Historical,He was educated at Daniel Stewart's College an...


## Example 1

In [12]:
# The two sentences of Example 1, spelled exactly as in the description text
# (including "comemorating") so they can be compared against the data.
ex1_sent1 = (
    "Broadcast of the service comemorating the centenary of the death of "
    "Henry Duncan, at which John Baillie was preacher."
)
ex1_sent2 = "He gave a biographical talk on Duncan's life and work."

In [15]:
# Find the description row whose sentence is exactly the second Example 1 sentence
df_desc[df_desc["sentences"] == ex1_sent2]

Unnamed: 0,description_id,file,desc_start_offset,desc_end_offset,field,sentences
376,376,BAI_01100.txt,2166,2359,Scope and Contents,He gave a biographical talk on Duncan's life a...


In [40]:
# All annotations made on file BAI_01100
bai_01100 = df_ann[df_ann["file"] == "BAI_01100.ann"]

# Keep the annotations whose span lies inside offsets 2166-2359, the
# desc_start_offset / desc_end_offset of description 376 found above
ex1_in_span = (bai_01100["ann_start_offset"] >= 2166) & (bai_01100["ann_end_offset"] <= 2359)
df_ex1 = bai_01100[ex1_in_span]

df_ex1.sort_values(by="ann_start_offset")

Unnamed: 0,annotator,file,entity,ann_start_offset,ann_end_offset,offsets,text,id,label,category,note,eadid,associated_genders
39603,2,BAI_01100.ann,T13,2254,2266,"(2254, 2266)",Henry Duncan,158,Unknown,Person-Name,,BAI,Masculine
31781,1,BAI_01100.ann,T13,2254,2266,"(2254, 2266)",Henry Duncan,8920,Unknown,Person-Name,,BAI,Masculine
13677,0,BAI_01100.ann,T29,2254,2266,"(2254, 2266)",Henry Duncan,18391,Unknown,Person-Name,,BAI,Masculine
97,1,BAI_01100.ann,T27,2277,2289,"(2277, 2289)",John Baillie,173,Masculine,Person-Name,,BAI,Unclear
153,2,BAI_01100.ann,T27,2277,2289,"(2277, 2289)",John Baillie,173,Masculine,Person-Name,,BAI,Unclear
31793,1,BAI_01100.ann,T25,2277,2289,"(2277, 2289)",John Baillie,8932,Unknown,Person-Name,,BAI,Unclear
13678,0,BAI_01100.ann,T30,2277,2289,"(2277, 2289)",John Baillie,18392,Unknown,Person-Name,,BAI,Unclear
64708,4,BAI_01100.ann,T6,2294,2302,"(2294, 2302)",preacher,3787,Occupation,Contextual,,BAI,Multiple
60547,0,BAI_01100.ann,T16,2294,2302,"(2294, 2302)",preacher,18377,Occupation,Contextual,,BAI,Multiple
39592,2,BAI_01100.ann,T1,2304,2306,"(2304, 2306)",He,147,Gendered-Pronoun,Linguistic,,BAI,Masculine


## Example 2

In [23]:
# Opening and closing fragments of the Example 2 description.
# NOTE: the catalogue description itself spells the name "Deitrich" (sic), so
# the fragment must reproduce that spelling to match the text in the data.
ex2_1 = "Mixture of press cuttings covering many subjects including articles on Deitrich Bonhoeffer"
ex2_2 = "housekeeping tips and matters of general interest to Florence Jewel Baillie."

In [28]:
# Print every description sentence that contains the closing fragment of Example 2
sentences = df_desc["sentences"].tolist()
for matching_sentence in filter(lambda sent: ex2_2 in sent, sentences):
    print(matching_sentence)

Scope and Contents:
Mixture of press cuttings covering many subjects including articles on Deitrich Bonhoeffer and Reinhold Niebuhr, Second World War, housekeeping tips and matters of general interest to Florence Jewel Baillie.


In [31]:
# Full text of the Example 2 description as stored in the data: the field
# heading, a newline, then the description (with the data's own spelling).
ex2_full = (
    "Scope and Contents:\n"
    "Mixture of press cuttings covering many subjects including articles on "
    "Deitrich Bonhoeffer and Reinhold Niebuhr, Second World War, housekeeping "
    "tips and matters of general interest to Florence Jewel Baillie."
)
df_desc[df_desc["sentences"] == ex2_full]

Unnamed: 0,description_id,file,desc_start_offset,desc_end_offset,field,sentences
444,444,BAI_01300.txt,4937,5165,Scope and Contents,Scope and Contents:\nMixture of press cuttings...


In [41]:
# All annotations made on file BAI_01300
bai_01300 = df_ann[df_ann["file"] == "BAI_01300.ann"]

# Keep the annotations whose span lies inside offsets 4937-5165, the
# desc_start_offset / desc_end_offset of description 444 found above
ex2_in_span = (bai_01300["ann_start_offset"] >= 4937) & (bai_01300["ann_end_offset"] <= 5165)
df_ex2 = bai_01300[ex2_in_span]

df_ex2.sort_values(by="ann_start_offset")

Unnamed: 0,annotator,file,entity,ann_start_offset,ann_end_offset,offsets,text,id,label,category,note,eadid,associated_genders
39731,2,BAI_01300.ann,T21,5028,5047,"(5028, 5047)",Deitrich Bonhoeffer,290,Unknown,Person-Name,,BAI,Unclear
37280,1,BAI_01300.ann,T69,5028,5047,"(5028, 5047)",Deitrich Bonhoeffer,14469,Unknown,Person-Name,,BAI,Unclear
20728,0,BAI_01300.ann,T21,5028,5047,"(5028, 5047)",Deitrich Bonhoeffer,28616,Unknown,Person-Name,,BAI,Unclear
39760,2,BAI_01300.ann,T54,5052,5068,"(5052, 5068)",Reinhold Niebuhr,325,Unknown,Person-Name,,BAI,Unclear
37281,1,BAI_01300.ann,T70,5052,5068,"(5052, 5068)",Reinhold Niebuhr,14470,Unknown,Person-Name,,BAI,Unclear
20761,0,BAI_01300.ann,T63,5052,5068,"(5052, 5068)",Reinhold Niebuhr,28657,Unknown,Person-Name,,BAI,Unclear
74460,3,BAI_01300.ann,T17,5088,5163,"(5088, 5163)",housekeeping tips and matters of general inter...,8330,Stereotype,Contextual,"women being associated only with personal, fam...",BAI,Feminine
71943,0,BAI_01300.ann,T22,5088,5163,"(5088, 5163)",housekeeping tips and matters of general inter...,28658,Stereotype,Contextual,Woman associated with house\n,BAI,Feminine
39761,2,BAI_01300.ann,T55,5141,5163,"(5141, 5163)",Florence Jewel Baillie,326,Unknown,Person-Name,,BAI,Unclear
37266,1,BAI_01300.ann,T55,5141,5163,"(5141, 5163)",Florence Jewel Baillie,14455,Feminine,Person-Name,,BAI,Unclear


In [53]:
# Show the full (untruncated) annotated text of the annotation with id 8330
df_ex2.loc[df_ex2["id"] == 8330, "text"].iloc[0]

'housekeeping tips and matters of general interest to Florence Jewel Baillie'

## Example 3

In [34]:
# Example 3 description, spelled exactly as in the data (including "attendence")
ex3 = (
    "Correspondence and related items, relating to the attendence of "
    "John Baillie and his wife at the coronation of Elizabeth II."
)

In [36]:
# The stored sentence starts with the field heading and a newline, so prepend them
df_desc[df_desc["sentences"] == f"Scope and Contents:\n{ex3}"]

Unnamed: 0,description_id,file,desc_start_offset,desc_end_offset,field,sentences
407,407,BAI_01200.txt,4339,4484,Scope and Contents,Scope and Contents:\nCorrespondence and relate...


In [42]:
# All annotations made on file BAI_01200
bai_01200 = df_ann[df_ann["file"] == "BAI_01200.ann"]

# Keep the annotations whose span lies inside offsets 4339-4484, the
# desc_start_offset / desc_end_offset of description 407 found above
ex3_in_span = (bai_01200["ann_start_offset"] >= 4339) & (bai_01200["ann_end_offset"] <= 4484)
df_ex3 = bai_01200[ex3_in_span]

df_ex3.sort_values(by="ann_start_offset")

Unnamed: 0,annotator,file,entity,ann_start_offset,ann_end_offset,offsets,text,id,label,category,note,eadid,associated_genders
69333,0,BAI_01200.ann,T45,4393,4482,"(4393, 4482)",relating to the attendence of John Baillie and...,21536,Omission,Contextual,"Man named, woman unnamed and referenced only i...",BAI,Multiple
39699,2,BAI_01200.ann,T78,4423,4435,"(4423, 4435)",John Baillie,258,Masculine,Person-Name,,BAI,Unclear
74194,3,BAI_01200.ann,T26,4423,4448,"(4423, 4448)",John Baillie and his wife,6237,Stereotype,Contextual,woman described in relation to man\n,BAI,Multiple
33683,1,BAI_01200.ann,T76,4423,4435,"(4423, 4435)",John Baillie,10838,Masculine,Person-Name,,BAI,Unclear
15958,0,BAI_01200.ann,T83,4423,4435,"(4423, 4435)",John Baillie,21579,Masculine,Person-Name,,BAI,Unclear
39640,2,BAI_01200.ann,T15,4440,4443,"(4440, 4443)",his,196,Gendered-Pronoun,Linguistic,,BAI,Masculine
33624,1,BAI_01200.ann,T15,4440,4443,"(4440, 4443)",his,10777,Gendered-Pronoun,Linguistic,,BAI,Masculine
15901,0,BAI_01200.ann,T15,4440,4443,"(4440, 4443)",his,21513,Gendered-Pronoun,Linguistic,,BAI,Masculine
39700,2,BAI_01200.ann,T79,4444,4448,"(4444, 4448)",wife,259,Gendered-Role,Linguistic,,BAI,Feminine
33643,1,BAI_01200.ann,T35,4444,4448,"(4444, 4448)",wife,10797,Gendered-Role,Linguistic,,BAI,Feminine


In [54]:
# Show the full (untruncated) annotated text of the annotation with id 21536
df_ex3.loc[df_ex3["id"] == 21536, "text"].iloc[0]

'relating to the attendence of John Baillie and his wife at the coronation of Elizabeth II'