This notebook was used to calculate inter-annotator agreement and confusion matrices for the *Catching Feelings* project. It is based on existing work by (author name redacted for anonymity).

To run this notebook, you need a zipped project export from INCEpTION as input. For the *Catching Feelings* project, this file is available on request.

# Load project and packages

Adapted from the INCEpTALYTICS example notebook: https://github.com/catalpa-cl/inceptalytics/blob/main/examples/example.ipynb


In [None]:
!pip install inceptalytics

In [None]:
!pip install urllib3==1.26.15 requests-toolbelt==0.10.1

In [None]:
# Google Colab only: mount Google Drive at /content/drive
# (presumably so the export zip can be read from Drive — skip this cell when running elsewhere)
from google.colab import drive
drive.mount('/content/drive')

In [7]:
from inceptalytics import Project
import itertools
import pandas as pd

import seaborn as sn
import matplotlib.pyplot as plt

In [8]:
# path to the zipped export; replace the placeholder below before running
# note: upload the zip to the correct folder first

f = "add_your_file_path_here"

In [9]:
# load the project from the zipped export at path `f`
project = Project.from_zipped_xmi(f)

# Calculate IAA-scores

**Note:** the scores below are pairwise Cohen's Kappa, which disregards tokens outside annotated spans (i.e. unannotated tokens).

**Results**
Average pairwise Kappa for *Catching Feelings*:


*   aspects: 0.8576086057095573
*   evaluations: 0.8822340027865565



In [10]:
# annotators as reported by the loaded project
annotators = project.annotators

In [None]:
# display the current value of `annotators`
annotators

In [12]:
# to ensure anonymous peer review, usernames for annotators were redacted from the code
# if reusing, please re-insert the usernames where needed or replace with usernames of your own annotators
# NOTE: this assignment replaces the `annotators` value read from the project in an earlier cell

annotators = ['name_1', "name_2"]

In [13]:
# source file names of the project; passed as `source_files` to project.select below
files = project.source_file_names

In [None]:
# number of entries in `files`
len(files)

# Counts for categories

In [64]:
# view on the "aspect" feature of the custom span layer, for the chosen annotators and files
pos_type = "custom.Span"
cat = "aspect"
feature_path = f'{pos_type}>{cat}'

pos_annos = project.select(
    annotation=feature_path,
    annotators=annotators,
    source_files=files,
)

In [None]:
# get all annotations per source file + which annotations were made by whom
# to preserve anonymous peer review, the annotator usernames were removed from the subsequent code
# if reusing, please return the names to the code where needed
# the last line only previews the first rows of the matrix joined with the covered texts

document_annotator_matrix = pos_annos.document_annotator_matrix
covered_texts = pos_annos.texts
document_annotator_matrix.join(covered_texts).head()

In [66]:
# matrix joined with the covered texts, without rows that contain missing values
# (presumably this keeps only entries labelled by every selected annotator — confirm)
annots_categories = document_annotator_matrix.join(covered_texts).dropna()

In [None]:
# per-annotator label columns as plain Python lists
# variable names are kept neutral so that no annotator identity leaks into the code
annotator_1_cats = annots_categories['name_1'].tolist()
annotator_2_cats = annots_categories['name_2'].tolist()

In [None]:
# display the joined table built above
annots_categories

In [None]:
# label frequencies over every annotation in the current view
pos_annos.data_frame["annotation"].value_counts()

In [None]:
# data frame of the current view, used by the following cells
df = pos_annos.data_frame

In [None]:
# label frequencies for annotator "name_2" only
df.loc[df["annotator"] == "name_2", "annotation"].value_counts()

In [None]:
# label frequencies for annotator "name_1" only
df.loc[df["annotator"] == "name_1", "annotation"].value_counts()

In [None]:
# one row per source file; every other column is collected into a list
group = df.groupby(["source_file"]).agg(list)

In [None]:
# files for IAA: keep only source files that contain annotations by both annotators
filtered_df = group[group["annotator"].apply(lambda names: {"name_1", "name_2"}.issubset(names))]

In [None]:
# overview of all IAA texts: the unique index values (source files) of filtered_df
filtered_df.index.unique()

In [None]:
# label frequencies within the files annotated by both annotators
filtered_df["annotation"].explode().value_counts()

# Counts for sentiments (evaluation annotations)

In [28]:
# view on the "evaluation" feature of the custom span layer, for the chosen annotators and files
pos_type = "custom.Span"
cat = "evaluation"
feature_path = f'{pos_type}>{cat}'

pos_annos = project.select(
    annotation=feature_path,
    annotators=annotators,
    source_files=files,
)

In [None]:
# label frequencies over every "evaluation" annotation in the current view
pos_annos.data_frame["annotation"].value_counts()

In [None]:
# data frame of the current view; rebinds `df` used by the following cells
df = pos_annos.data_frame

In [None]:
# label frequencies for annotator "name_2" only
df.loc[df["annotator"] == "name_2", "annotation"].value_counts()

In [None]:
# label frequencies for annotator "name_1" only
df.loc[df["annotator"] == "name_1", "annotation"].value_counts()

In [32]:
# one row per source file; every other column is collected into a list
group = df.groupby(["source_file"]).agg(list)
# files for IAA: keep only source files that contain annotations by both annotators
filtered_df = group[group["annotator"].apply(lambda names: {"name_1", "name_2"}.issubset(names))]

In [None]:
# number of distinct sentences in the files annotated by both annotators
len(set(filtered_df["sentence"].explode()))

In [None]:
# "evaluation" label frequencies within the files annotated by both annotators
filtered_df["annotation"].explode().value_counts()

# Kappa Scores

In [35]:
# layer name used as the first part of every feature path below
pos_type = "custom.Span"

In [None]:
cats = ["aspect", "evaluation"]

# for each feature: print the pairwise Kappa table and the mean of its 'kappa' column
for cat in cats:
  feature_path = f'{pos_type}>{cat}'
  pos_annos = project.select(
      annotation=feature_path,
      annotators=annotators,
      source_files=files,
  )

  IAA_df = pd.DataFrame(pos_annos.iaa_pairwise(measure='kappa'))
  avg = IAA_df['kappa'].mean()

  print(IAA_df)
  print(f"Averaged pairwise Kappa score {cat} : {avg}")

In [None]:
# pairwise Cohen's Kappa for a single feature of the layer `pos_type`
# NOTE(review): this cell was originally described as sentence-level sentiment IAA,
# but the feature selected below is "aspect" — confirm the intended layer/feature
# alternative layer that was tried: pos_type = "webanno.custom.Link"
feature = "aspect"


feature_path = f'{pos_type}>{feature}'

pos_annos = project.select(
    annotation=feature_path,
    annotators = annotators,
    source_files = files )

IAA_df = pd.DataFrame(pos_annos.iaa_pairwise(measure='kappa'))

avg = IAA_df.loc[:, 'kappa'].mean()


print(IAA_df)
# label the score with the feature computed in this cell, not the leftover loop variable `cat`
print(f"Averaged pairwise Kappa score {feature} : {avg}")

In [None]:
# number of annotations per label for IAA FILES
# NOTE(review): `files` is passed as source_files; if it still holds project.source_file_names
# (as assigned earlier), this counts ALL files and duplicates the "ALL FILES" cell — confirm
for cat in cats:
  feature_path = f'{pos_type}>{cat}'

  pos_annos = project.select(
      annotation=feature_path,
      annotators = annotators,
      source_files = files )

  count = pos_annos.count(grouped_by='annotation')

  print(cat)
  print(count)
  print("\n")

In [None]:
# number of source files in the project
len(project.source_file_names)

In [None]:
# number of annotations per label for ALL FILES, printed feature by feature
for cat in cats:
  feature_path = f'{pos_type}>{cat}'
  pos_annos = project.select(
      annotation=feature_path,
      annotators=annotators,
      source_files=project.source_file_names,
  )
  count = pos_annos.count(grouped_by='annotation')

  # feature name, its counts, then a blank separator
  print(cat, count, "\n", sep="\n")

# Plot confusion matrices for IAA tests

In [41]:
def plot_annots_confusion(feature="aspect", pos_type=pos_type, to_drop=None):
  """Plot the aggregated annotator confusion matrix for one feature as a heatmap.

  Reads the global `project`, selects `f'{pos_type}>{feature}'` and plots the
  result of `confusion_matrices(aggregate="total")` on a new 10x7 figure.

  Args:
    feature: feature name, used as the second part of the feature path.
    pos_type: layer name, used as the first part of the feature path. Defaults
      to the value the global `pos_type` had when this cell was run.
    to_drop: labels to remove from both rows and columns before plotting.
      Labels that do not occur in the matrix are skipped. `None` or an empty
      list drops nothing.

  Returns:
    Whatever `sn.heatmap` returns for the plotted matrix.
  """
  labels_to_drop = [] if to_drop is None else list(to_drop)

  feature_path = f'{pos_type}>{feature}'
  pos_annos = project.select(annotation=feature_path) #project = defined outside

  cm = pd.DataFrame(pos_annos.confusion_matrices(aggregate="total"))

  # errors="ignore" drops the labels that exist and skips the ones that do not;
  # previously a bare `except` hid every error and one missing label kept all labels
  res = cm.drop(columns=labels_to_drop, index=labels_to_drop, errors="ignore")

  # align rows to the column labels so both axes show the same labels in the same order
  df_cm = res.reindex(index=res.columns, columns=res.columns)

  plt.figure(figsize = (10,7))
  return sn.heatmap(df_cm, annot=True, fmt= "d")

In [None]:
# confusion matrix for the "aspect" feature, with the label 'event' left out (see note below)
plot_annots_confusion(feature= "aspect", to_drop = ["event"])

# note: two aspects were tagged as 'event' rather than 'event and storyworld' by accident
# to analyze the data, we drop these two

In [None]:
# confusion matrix for the "evaluation" feature, with no labels dropped
plot_annots_confusion(feature= "evaluation", to_drop = [])

# Playground

In [None]:
# get all annotations of the current view in a simple dataframe
# NOTE(review): relies on `pos_annos` left over from an earlier cell — select the view explicitly here
annos = pos_annos.data_frame

annos

In [None]:
# label frequencies in `annos`
annos.annotation.value_counts()

In [None]:
# "evaluation" annotations, selected without restricting annotators or source files;
# the displayed frame leaves out rows whose label is the string "None"
feature_path = f'{pos_type}>evaluation'
pos_annos = project.select(annotation=feature_path) #project = defined outside
annos_sents = pos_annos.data_frame
annos_sents.loc[annos_sents["annotation"] != "None"]

In [47]:
# keep only rows whose label is not the string "None" (rebinds `annos_sents`)
annos_sents = annos_sents[annos_sents["annotation"] != "None"]

In [49]:
# you may want to save your annotations as a CSV
# writes annotations.csv relative to the current working directory
annos_sents.to_csv("annotations.csv")

In [None]:
# display the filtered annotations (the name was misspelled as `annos_ents`, which is never defined)
annos_sents

In [52]:
# NOTE(review): `subsets` is not defined in any cell visible in this notebook, so this cell
# raises NameError on a fresh kernel — restore its definition or remove the cell
subsets_lists = [list(sub) for sub in subsets]

In [56]:
# matrix joined with the covered texts, without rows that contain missing values
# NOTE(review): uses `document_annotator_matrix` / `covered_texts` left over from an earlier cell
annots = document_annotator_matrix.join(covered_texts).dropna()

In [58]:
# per-annotator label columns as plain Python lists
# variable names are kept neutral so that no annotator identity leaks into the code
annotator_1_annotations = annots['name_1'].tolist()
annotator_2_annotations = annots['name_2'].tolist()

In [62]:
# same join + dropna expression as used for `annots_categories` and `annots`
annots_cats = document_annotator_matrix.join(covered_texts).dropna()

In [None]:
# display the joined table
annots_cats