# Annotation Statistics

## Step 1: Load Libraries

In [1]:
# Install all required packages
!pip install pandas krippendorff statsmodels scikit-learn seaborn matplotlib numpy


Collecting pandas
  Using cached pandas-2.3.0-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting krippendorff
  Using cached krippendorff-0.8.1-py3-none-any.whl.metadata (3.0 kB)
Collecting statsmodels
  Using cached statsmodels-0.14.4-cp313-cp313-macosx_11_0_arm64.whl.metadata (9.2 kB)
Collecting scikit-learn
  Using cached scikit_learn-1.7.0-cp313-cp313-macosx_12_0_arm64.whl.metadata (31 kB)
Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Collecting matplotlib
  Using cached matplotlib-3.10.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting numpy
  Using cached numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting scipy!=1.9.2,>=1.8 (from statsmodels)
  Using cached scipy-1.16.0-cp313-cp313-macosx_

In [2]:
import pandas as pd
import krippendorff
from statsmodels.stats.inter_rater import fleiss_kappa
from sklearn.metrics import cohen_kappa_score
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.stats.inter_rater import aggregate_raters, fleiss_kappa


## Step 2: Read in Data

In [3]:
df = pd.read_csv("./gold_dataset.csv")
df

Unnamed: 0,HITId,HITTypeId,Title,Description,Keywords,Reward,CreationTime,MaxAssignments,RequesterAnnotation,AssignmentDurationInSeconds,...,LifetimeApprovalRate,Last30DaysApprovalRate,Last7DaysApprovalRate,Input.text-eng,Input.label,Input.parameter,Input.parameter_num_labels,Answer.manifesto_class.labels,Approve,Reject
0,39N6W9XWS05H65P9I0HAUP0E2RZGYY,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon Apr 14 05:38:01 PDT 2025,3,BatchId:414859;OriginalHitTemplateId:921587261;,3600,...,100% (152/152),100% (46/46),100% (46/46),"Madam President, Mr Posselt, I do not really t...",0,Par303_Governmental and Administrative Efficiency,2,[],,
1,39N6W9XWS05H65P9I0HAUP0E2RZGYY,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon Apr 14 05:38:01 PDT 2025,3,BatchId:414859;OriginalHitTemplateId:921587261;,3600,...,100% (238/239),100% (19/19),100% (19/19),"Madam President, Mr Posselt, I do not really t...",0,Par303_Governmental and Administrative Efficiency,2,[],,
2,39N6W9XWS05H65P9I0HAUP0E2RZGYY,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon Apr 14 05:38:01 PDT 2025,3,BatchId:414859;OriginalHitTemplateId:921587261;,3600,...,100% (287/287),100% (75/75),100% (19/19),"Madam President, Mr Posselt, I do not really t...",0,Par303_Governmental and Administrative Efficiency,2,[],,
3,3P7RGTLO71VLC9NPSLO9PVA06HDAKL,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon Apr 14 05:38:01 PDT 2025,3,BatchId:414859;OriginalHitTemplateId:921587261;,3600,...,100% (152/152),100% (46/46),100% (46/46),"Mr President, ladies and gentlemen, first of a...",1,Par405_Corporatism,2,"[""Keynesian Demand Management"",""Market regulat...",,
4,3P7RGTLO71VLC9NPSLO9PVA06HDAKL,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon Apr 14 05:38:01 PDT 2025,3,BatchId:414859;OriginalHitTemplateId:921587261;,3600,...,100% (287/287),100% (75/75),100% (19/19),"Mr President, ladies and gentlemen, first of a...",1,Par405_Corporatism,2,"[""Government and administrative efficiency"",""M...",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
655,3UXQ63NLBX41FPAZJZ06Z67LDCRLB8,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon May 19 05:14:58 PDT 2025,3,BatchId:415864;OriginalHitTemplateId:921587261;,3600,...,100% (287/287),100% (75/75),100% (19/19),"Commercial negotiations, such as ACAC, PTCI, A...",2,Par701702_LabourGroups,3,"[""Market regulation"",""Incentives"",""Economic Go...",,
656,3UXQ63NLBX41FPAZJZ06Z67LDCRLB8,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon May 19 05:14:58 PDT 2025,3,BatchId:415864;OriginalHitTemplateId:921587261;,3600,...,100% (104/104),100% (34/34),0% (0/0),"Commercial negotiations, such as ACAC, PTCI, A...",2,Par701702_LabourGroups,3,"[""Free Market Economy""]",,
657,36GJS3V79I8RBGUAW0G7K2LQP0EJGL,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon May 19 05:14:58 PDT 2025,3,BatchId:415864;OriginalHitTemplateId:921587261;,3600,...,100% (287/287),100% (75/75),100% (19/19),"At a time of serious social crisis, with incre...",2,Par401403_FreeMarket-MarketRegulation,3,"[""Economic Growth"",""Labor Groups Positive"",""Ma...",,
658,36GJS3V79I8RBGUAW0G7K2LQP0EJGL,3BRS3IGD63EV308CIXT46SFWM41TR3,[ACSS] Text labeling Manifesto,Identify the most relevant labels to describe ...,"labeling, text, manifesto",$1.00,Mon May 19 05:14:58 PDT 2025,3,BatchId:415864;OriginalHitTemplateId:921587261;,3600,...,100% (83/83),100% (1/1),0% (0/0),"At a time of serious social crisis, with incre...",2,Par401403_FreeMarket-MarketRegulation,3,"[""Economic Goals""]",,


6 Annotators we are comparing

In [4]:
df['WorkerId'].unique()


array(['A335R4YE2E34H6', 'A1X47COW2Y9SEL', 'A1NBQ61Y6KO3O9',
       'APXGSUB250NVH', 'A3GS2NTQ4XU059', 'A1BQ37ZGUM16XI'], dtype=object)

Normalize data: Seperate Columns by WorkerID

In [5]:
print('-------------------')
context_cols = ['AssignmentId', 'Input.parameter']
df_pivoted = df.pivot(index=context_cols, columns='WorkerId', values='Input.label')
print('-------------------')
df_pivoted.columns = [f'Input.label_{worker}' for worker in df_pivoted.columns]
df_pivoted = df_pivoted.reset_index()
print('-------------------')
df_pivoted


-------------------
-------------------
-------------------


Unnamed: 0,AssignmentId,Input.parameter,Input.label_A1BQ37ZGUM16XI,Input.label_A1NBQ61Y6KO3O9,Input.label_A1X47COW2Y9SEL,Input.label_A335R4YE2E34H6,Input.label_A3GS2NTQ4XU059,Input.label_APXGSUB250NVH
0,3018Q3ZVO699LXYE8728FRSTJ2VAR6,Par401403_FreeMarket-MarketRegulation,,1.0,,,,
1,308XBLVES6NE4736EL24A2CZ1KDRBF,Par405_Corporatism,,1.0,,,,
2,30BUDKLTX1EN6NHSR5RR89DOCYUE5F,Par410416_EconomicGrowthAntiGrowth,,1.0,,,,
3,30BXRYBRPSGT3GNED7M6JCNEOBBWHF,Par410416_EconomicGrowthAntiGrowth,,,,,,1.0
4,30H4UDGLTQ1PBLROSIOOJNNEC0KPMZ,Par405_Corporatism,,,1.0,,,
...,...,...,...,...,...,...,...,...
655,3ZPPDN2SLJF67AJ4UV48KPIQYT9E90,Par409_KeynesianDemandManagement,,,1.0,,,
656,3ZQIG0FLQ2ZYLHIYIW3AYDENWQ0WVJ,Par405_Corporatism,,,,0.0,,
657,3ZR9AIQJUZS6JEYY5T92IS56I0J042,Par404_EconomicPlanning,,1.0,,,,
658,3ZSANO2JC3QGI3FZYCO5J28G2KNFS8,Par410416_EconomicGrowthAntiGrowth,,,,,1.0,


In [17]:
# isolate just the ratings to use as input for further analysis
ratings = df_pivoted[
    ['Input.label_A1BQ37ZGUM16XI', 
     'Input.label_A1NBQ61Y6KO3O9', 
     'Input.label_A1X47COW2Y9SEL', 
     'Input.label_A335R4YE2E34H6', 
     'Input.label_A3GS2NTQ4XU059', 
     'Input.label_APXGSUB250NVH']
]



## First Try: Krippendorff’s Alpha 

Handles missing data, works for nominal/ordinal; More flexible than Fleiss; can handle ordinal distance metrics too

In [20]:
print('-------------------')
alpha = krippendorff.alpha(reliability_data=ratings, level_of_measurement='nominal')
print(f"Krippendorff’s Alpha: {alpha:.3f}")

-------------------
Krippendorff’s Alpha: -0.007


## Second Try: Regular Cohen Kappa

In [None]:
# print('-------------------')
# kappa = cohen_kappa_score(annotator1, annotator2)
# print(f"Cohen's Kappa: {kappa:.2f}")