In [2]:
%pip install -q statsmodels
from statsmodels.stats.inter_rater import fleiss_kappa
import pandas as pd

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.metrics import classification_report

In [None]:
# Read the semicolon-separated table of merged annotations.
df = pd.read_csv("/home/beatrice/projects/clicit/merged_annotations.csv", sep=';')
print(f"Numero totale di item: {len(df)}")

# Discard every row that has a missing value in any column, then strip the
# underscores from the column headers so they match the names listed below.
df = df.dropna(axis=0, how='any')
df = df.rename(columns=lambda name: name.replace('_', ''))

annotators = ['AnnBea', 'AnnLia', 'AnnMarco']

# Alphabetically ordered list of every distinct label used by the annotators.
categories = df[annotators].stack().unique().tolist()
categories.sort()

print(f"Totale item con annotazioni: {df.shape[0]}")

def count_categories(row):
    """Count how often each category occurs among the labels of one item.

    Args:
        row: Iterable holding the labels assigned to a single item.

    Returns:
        A list of integers with one entry per element of the notebook-level
        ``categories`` sequence, in that same order.
    """
    labels = list(row)
    counts = []
    for cat in categories:
        counts.append(labels.count(cat))
    return counts

# One row per item, one column per category: each cell holds the number of
# annotators who assigned that category to that item.
category_counts = df[annotators].apply(
    count_categories,
    axis='columns',
    result_type='expand',
)
category_counts.columns = list(categories)

# fleiss_kappa takes the count table as a plain array.
fleiss_matrix = category_counts.to_numpy()
kappa = fleiss_kappa(fleiss_matrix, method='fleiss')

print("Fleiss K: {:.4f}".format(kappa))

Numero totale di item: 631
Totale item con annotazioni: 627
Fleiss K: 0.6597


In [16]:

# Reload the merged annotations for the evaluation; this copy is parsed as
# comma-separated (the pandas default, spelled out here).
df = pd.read_csv("/data/EN/target annotation/merged_annotations.csv", sep=",")

def majority_vote(row):
    """Return the label chosen by most annotators for one item.

    Reads the 'AnnBea', 'AnnLia' and 'AnnMarco' entries of ``row`` and ignores
    every entry that is not a non-blank string (missing values, empty cells).

    Ties are resolved deterministically: among the most frequent labels, the
    one given by the earliest annotator (AnnBea, then AnnLia, then AnnMarco)
    wins. This makes the result reproducible across kernel restarts.

    Args:
        row: Object indexable by the three annotator column names, such as a
            DataFrame row or a dict.

    Returns:
        The winning label exactly as the annotator wrote it, or None when no
        annotator supplied a usable label.
    """
    votes = [
        v
        for v in (row['AnnBea'], row['AnnLia'], row['AnnMarco'])
        if isinstance(v, str) and v.strip() != ''
    ]
    if not votes:
        return None
    # Iterate the ordered list rather than a set: max() returns the first
    # maximal element, whereas set iteration order for strings depends on
    # per-process hash randomization and made tie outcomes vary between runs.
    return max(votes, key=votes.count)

# Attach the annotators' consensus label to every item.
df['majority_vote'] = df.apply(majority_vote, axis=1)

# Keep only the items that have both a consensus label and a model label.
df = df.loc[df['majority_vote'].notna() & df['llama classification'].notna()]

# Consensus labels are the reference; the 'llama classification' column is scored against them.
y_true, y_pred = df['majority_vote'], df['llama classification']

report_dict = classification_report(
    y_true,
    y_pred,
    output_dict=True,
    zero_division=0,
)

# Pull the three figures out of the report's 'weighted avg' entry.
precision_avg, recall_avg, f1_avg = (
    report_dict['weighted avg'][metric]
    for metric in ('precision', 'recall', 'f1-score')
)

print("Average Precision: {:.3f}".format(precision_avg))
print("Average Recall:    {:.3f}".format(recall_avg))
print("Average F1-score:  {:.3f}".format(f1_avg))

Average Precision: 0.718
Average Recall:    0.675
Average F1-score:  0.665


In [14]:
# Preview the first rows of the evaluated table (at most 20).
df.head(n=20)

Unnamed: 0,Target,llama classification,AnnBea,AnnLia,AnnMarco,majority_vote
0,aryan people,Race,Race,Race,Race,Race
1,baptists,Religion,Religion,Religion,Religion,Religion
2,biharis,Disability,Nationality,Nationality,Race,Nationality
3,india,Nationality,Nationality,Nationality,Nationality,Nationality
4,anti-gun activist,Political_Identity,Political_Identity,Political_Identity,Political_Identity,Political_Identity
5,third world people,Class,Class,Class,Nationality,Class
6,ideologues,Political_Identity,Political_Identity,Political_Identity,Political_Identity,Political_Identity
7,foreign,Nationality,Nationality,Nationality,Nationality,Nationality
8,native american,Race,Race,Nationality,Race,Race
9,mother,Gender,NONE,Gender,NONE,NONE


In [18]:
# Write the current table, including the majority_vote column, to a CSV file
# in the working directory; the DataFrame index is not written.
df.to_csv(path_or_buf='majority_vote.csv', index=False)