# Cohen's kappa score

Computes the kappa score for two sets of annotated data, each stored in a .txt file. The program assumes that the annotations are categorical.

If the kappa score is greater than 0.8, it is considered excellent agreement; 0.6 to 0.8 is considered substantial agreement, and 0.4 to 0.6 is considered moderate agreement.

In [2]:
from sklearn.metrics import cohen_kappa_score

In [3]:
def read_annotations_from_file(file_path):
    """
    Read annotations from a text file and return them as a list of labels.

    Each line is split on whitespace and its last token is taken as the
    label. Lines with fewer than two tokens (blank lines, or a lone token
    with no label after it) are skipped.

    :param file_path: The path to the text file containing annotations.
    :return: List of labels, in the order they appear in the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale-derived default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        # str.split() with no arguments already ignores leading/trailing
        # whitespace, so each line is tokenised exactly once.
        return [
            tokens[-1]
            for tokens in map(str.split, file)
            if len(tokens) > 1
        ]

In [4]:
def compute_kappa_score(annotations1, annotations2):
    """
    Compute Cohen's kappa score for two sets of annotations.

    The two inputs are compared position by position, so item ``i`` of
    each must refer to the same annotated unit.

    :param annotations1: Annotations from the first annotator (sequence of hashable categorical labels).
    :param annotations2: Annotations from the second annotator (sequence of hashable categorical labels).
    :return: Cohen's kappa score.
    :raises AssertionError: If the two inputs do not have the same length.
    """
    # Raise explicitly instead of using an `assert` statement: asserts are
    # removed when Python runs with -O, which would let mismatched inputs
    # through. The exception type and message match the previous behaviour.
    if len(annotations1) != len(annotations2):
        raise AssertionError("Annotations lists must have the same length.")

    # Number the labels in first-seen order. dict.fromkeys de-duplicates
    # while preserving insertion order, so the mapping is the same on every
    # run (set iteration order is not), and unpacking lets the two inputs be
    # any mix of sequence types rather than requiring `list + list`.
    label_to_index = {
        label: index
        for index, label in enumerate(dict.fromkeys([*annotations1, *annotations2]))
    }

    # Convert labels to integer indices
    annotations1_indices = [label_to_index[label] for label in annotations1]
    annotations2_indices = [label_to_index[label] for label in annotations2]

    return cohen_kappa_score(annotations1_indices, annotations2_indices)

In [5]:
if __name__ == "__main__":
    # Example run: score the agreement between two annotators' label files.
    file_path_annotator1 = "/Users/ellyzamaripapas/Code/NER_Project/data/annotated_data/yes-bisaya-1.txt"
    file_path_annotator2 = "/Users/ellyzamaripapas/Code/NER_Project/data/annotated_data/yes-bisaya-2.txt"

    # Load both files the same way, first annotator first.
    annotations_annotator1, annotations_annotator2 = [
        read_annotations_from_file(annotation_path)
        for annotation_path in (file_path_annotator1, file_path_annotator2)
    ]

    kappa_score = compute_kappa_score(
        annotations_annotator1,
        annotations_annotator2,
    )
    print(f"Cohen's Kappa Score: {kappa_score}")

Cohen's Kappa Score: 0.899197510752265
