In [3]:
import json
from sklearn.metrics import cohen_kappa_score
from statsmodels.stats.inter_rater import fleiss_kappa

# Module-level container, initialised empty. None of the functions defined in
# the cells below read this global (kappaCohenResult/kappaFleissResult only use
# a local loop variable of the same name) — presumably it is meant to receive
# the result of loadDataset() calls; TODO confirm against the calling cells.
annotations = {}


In [None]:
# Maps an annotation label name to its integer class id. The lowercase and the
# capitalised spelling of each label share one id: the position of the label
# inside its group (0 = strengthening, 1 = weakening, 2 = reversing).
classes_map = {
    label_name: class_id
    for spelling_group in (
        ("wzmocnienie", "osłabienie", "odwracanie"),
        ("Wzmocnienie", "Osłabienie", "Odwracanie"),
    )
    for class_id, label_name in enumerate(spelling_group)
}

In [2]:
def loadDataset(path: str, annots: dict[str, list[list]] = None):
    """Load one JSON Lines annotation file and merge it into ``annots``.

    Every non-blank line of the file is parsed as a JSON object that must
    provide a ``"text"`` entry and a ``"label"`` entry holding
    ``[start, end, label_name]`` triples. Label names are converted to integer
    class ids through the module-level ``classes_map``.

    Args:
        path: Path of the UTF-8 encoded file to read.
        annots: Mapping produced by an earlier call. When ``None`` a new
            mapping is created and every text gets a one-element list holding
            this file's spans. Otherwise this file's spans are appended to the
            existing entry of each text and the mapping is mutated in place.

    Returns:
        The mapping ``text -> list of annotations``, one annotation per loaded
        file, each annotation being a list of ``[start, end, class_id]``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a label name is missing from ``classes_map``, a record
            lacks ``"text"``/``"label"``, or (when merging) a text is not
            already present in ``annots``. The merge case is deliberately not
            papered over: creating the entry on the fly would shift the
            per-file position of the annotations for that text.
    """
    is_first_file = annots is None
    if is_first_file:
        annots = {}

    with open(path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            # Iterating a text file yields plain strings; tolerate blank lines
            # (e.g. a trailing newline) and decode everything else as JSON.
            if not raw_line.strip():
                continue
            record = json.loads(raw_line)

            spans = [
                [startIndex, endIndex, classes_map[label]]
                for startIndex, endIndex, label in record["label"]
            ]
            if is_first_file:
                annots[record["text"]] = [spans]
            else:
                annots[record["text"]].append(spans)
    return annots

In [None]:
def separateConflicts(annots: dict[str, list[list]]) -> tuple[list[dict], list[dict]]:
    """Split texts into those labelled identically everywhere and the rest.

    Args:
        annots: Mapping ``text -> list of annotations``; the annotations of one
            text are compared with each other using ``==``.

    Returns:
        A ``(correct, wrong)`` pair of lists of dicts.
        ``correct`` entries are ``{"text", "label"}`` with the shared
        annotation (the first one in the list).
        ``wrong`` entries are ``{"text", "label", "old_labels"}`` where
        ``"label"`` is an empty string and ``"old_labels"`` is ``str()`` of
        all the differing annotations.
    """
    agreed: list[dict] = []
    disputed: list[dict] = []

    for sample_text, candidates in annots.items():
        # Any annotation that differs from the first one marks a conflict.
        if any(not (candidate == candidates[0]) for candidate in candidates):
            disputed.append({"text": sample_text, "label": "", "old_labels": str(candidates)})
            continue
        agreed.append({"text": sample_text, "label": candidates[0]})

    return agreed, disputed

In [3]:
def classify_words(text, spans):
    """Return one class id per whitespace-separated word of ``text``.

    A word receives a span's label only when the word lies entirely inside
    that span (``start <= word_start`` and ``word_end <= end``, with
    ``word_end`` being the exclusive end offset). Words covered by no span
    keep the fallback id ``3``. Spans are applied in order, so a later span
    overrides an earlier one on the words both contain.

    Args:
        text: The annotated string.
        spans: Iterable of ``(start, end, label)`` triples with character
            offsets into ``text``.

    Returns:
        A list with as many entries as ``text.split()`` has words.
    """
    tokens = text.split()

    # Recover each token's character offsets by searching forward from the end
    # of the previous token, so repeated words map to successive occurrences.
    bounds = []
    cursor = 0
    for token in tokens:
        begin = text.find(token, cursor)
        cursor = begin + len(token)
        bounds.append((begin, cursor))

    labels = [3 for _ in tokens]

    for span_start, span_end, span_label in spans:
        for idx in range(len(bounds)):
            token_start, token_end = bounds[idx]
            if token_start >= span_start and token_end <= span_end:
                labels[idx] = span_label

    return labels

In [None]:
def kappaCohenResult(annots: dict[str, list[list]]):
    """Print pairwise Cohen's kappa between three annotators, word by word.

    For every text, the annotation at position 0, 1 and 2 is converted to
    per-word class ids with ``classify_words`` and appended to that position's
    running sequence. The three sequences are then compared pairwise with
    ``cohen_kappa_score``. In the printed output position 0 is reported as
    "Klaudia", position 1 as "Michał" and position 2 as "Kajtek".

    Args:
        annots: Mapping ``text -> list of annotations``; each annotation is
            the span list passed on to ``classify_words``.

    Returns:
        None. The scores are only printed.
    """
    annotators = [[] for _ in range(3)]

    for sample_text, per_annotator_spans in annots.items():
        # zip stops at the shorter side: annotations past the third are ignored
        # and a missing annotation leaves that annotator's sequence untouched.
        for word_labels, span_list in zip(annotators, per_annotator_spans):
            word_labels += classify_words(sample_text, span_list)

    print('Kappa Cohen:')
    print(f'Klaudia - Michał: {cohen_kappa_score(annotators[0], annotators[1])}')
    print(f'Klaudia - Kajtek: {cohen_kappa_score(annotators[0], annotators[2])}')
    print(f'Kajtek - Michał: {cohen_kappa_score(annotators[2], annotators[1])}')


In [None]:
def kappaFleissResult(annots: dict[str, list[int | str]]):
    results = []
    for text, annotations in annots.items():
        words = []
        new_sample = [0, 0, 0, 0]
        for annotation in annotations:
            new_sample[annotation] += 1
        results.append(new_sample)
    print(f'Kappa Fleiss: {fleiss_kappa(results)}')