In [2]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score
import numpy as np
from util import read_excelfile, detect_annotation_errors

In [3]:
# Annotation spreadsheets for task 1, one path per participant.
files = [
    'data/participant1_task1.xlsx',
    'data/participant2_task1.xlsx',
    'data/participant3_task1.xlsx',
]

In [4]:
def get_scores_from_df(df, category, ignore_indices=None):
    """Collect the integer scores stored in one column of an annotation sheet.

    Args:
        df: DataFrame containing the annotations; ``df[category]`` must exist.
        category: Name of the column to read.
        ignore_indices: Optional iterable of 0-based row positions (positions
            within the column, not index labels) to skip. ``None`` (the
            default) skips nothing.

    Returns:
        A flat list of ints in row order. A string cell contributes one int
        per comma-separated token (both ',' and the full-width '，' act as
        separators); any other cell contributes ``int(cell)``. Cells equal to
        one of the category labels are skipped.

    Raises:
        ValueError: propagated from ``int()`` when a cell or token is not a
            valid integer (e.g. an empty token or NaN).
    """
    category_labels = ('Informativeness', 'Non-redundancy', 'Fluency')
    # Use None rather than a shared mutable [] default; a set makes the
    # per-row membership test O(1).
    skipped_rows = set(ignore_indices) if ignore_indices is not None else set()

    scores = []
    for row, cell in enumerate(df[category].values.tolist()):
        if row in skipped_rows:
            continue

        if isinstance(cell, str):
            if cell in category_labels:
                # A cell that merely repeats a category label holds no score.
                continue
            # Normalise the full-width comma so both separators split alike.
            tokens = cell.replace('，', ',').split(',')
            scores.extend(int(token) for token in tokens)
        else:
            scores.append(int(cell))
    return scores

In [5]:
# Load the first three annotation files, one DataFrame per participant.
df_p1, df_p2, df_p3 = [read_excelfile(path) for path in files[:3]]

In [6]:
# Stop immediately if any participant's sheet has reported annotation errors.
for df in (df_p1, df_p2, df_p3):
    errors = detect_annotation_errors(df)
    # Every value in `errors` must be an empty collection for the sheet to pass.
    has_errors = any(len(el) != 0 for el in errors.values())
    assert not has_errors, f'There are annotation errors: {errors}'
print('INFO: there are no annotation errors')

INFO: there are no annotation errors


In [7]:
# One flat score list per participant: all Informativeness scores, then
# Non-redundancy, then Fluency, so the lists line up position by position.
participant_scores = [
    [
        score
        for category in ('Informativeness', 'Non-redundancy', 'Fluency')
        for score in get_scores_from_df(v, category)
    ]
    for v in [df_p1, df_p2, df_p3]
]

In [8]:
# Compare each participant with the next one, wrapping around: (1,2), (2,3), (3,1).
cohen_kappa_scores = []
for i in range(3):
    rater_a = participant_scores[i]
    rater_b = participant_scores[(i+1)%3]
    cohen_kappa_scores.append(cohen_kappa_score(rater_a, rater_b))

for i in range(3):
    print(f'k_{i+1}{(i+1)%3+1} = {cohen_kappa_scores[i]:.2f}')

k_12 = 0.52
k_23 = 0.42
k_31 = 0.51


In [9]:
# Arithmetic mean of the pairwise kappa values.
k_mean = sum(cohen_kappa_scores) / len(cohen_kappa_scores)
print(f'k_mean = {k_mean:.2f}')

k_mean = 0.48
