In [1]:
import pandas as pd
from itertools import combinations
from nltk.metrics.agreement import AnnotationTask

In [10]:
def alpha_df(df, good_only=False):
    """Compute Krippendorff's alpha over the answers stored in ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per annotator (the index label is used as the coder id) and
        one column per question, labelled with the integers 1..127.
    good_only : bool, default False
        When True, keep only the questions that received at most three
        distinct non-missing answers in the module-level ``answers`` frame.

    Returns
    -------
    The value of ``AnnotationTask(data=...).alpha()``, or ``1`` when that
    call raises ``ZeroDivisionError``.

    Notes
    -----
    NOTE(review): the ``good_only`` filter reads the global ``answers`` frame,
    not ``df``, so the same question subset is used whatever frame is passed
    in -- confirm this is intended.
    """
    questions = range(1, 128)
    if good_only:
        questions = [q for q in questions if answers[q].dropna().nunique() <= 3]

    data = []
    for annotator in df.index:
        for question in questions:
            cell = df.at[annotator, question]
            # pandas marks missing answers with NaN (or None); ``is not None``
            # lets NaN through, which would then be scored as a label that
            # disagrees with everything, including another NaN.
            if pd.notna(cell):
                data.append((annotator, question, cell))

    try:
        return AnnotationTask(data=data).alpha()
    except ZeroDivisionError:
        return 1

In [3]:
def get_trust_coefs(df, good_only=False):
    """Score each annotator by the agreement of the groups they belong to.

    Every combination of two or more annotators from ``df.index`` is scored
    with ``alpha_df``. The combinations are then ranked by ascending alpha
    (ties share a rank, ranks start at 1) and each annotator accumulates the
    rank of every combination they are part of.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame indexed by annotator; passed row-subset by row-subset to
        ``alpha_df``.
    good_only : bool, default False
        Forwarded unchanged to ``alpha_df``.

    Returns
    -------
    dict
        ``{annotator: total / (highest_rank + highest_total)}``. Empty when
        ``df`` holds fewer than two annotators, since no combination exists.

    Notes
    -----
    The number of combinations grows as 2**n with the number of annotators.
    """
    annotators = list(df.index)
    alphas = {
        combination: alpha_df(df.loc[list(combination)], good_only)
        for size in range(2, len(annotators) + 1)
        for combination in combinations(annotators, size)
    }
    if not alphas:
        # Nothing to rank; the original code crashed here on max([]).
        return {}

    ranked = sorted(alphas.items(), key=lambda item: item[1])

    totals = {}
    rank = 1
    previous_alpha = ranked[0][1]
    for combination, alpha in ranked:
        # Dense ranking: only a strictly larger alpha opens a new rank.
        if alpha > previous_alpha:
            rank += 1
        previous_alpha = alpha
        for annotator in combination:
            totals[annotator] = totals.get(annotator, 0) + rank

    highest_total = max(totals.values())
    return {
        annotator: total / (rank + highest_total)
        for annotator, total in totals.items()
    }

In [19]:
def ident_bad_annot(coefs, good_only=False, threshold=0.5):
    """Yield the annotators whose trust coefficient is at or below a cutoff.

    Parameters
    ----------
    coefs : dict
        Mapping of annotator to trust coefficient.
    good_only : bool, default False
        Unused; kept so existing calls keep working.
    threshold : float, default 0.5
        Annotators with ``coef <= threshold`` are yielded.

    Yields
    ------
    The keys of ``coefs`` that fail the cutoff, in the mapping's iteration
    order.
    """
    for annotator, coef in coefs.items():
        if coef <= threshold:
            yield annotator

In [5]:
# Load the raw survey export. Explicit column names are supplied, so the
# file's own first row is read as data and then dropped (label 0); cell values
# are therefore not parsed as numbers. Only the last row per e-mail is kept.
answers = (
    pd.read_csv(
        "answers.csv",
        names=["date_time", "email", "ling", "coref"] + list(range(1, 131)) + ["comments"],
    )
    .drop(index=0)
    .drop(columns="date_time")
    .groupby("email")
    .last()
)

In [6]:
# Mapping used to collapse the original answer labels onto a shorter scale;
# any value that is not a key of the mapping is left untouched.
smaller = {
    "2": "1",
    "3": "2",
    "4": "2",
    "5": "3",
    "6": "3",
    "7": "4",
}
small = answers.applymap(lambda value: smaller.get(value, value))

In [7]:
# Report Krippendorff's alpha for the four configurations: full vs. reduced
# scale, on every question and on the 'good' questions only.
print(
    f"Alpha de Krippendorff sur toutes les exemples avec une échelle de 0 à 7 : {alpha_df(answers)}\n"
    f"Alpha de Krippendorff sur toutes les exemples avec une échelle réduite de 0 à 4 : {alpha_df(small)}\n"
    f"Alpha de Krippendorff sur des 'bons' exemples seulement avec une échelle de 0 à 7 : {alpha_df(answers, good_only=True)}\n"
    f"Alpha de Krippendorff sur des 'bons' exemples seulement avec une échelle réduite de 0 à 4 : {alpha_df(small, good_only=True)}\n"
)

Alpha de Krippendorff sur toutes les exemples avec une échelle de 0 à 7 : 0.11707330361702328
Alpha de Krippendorff sur toutes les exemples avec une échelle réduite de 0 à 4 : 0.18881285344193743
Alpha de Krippendorff sur des 'bons' exemples seulement avec une échelle de 0 à 7 : 0.1967592592592593
Alpha de Krippendorff sur des 'bons' exemples seulement avec une échelle réduite de 0 à 4 : 0.2990447495725904



In [20]:
# Trust coefficients for the four configurations, in this order:
# (answers, all), (small, all), (answers, good only), (small, good only).
trust_coefs = [
    get_trust_coefs(frame, good_only=only_good)
    for only_good in (False, True)
    for frame in (answers, small)
]
# One printed list per configuration: the annotators flagged as unreliable.
for coef in trust_coefs:
    print(list(ident_bad_annot(coef)))

[]
[]
[]
[]


In [13]:
# Summary table: each annotator's background columns next to their trust
# coefficient in every configuration, plus the row-wise average.
# NOTE(review): the index holds annotators' e-mail addresses; consider
# anonymising it before sharing the rendered notebook.
# .copy() makes this an independent frame, so adding columns below does not
# raise SettingWithCopyWarning.
annotators_info = answers[["ling", "coref"]].copy()
for i, trust_coef in enumerate(trust_coefs, 1):
    # A Series built from the dict aligns on the annotator index; annotators
    # absent from the dict get NaN and the column stays float64.
    annotators_info["trust_coef" + str(i)] = pd.Series(trust_coef, dtype="float64")
annotators_info["average"] = annotators_info[
    ["trust_coef" + str(n) for n in range(1, len(trust_coefs) + 1)]
].mean(axis=1)
annotators_info.sort_values(by="average", ascending=False)

Unnamed: 0_level_0,ling,coref,trust_coef1,trust_coef2,trust_coef3,trust_coef4,average
email,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
sylvie.billot@univ-orleans.fr,non,oui,0.9968,0.996895,0.957998,0.914596,0.966572
loic.grobol@gmail.com,oui,oui,0.89906,0.85199,0.991583,0.996768,0.93485
ilaine.wang@inalco.fr,oui,non,0.967589,0.973415,0.93707,0.80933,0.921851
ygor.gallina@univ-nantes.fr,non,non,0.870167,0.835732,0.996591,0.95697,0.914865
fanny.krimou@hotmail.fr,oui,non,0.840049,0.795324,0.98011,0.942989,0.889618
emmanuel.schang@univ-orleans.fr,oui,oui,0.839957,0.759997,0.996591,0.95697,0.888379
Frederique.bordignon@enpc.fr,oui,non,0.786076,0.747536,0.937099,0.871181,0.835473
bergler@cse.concordia.ca,non,oui,0.723526,0.692038,0.941452,0.882177,0.809798
mariya,oui,oui,0.877624,0.977614,0.671122,0.632575,0.789734
anais.halftermeyer@univ-orleans.fr,oui,oui,0.632276,0.674202,0.724254,0.733764,0.691124
