# Krippendorff's Kappa Score

The goal is to calculate the Krippendorff's alpha to make an assessment of the intercoder reliability between our three coders Frenard, Sally and Trixy.

We calculate the alpha score between 2 coder pairs and overall alpha score for each file.

While calculating the score, following considerations were made:
    Only codes 'T','P','O','D','H','U' are considered. All other values are treated as nan
    In case of multiple Codes for a given sentence, only the first value is considered.
    In the second calculation of Krippendorff's alpha, we treat code 'U' as nan
    



In [1]:
import krippendorff
import glob
import numpy as np
import pandas as pd  
import IPython

In [2]:
df_alpha = pd.DataFrame(columns=['File','Frenard_Sally_Alpha', 'Trixy_Sally_Alpha', 'Frenard_Trixy_Alpha','Overall_Alpha'])
df_alpha.info()

<class 'pandas.core.frame.DataFrame'>
Index: 0 entries
Data columns (total 5 columns):
File                   0 non-null object
Frenard_Sally_Alpha    0 non-null object
Trixy_Sally_Alpha      0 non-null object
Frenard_Trixy_Alpha    0 non-null object
Overall_Alpha          0 non-null object
dtypes: object(5)
memory usage: 0.0+ bytes


In [3]:
files = [f for f in glob.glob("MergedFiles/*.csv")]

compr_files = []

for file in files:
    if len(file.split('-'))==1:
        compr_files.append(file)
compr_files

['MergedFiles/ML13298A103.csv',
 'MergedFiles/ML14041A484.csv',
 'MergedFiles/ML12027A131.csv',
 'MergedFiles/ML13182A476.csv',
 'MergedFiles/wat_2000010.csv',
 'MergedFiles/ML071350662.csv',
 'MergedFiles/ML14087A338.csv']

## Functions to Filter Sentence Codes

In [4]:
labels_with_u = {'T':1, 'P':2, 'O':3, 'D': 4, 'H':5, 'U':6}
def key_to_value(key):
    if type(key) == float:
        return np.nan
    key = key.strip()
    key.split(',')
    key = key[0]
    return labels_with_u[key]

labels_without_u = {'T':1, 'P':2, 'O':3, 'D': 4, 'H':5, 'U':np.nan}
def key_to_value_ignoring_u(key):
    if type(key) == float:
        return np.nan
    key = key.strip()
    key.split(',')
    key = key[0]
    return labels_without_u[key]

## Calculating Krippendorff's Alpha for Each File + Overall KA value

In [5]:
def krippendorff_alpha(filter_key,compr_files):
    df_alpha = pd.DataFrame(columns=['File','Frenard_Sally_Alpha', 'Trixy_Sally_Alpha', 'Frenard_Trixy_Alpha','Overall_Alpha','Reliability_Matrix'])
    file_name = []
    frenard_sally_alpha = []
    trixy_sally_alpha = []
    frenard_trixy_alpha = []
    overall_alpha = []
    rel_mat = []
    for file in compr_files:
        df = pd.read_csv(file)
        df['label_Frenard'] = df['label_Frenard'].apply(filter_key)
        df['label_SALLY'] = df['label_SALLY'].apply(filter_key)
        df['label_struck'] = df['label_struck'].apply(filter_key)
        reliability_matrix = np.asarray([df['label_Frenard'],df['label_SALLY'],df['label_struck']])
        rel_mat.append(reliability_matrix)
        file_name.append(df['file'][0])
        frenard_sally_alpha.append(krippendorff.alpha(reliability_matrix[[0,1]]))
        trixy_sally_alpha.append(krippendorff.alpha(reliability_matrix[[1,2]]))
        frenard_trixy_alpha.append(krippendorff.alpha(reliability_matrix[[0,2]]))
        overall_alpha.append(krippendorff.alpha(reliability_matrix))
    df_alpha['File'] = file_name
    df_alpha['Frenard_Sally_Alpha'] = frenard_sally_alpha
    df_alpha['Trixy_Sally_Alpha'] = trixy_sally_alpha
    df_alpha['Frenard_Trixy_Alpha'] = frenard_trixy_alpha
    df_alpha['Overall_Alpha'] = overall_alpha
    df_alpha['Reliability_Matrix'] = rel_mat
    
    #Calculating overall krippendorff's alpha over all the files
    fr = []
    tx = []
    sa = []
    for i in df_alpha['Reliability_Matrix']:
        fr += list(i[0])
        sa += list(i[1])
        tx += list(i[2])
    rm = np.asarray([fr,tx,sa])
    overall_ka = krippendorff.alpha(rm)
    df_alpha = df_alpha.drop(['Reliability_Matrix'],axis=1)
    return df_alpha, overall_ka

## Krippendorff's Alpha with U

In [6]:
df_alpha_with_u, overall_k_a_with_u = krippendorff_alpha(key_to_value,compr_files)
print("Overall Krippendorff's Alpha Value over all files: ", overall_k_a_with_u)
df_alpha_with_u

Overall Krippendorff's Alpha Value over all files:  0.7457093460230212


Unnamed: 0,File,Frenard_Sally_Alpha,Trixy_Sally_Alpha,Frenard_Trixy_Alpha,Overall_Alpha
0,ML13298A103.txt,-0.0028,-0.0028,1.0,0.497581
1,ML14041A484.txt,0.555551,0.513491,0.949251,0.694262
2,ML12027A131.txt,0.67612,0.807085,0.877529,0.788997
3,ML13182A476.txt,0.791676,0.841701,0.947895,0.864379
4,wat_2000010.txt,1.0,1.0,1.0,1.0
5,ML071350662.txt,0.930124,0.92012,0.976858,0.942764
6,ML14087A338.txt,0.51333,0.520178,1.0,0.68641


## Krippendorff's Alpha without U

In [9]:
df_alpha_without_u, overall_k_a_without_u = krippendorff_alpha(key_to_value_ignoring_u,compr_files)
print("Overall Krippendorff's Alpha Value over all files: ", overall_k_a_without_u)
df_alpha_without_u

Overall Krippendorff's Alpha Value over all files:  0.7240027147273962


Unnamed: 0,File,Frenard_Sally_Alpha,Trixy_Sally_Alpha,Frenard_Trixy_Alpha,Overall_Alpha
0,ML13298A103.txt,,,1.0,1.0
1,ML14041A484.txt,0.111009,-0.103736,0.884672,0.611436
2,ML12027A131.txt,0.636364,1.0,0.713528,0.790442
3,ML13182A476.txt,0.683544,0.683544,1.0,0.814516
4,wat_2000010.txt,1.0,1.0,1.0,1.0
5,ML071350662.txt,0.96,1.0,0.961715,0.97493
6,ML14087A338.txt,-0.043515,-0.131687,1.0,0.572983


In [8]:
df_alpha_with_u.to_csv('Results/Intercode_Reliability_Krippendorf_alpha_with_U.csv', header=True)
df_alpha_without_u.to_csv('Results/Intercode_Reliability_Krippendorf_alpha_without_U.csv', header=True)