In [1]:
import pandas as pd
import numpy as np
from collections import Counter

import simpledorff

# note: fleiss_kappa extends Cohen's kappa to multiple raters
from statsmodels.stats import inter_rater

In [2]:
# Read the anonymized panel bios; the first TSV column is used as the index.
panel_bios = pd.read_csv(
    '../data/panel_bios_anonymized.tsv',
    sep='\t',
    index_col=0,
    low_memory=False,
)

# Keep only the hand-coded bios. reset_index() turns the former index into a
# regular column (selected as 'index' further down).
coded_df = panel_bios.loc[panel_bios['handcoded'].eq(1)].reset_index()

print(f'Total of {len(coded_df)} coded users loaded')

Total of 5064 coded users loaded


In [3]:
# Per-coder label columns, ordered Coder1 .. Coder4
gender_codes = [f'Coder{i}_gender' for i in range(1, 5)]
trans_codes = [f'Coder{i}_trans' for i in range(1, 5)]

# Columns carried into the reliability analysis
keep = [
    'index',
    'code_gender', 'code_count_gender',
    'code_trans', 'code_count_trans',
    'n_coders',
    *gender_codes,
    *trans_codes,
]

codes = coded_df.loc[:, keep]

# Intercoder reliability for handcoded data

In [4]:
# Subset: rows labelled by all four coders
all_coded = codes.loc[codes['n_coders'].eq(4)]
print(all_coded.shape[0])

200


In [5]:
# Subset: rows with two coders and no Coder3 gender label
# (analysed below with the Coder1/Coder2 columns)
batchA = codes.loc[codes['n_coders'].eq(2) & codes['Coder3_gender'].isna()]
print(batchA.shape[0])

2422


In [6]:
# Subset: rows with two coders and no Coder1 gender label
# (analysed below with the Coder3/Coder4 columns)
batchB = codes.loc[codes['n_coders'].eq(2) & codes['Coder1_gender'].isna()]
print(batchB.shape[0])

2435


In [7]:
# Rows labelled by a single coder only (9 in the recorded output)
lost = codes.loc[codes['n_coders'].eq(1)]
print(lost.shape[0])

9


In [8]:
# Number of rows across the three multi-coder subsets.
# NOTE(review): the recorded outputs give 200 + 2422 + 2435 = 5057 here and 9
# single-coder rows, i.e. 5066 in total, yet only 5064 users were loaded. The
# batchA/batchB filters may overlap (e.g. rows where both Coder1_gender and
# Coder3_gender are missing) - confirm before relying on this total.
total_coded = sum(len(part) for part in (all_coded, batchA, batchB))
print(total_coded)

5057


# Krippendorff's Alpha

In [9]:
def calc_kripp(df, cols, topic, group, sure=False):
    """Print Krippendorff's alpha for the coder columns `cols` of `df`.

    The selected columns are reshaped to long format (one row per item/coder
    pair) and passed to ``simpledorff.calculate_krippendorffs_alpha_for_df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``'index'`` column identifying each coded item plus
        every column listed in `cols`. The caller's frame is not modified.
    cols : sequence of str
        One column per coder, holding that coder's label for the item.
    topic, group : str
        Only used to build the printed header line.
    sure : bool, default False
        When True, ``'Not sure'`` labels are removed before alpha is computed.

    Returns
    -------
    None
        The coefficient is printed with two decimal places.
    """
    cols = list(cols)

    # Long format: one row per (item, coder) with that coder's label.
    long_codes = pd.melt(df[['index'] + cols],
                         id_vars='index', value_vars=cols,
                         var_name='Coder', value_name='Code')

    # A coder who did not label an item has a missing value here; drop those
    # rows explicitly so a missing rating can never be counted as a label.
    long_codes = long_codes.dropna(subset=['Code'])

    if sure:
        long_codes = long_codes[long_codes['Code'] != 'Not sure']

    krip = simpledorff.calculate_krippendorffs_alpha_for_df(long_codes,
                                                            experiment_col='index',
                                                            annotator_col='Coder',
                                                            class_col='Code')

    print(f'***** {topic}: {group} ******')
    if sure:
        print('"Not Sure" values dropped')
    # `.2f` (two decimals) rather than `.2` (two significant digits), so every
    # coefficient is reported at the same precision, e.g. 0.60 instead of 0.6.
    print(f'Krippendorff\'s Alpha: {krip:.2f}\n')

In [10]:
# Gender labels with every category kept.
# gender_codes[:2] are the Coder1/Coder2 columns, gender_codes[2:] Coder3/Coder4.
calc_kripp(df=codes, cols=gender_codes, topic='Gender', group='ALL')
calc_kripp(df=all_coded, cols=gender_codes, topic='Gender', group='All Coded')
calc_kripp(df=batchA, cols=gender_codes[:2], topic='Gender', group='BatchA')
calc_kripp(df=batchB, cols=gender_codes[2:], topic='Gender', group='BatchB')

***** Gender: ALL ******
Krippendorff's Alpha: 0.86

***** Gender: All Coded ******
Krippendorff's Alpha: 0.87

***** Gender: BatchA ******
Krippendorff's Alpha: 0.86

***** Gender: BatchB ******
Krippendorff's Alpha: 0.85



In [11]:
# Gender labels again, this time with 'Not sure' labels removed (sure=True).
calc_kripp(df=codes, cols=gender_codes, topic='Gender', group='ALL', sure=True)
calc_kripp(df=all_coded, cols=gender_codes, topic='Gender', group='All Coded', sure=True)
calc_kripp(df=batchA, cols=gender_codes[:2], topic='Gender', group='BatchA', sure=True)
calc_kripp(df=batchB, cols=gender_codes[2:], topic='Gender', group='BatchB', sure=True)

***** Gender: ALL ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.96

***** Gender: All Coded ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.97

***** Gender: BatchA ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.95

***** Gender: BatchB ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.96



In [12]:
# Trans labels with every category kept.
# trans_codes[:2] are the Coder1/Coder2 columns, trans_codes[2:] Coder3/Coder4.
calc_kripp(df=codes, cols=trans_codes, topic='Trans', group='ALL')
calc_kripp(df=all_coded, cols=trans_codes, topic='Trans', group='All Coded')
calc_kripp(df=batchA, cols=trans_codes[:2], topic='Trans', group='BatchA')
calc_kripp(df=batchB, cols=trans_codes[2:], topic='Trans', group='BatchB')

***** Trans: ALL ******
Krippendorff's Alpha: 0.25

***** Trans: All Coded ******
Krippendorff's Alpha: 0.28

***** Trans: BatchA ******
Krippendorff's Alpha: 0.29

***** Trans: BatchB ******
Krippendorff's Alpha: 0.19



In [13]:
# Trans labels again, this time with 'Not sure' labels removed (sure=True).
calc_kripp(df=codes, cols=trans_codes, topic='Trans', group='ALL', sure=True)
calc_kripp(df=all_coded, cols=trans_codes, topic='Trans', group='All Coded', sure=True)
calc_kripp(df=batchA, cols=trans_codes[:2], topic='Trans', group='BatchA', sure=True)
calc_kripp(df=batchB, cols=trans_codes[2:], topic='Trans', group='BatchB', sure=True)

***** Trans: ALL ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.26

***** Trans: All Coded ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.088

***** Trans: BatchA ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.6

***** Trans: BatchB ******
"Not Sure" values dropped
Krippendorff's Alpha: 0.014



# Fleiss' Kappa

### This is a generalization of Cohen's Kappa to multiple annotators
**Can NOT handle missing data**

In [14]:
def calc_fleiss(df, cols, topic, group):
    """Print Fleiss' kappa for the coder columns `cols` of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain every column listed in `cols`. The caller's frame is not
        modified.
    cols : list of str
        One column per coder, holding that coder's label for the item.
    topic, group : str
        Only used to build the printed header line.

    Returns
    -------
    None
        The coefficient is printed with two decimal places.

    Notes
    -----
    Missing ratings are replaced by ``'Not sure'`` before the computation, so
    they count as real labels. Pass only subsets in which every coder in
    `cols` rated every row, otherwise the imputed labels affect the result.
    """
    # fillna returns a new frame, so the input is left untouched.
    ratings = df[cols].fillna('Not sure')

    # aggregate_raters returns (data, categories); only the per-item category
    # counts are needed for the kappa computation.
    counts, _ = inter_rater.aggregate_raters(ratings)
    kappa = inter_rater.fleiss_kappa(counts, method='fleiss')

    print(f'***** {topic}: {group} ******')
    # `.2f` (two decimals) rather than `.2` (two significant digits), so every
    # coefficient is reported at the same precision, e.g. 0.20 instead of 0.2.
    print(f'Fleiss\' Kappa: {kappa:.2f}\n')

In [15]:
# Gender labels; only subsets in which every listed coder rated each row.
# gender_codes[:2] are the Coder1/Coder2 columns, gender_codes[2:] Coder3/Coder4.
calc_fleiss(df=all_coded, cols=gender_codes, topic='Gender', group='All Coded')
calc_fleiss(df=batchA, cols=gender_codes[:2], topic='Gender', group='BatchA')
calc_fleiss(df=batchB, cols=gender_codes[2:], topic='Gender', group='BatchB')

***** Gender: All Coded ******
Fleiss' Kappa: 0.87

***** Gender: BatchA ******
Fleiss' Kappa: 0.86

***** Gender: BatchB ******
Fleiss' Kappa: 0.85



In [16]:
# Trans labels; same subsets as for gender.
# trans_codes[:2] are the Coder1/Coder2 columns, trans_codes[2:] Coder3/Coder4.
calc_fleiss(df=all_coded, cols=trans_codes, topic='Trans', group='All Coded')
calc_fleiss(df=batchA, cols=trans_codes[:2], topic='Trans', group='BatchA')
calc_fleiss(df=batchB, cols=trans_codes[2:], topic='Trans', group='BatchB')

***** Trans: All Coded ******
Fleiss' Kappa: 0.28

***** Trans: BatchA ******
Fleiss' Kappa: 0.29

***** Trans: BatchB ******
Fleiss' Kappa: 0.2



In [17]:
# Share of hand-coded users per `code_gender` value, in order of first appearance
cnt = {
    label: n_users / len(coded_df)
    for label, n_users in Counter(coded_df['code_gender']).items()
}
cnt

{'Male': 0.44352290679304895,
 'Female': 0.44549763033175355,
 'Mixed': 0.08175355450236967,
 'Not sure': 0.026263823064770934,
 'Non-binary': 0.002962085308056872}