In [3]:
#!/usr/bin/env python

'''
Created on Aug 1, 2016
@author: skarumbaiah

Computes Fleiss' Kappa 
Joseph L. Fleiss, Measuring Nominal Scale Agreement Among Many Raters, 1971.
'''

def checkInput(rate, n):
    """ 
    Check correctness of the input matrix
    @param rate - ratings matrix holding, per subject (row), the number of
                  ratings given to each category (column)
    @param n - number of raters; every row must sum to this value
    @return None - the function only validates, it does not compute anything
    @throws AssertionError if the matrix is empty, if any row differs in
            length from the first row, if any element is not an int, or if
            any row does not sum to n
    """
    N = len(rate)
    # Fail with the documented exception type instead of an IndexError on rate[0].
    assert N > 0, "Ratings matrix is empty"
    k = len(rate[0])
    # Every one of the N rows is compared with the width of the first row.
    # Iterating over the rows themselves (rather than over range(k)) covers
    # rows beyond index k-1 and does not index past the end when N < k.
    assert all(len(row) == k for row in rate), "Row length != #categories)"
    assert all(isinstance(value, int) for row in rate for value in row), "Element not integer" 
    assert all(sum(row) == n for row in rate), "Sum of ratings != #raters)"

def fleissKappa(rate,n):
    """ 
    Computes Fleiss' kappa from a table of per-category rating counts
    @param rate - ratings matrix; rate[s][c] is the number of raters who put
    subject s into category c [size - N X k where N = #subjects and k = #categories]
    @param n - number of raters (each row of rate must sum to n)
    @return kappa rounded to three decimals; -inf when the expected agreement
    equals 1 and the ratio is therefore undefined
    @throws AssertionError when checkInput rejects the matrix
    """

    subjects = len(rate)
    categories = len(rate[0])
    print("#raters = ", n, ", #subjects = ", subjects, ", #categories = ", categories)
    checkInput(rate, n)

    # Observed agreement: per-subject agreement among rater pairs, averaged
    # over all subjects.
    ordered_pairs = n * (n - 1)
    observed = sum(
        (sum(count ** 2 for count in row) - n) / ordered_pairs
        for row in rate
    ) / subjects
    print("PA = ", observed)

    # Expected agreement: sum over categories of the squared share of all
    # assignments that went to that category.
    assignments = subjects * n
    shares = [
        sum(row[c] for row in rate) / assignments
        for c in range(categories)
    ]
    expected = sum(share ** 2 for share in shares)
    print("PE =", expected)

    # Stays at -inf if the denominator below turns out to be zero.
    kappa = -float("inf")
    try:
        kappa = float("{:.3f}".format((observed - expected) / (1 - expected)))
    except ZeroDivisionError:
        print("Expected agreement = 1")

    print("Fleiss' Kappa =", kappa)

    return kappa

In [10]:


if __name__ == "__main__":
    # Example runs that double as regression checks: each case feeds a ratings
    # matrix (rows = subjects, columns = categories, cells = #raters) to
    # fleissKappa and asserts the rounded result.

    print("Example run to calculate Fleiss' Kappa")

    print("\nTest case 1 - Fleiss 1971")
    # Worked example from Fleiss (1971): 30 subjects, 5 categories, 6 raters
    rate = [
        [0,0,0,6,0],
        [0,3,0,0,3],
        [0,1,4,0,1],
        [0,0,0,0,6],
        [0,3,0,3,0],
        [2,0,4,0,0],
        [0,0,4,0,2],
        [2,0,3,1,0],
        [2,0,0,4,0],
        [0,0,0,0,6],
        [1,0,0,5,0],
        [1,1,0,4,0],
        [0,3,3,0,0],
        [1,0,0,5,0],
        [0,2,0,3,1],
        [0,0,5,0,1],
        [3,0,0,1,2],
        [5,1,0,0,0],
        [0,2,0,4,0],
        [1,0,2,0,3],
        [0,0,0,0,6],
        [0,1,0,5,0],
        [0,2,0,1,3],
        [2,0,0,4,0],
        [1,0,0,4,1],
        [0,5,0,1,0],
        [4,0,0,0,2],
        [0,2,0,4,0],
        [1,0,5,0,0],
        [0,0,0,0,6]
    ]
    kappa = fleissKappa(rate,6)
    assert kappa == 0.43

    print("\nTest case 2 - Wikipedia example")
    # Worked example from the Wikipedia article: 10 subjects, 5 categories, 14 raters
    rate = [
        [0,0,0,0,14],
        [0,2,6,4,2],
        [0,0,3,5,6],
        [0,3,9,2,0],
        [2,2,8,1,1],
        [7,7,0,0,0],
        [3,2,6,3,0],
        [2,5,3,2,2],
        [6,5,2,1,0],
        [0,2,2,3,7]
    ]
    kappa = fleissKappa(rate,14)
    assert kappa == 0.21

    print("\nTest case 3 - perfect agreement")
    # Both raters always pick the same category, so kappa = 1
    rate = [
        [2,0,0,0],
        [0,2,0,0],
        [0,0,2,0],
        [0,0,0,2]
    ]
    kappa = fleissKappa(rate,2)
    assert kappa == 1

    print("\nTest case 4 - perfect expected agreement")
    # NOTE: despite the label printed above, this matrix does NOT give an
    # expected agreement of 1 (which would need every rating in one single
    # category). The ratings are spread almost evenly over the three
    # categories, so PE is about 0.334 and kappa is finite. The previous
    # comment claimed kappa = -inf and left the assertion disabled; the check
    # below pins the value the data really produces.
    rate = [
        [2,1,0],
        [0,2,1],
        [0,0,3],
        [1,1,1],
        [3,0,0],
        [2,0,1],
        [3,0,0],
        [0,0,3],
        [0,3,0],
        [2,0,1],
        [0,3,0],
        [0,3,0],
        [0,0,3],
        [0,3,0],
        [2,1,0],
        [0,2,1],
        [3,0,0],
        [0,0,3],
        [0,0,3],
        [3,0,0]
    ]
    kappa = fleissKappa(rate,3)
    assert kappa == 0.625


Example run to calculate Fleiss' Kappa

Test case 1 - Fleiss 1971
#raters =  6 , #subjects =  30 , #categories =  5
PA =  0.5555555555555556
PE = 0.21993827160493828
Fleiss' Kappa = 0.43

Test case 2 - Wikipedia example
#raters =  14 , #subjects =  10 , #categories =  5
PA =  0.378021978021978
PE = 0.21275510204081632
Fleiss' Kappa = 0.21

Test case 3 - perfect agreement
#raters =  2 , #subjects =  4 , #categories =  4
PA =  1.0
PE = 0.25
Fleiss' Kappa = 1.0

Test case 4 - perfect expected agreement
#raters =  3 , #subjects =  20 , #categories =  3
PA =  0.75
PE = 0.33388888888888885
Fleiss' Kappa = 0.625
