In [1]:
import numpy as np
from collections import Counter
import pandas as pd

In [2]:
def nominal_distance(a, b):
    """
    Distance metric for nominal (categorical) data.

    Parameters:
    a, b : two rating values comparable with ``==``

    Returns:
    int : 0 if a == b, 1 otherwise
    """
    return 0 if a == b else 1

def krippendorff_alpha(data, distance_metric=nominal_distance):
    """
    Calculate Krippendorff's alpha for reliability of coding.

    alpha = 1 - D_o / D_e, where

      * D_o (observed disagreement) sums, over every unit with at least two
        ratings, the distances between all ordered pairs of ratings in that
        unit, weights each unit's sum by 1 / (m_u - 1) (m_u = number of
        ratings in the unit) and divides by n, the number of pairable values.
      * D_e (expected disagreement) is the mean distance between all ordered
        pairs of the n pairable values, regardless of unit.

    A value is "pairable" only if its unit holds at least two ratings; lone
    ratings are ignored in both D_o and D_e. When every unit has the same
    number of ratings this reduces to a plain average over within-unit pairs.

    Parameters:
    data : array-like, 2-D (units as rows, raters as columns)
           Each cell contains the rating for that unit by that rater.
           Values are converted to float; missing values should be
           represented as np.nan.

    distance_metric : function that computes the distance between any two values.
                     It is expected to be symmetric and to return 0 for identical
                     values (each unordered pair is evaluated once and counted
                     twice). The default, nominal_distance, returns 0 if the
                     values are identical and 1 if they are different.

    Returns:
    alpha : float
           Krippendorff's alpha coefficient.
           np.nan if no unit has at least two ratings.
           1.0 if the expected disagreement is zero (e.g. every pairable
           value is identical), so the ratio would otherwise be undefined.

    Raises:
    ValueError : if data is not 2-dimensional or cannot be converted to float.
    """
    data = np.array(data, dtype=float)
    if data.ndim != 2:
        raise ValueError(
            f"data must be 2-dimensional (units x raters), got {data.ndim} dimension(s)"
        )

    pairable_values = []       # every rating that shares its unit with another rating
    weighted_disagreement = 0  # sum over units of (ordered-pair distances) / (m_u - 1)

    for unit_row in data:
        unit_ratings = unit_row[~np.isnan(unit_row)]
        m_u = len(unit_ratings)

        # A unit with zero or one rating offers nothing to compare.
        if m_u < 2:
            continue

        unit_disagreement = 0
        for i in range(m_u):
            for j in range(i + 1, m_u):
                unit_disagreement += distance_metric(unit_ratings[i], unit_ratings[j])

        # Factor 2 turns the unordered-pair sum into the ordered-pair sum;
        # 1 / (m_u - 1) keeps units with many ratings from dominating.
        weighted_disagreement += 2 * unit_disagreement / (m_u - 1)
        pairable_values.extend(unit_ratings)

    n_pairable = len(pairable_values)
    if n_pairable == 0:
        return np.nan
    observed_disagreement = weighted_disagreement / n_pairable

    # n_pairable >= 2 here, so the denominator below is never zero.
    value_counts = Counter(pairable_values)
    n_ordered_pairs = n_pairable * (n_pairable - 1)

    expected_disagreement = 0
    for value1, count1 in value_counts.items():
        for value2, count2 in value_counts.items():
            prob = (count1 * count2) / n_ordered_pairs
            expected_disagreement += distance_metric(value1, value2) * prob

    if expected_disagreement == 0:
        return 1.0
    return 1 - (observed_disagreement / expected_disagreement)

## First round of papers, week of Mar 10th, 2025
# if __name__ == "__main__":
#     data = np.array([
#         [0,0,0],
#         [1,1,1],
#         [1,1,1],
#         [1,1,1],
#         [0,1,1],
#         [1,1,0],
#         [0,1,0],
#         [1,1,1],
#         [0,0,0],
#         [0,0,0],
#         [0,0,0],
#         [np.nan,1,0],
#         [1,1,0],
#         [1,1,0],
#         [1,0,0],
#         [0,1,0],
#         [1,np.nan,0],
#         [1,1,0],
#         [1,1,1],
#         [1,1,0],
#     ])

## Second round of papers, week of Mar 19th, 2025
# if __name__ == "__main__":
#     data = np.array([
#         [0,1,0],
#         [1,1,1],
#         [1,0,1],
#         [0,0,0],
#         [1,1,1],
#         [1,1,1],
#         [1,0,1],
#         [1,1,1],
#         [0,1,0],
#         [0,0,0],
#         [0,1,0],
#         [1,1,1],
#         [1,1,1],
#         [0,0,0],
#         [0,1,0],
#         [0,0,0],
#         [0,0,0],
#         [0,1,0],
#         [0,0,0],
#         [0,0,0],
#     ])

## Third round of papers, week of Mar 27th, 2025
if __name__ == "__main__":

    def create_reliability_dataframe(data, alpha):
        """
        Tabulate the ratings and append the agreement score.

        Parameters:
        data : 2-D numpy array (units as rows, raters as columns)
        alpha : float, formatted to four decimals in the summary row

        Returns:
        pandas DataFrame with one "Rater k" column per rater, the index named
        "Unit", and a final "Agreement" row whose first cell holds the
        formatted alpha (remaining cells are empty strings).
        """
        n_raters = data.shape[1]
        table = pd.DataFrame(
            data,
            columns=[f"Rater {rater}" for rater in range(1, n_raters + 1)],
        )
        table.index.name = "Unit"

        # Summary text goes in the first rater column; the rest stay blank.
        summary_row = [f"Krippendorff's alpha: {alpha:.4f}"]
        summary_row.extend("" for _ in range(n_raters - 1))
        table.loc["Agreement"] = summary_row
        return table

    # One row per unit, one column per rater.
    data = np.array([
        [0,0,0],
        [0,0,0],
        [0,0,0],
        [1,1,1],
        [0,0,0],
        [1,1,1],
        [1,1,1],
        [1,1,1],
        [0,0,0],
        [0,0,0],
        [1,1,1],
        [0,0,0],
        [1,1,1],
        [0,0,0],
        [0,0,0],
        [0,0,0],
        [0,0,0],
        [1,1,1],
        [1,1,1],
        [1,1,1],
    ])

    alpha = krippendorff_alpha(data)
    print(f"Krippendorff's alpha: {alpha:.4f}")

    # Rule of thumb used in this notebook for reading alpha:
    #   above 0.8    -> very good reliability
    #   0.67 to 0.8  -> good reliability
    #   0.4 to 0.67  -> fair reliability
    #   below 0.4    -> poor reliability

    reliability_df = create_reliability_dataframe(data, alpha)
    print("\nReliability analysis:")
    print(reliability_df)

Krippendorff's alpha: 1.0000

Reliability analysis:
                                Rater 1 Rater 2 Rater 3
Unit                                                   
0                                     0       0       0
1                                     0       0       0
2                                     0       0       0
3                                     1       1       1
4                                     0       0       0
5                                     1       1       1
6                                     1       1       1
7                                     1       1       1
8                                     0       0       0
9                                     0       0       0
10                                    1       1       1
11                                    0       0       0
12                                    1       1       1
13                                    0       0       0
14                                    0       0     