In [1]:
# INSTALLATION
%%capture
!pip install krippendorff

In [2]:
# IMPORT
import numpy as np
import krippendorff
import pandas as pd

# CONSTANTS
# File names of the two annotation tables loaded by reading() below.
testname = 'Annotations_test.csv'
trainname = 'Annotations_train.csv'
# Number of annotator columns; they are looked up as 'Ann_0' ... 'Ann_<annotators-1>'.
annotators = 4
# Field delimiter handed to the CSV reader (the files are tab-separated).
sep = '\t'

In [3]:
# READING FILEs
def reading(filename, sep='\t', verbose=True):
  """Load a delimited annotation file and discard the rows without text.

  Args:
    filename: Source handed straight to ``pd.read_csv``.
    sep: Field delimiter; defaults to a tab.
    verbose: When true, print the shape and the first rows both before and
      after the rows with a missing "Texts" value are removed.

  Returns:
    The loaded DataFrame without the rows whose "Texts" value is missing.
  """
  def _preview(frame):
    # Same two-line summary the function prints at each stage.
    print(frame.shape)
    print(frame.head())

  loaded = pd.read_csv(filename, sep=sep)
  if verbose:
    _preview(loaded)

  with_text = loaded.dropna(subset=["Texts"])
  if verbose:
    _preview(with_text)
  return with_text

# Load both files quietly (verbose=False), then keep only the per-annotator
# label columns 'Ann_0' ... 'Ann_<annotators-1>' of each table.
traindf = reading(trainname, sep, False)
testdf = reading(testname, sep, False)

ann_columns = ['Ann_' + str(idx) for idx in range(annotators)]
train_ann = traindf[ann_columns].copy()
test_ann = testdf[ann_columns].copy()

In [4]:
# TRANSFORM DATA (formatting and removing nans with 0s)
def transformdata(df, verbose=True):
  """Convert an annotation DataFrame into an array with one row per column.

  Every cell is converted with ``int()``. Cells that cannot be converted
  (missing values, non-numeric text, ...) become NaN and are then replaced
  by 0.

  Args:
    df: DataFrame with one column per annotator and one row per annotation.
    verbose: When true, print the dimensions and contents of the raw,
      converted and NaN-filled arrays.

  Returns:
    A NumPy array holding one row per column of ``df``, with 0 in place of
    every value that could not be converted to an integer.
  """
  def _show(arr):
    # Guard the column count so empty input does not raise IndexError.
    print(len(arr), len(arr[0]) if len(arr) else 0)
    print(arr)

  def _to_int_or_nan(value):
    # int() raises ValueError for NaN / non-numeric text, TypeError for None
    # and OverflowError for infinities; all of them mean "no usable label".
    try:
      return int(value)
    except (TypeError, ValueError, OverflowError):
      # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
      return np.nan

  if verbose:
    _show(np.array(df))

  # One list per column (annotator), one entry per row (annotation).
  annotations = [[_to_int_or_nan(row) for row in df[col]] for col in df]
  data = np.array(annotations)
  if verbose:
    _show(data)

  data = np.nan_to_num(data, nan=0)  # Substitute NaNs with 0s
  if verbose:
    _show(data)
  return data

# Build the numeric arrays (one row per annotator column) for both tables,
# without the debug printing.
traindata = transformdata(train_ann, verbose=False)
testdata = transformdata(test_ann, verbose=False)

In [5]:
# SUBSTITUTE ANY NUMBER a WITH ANY NUMBER b
def substitutedata(data, a, b, verbose=True):
  """Return a new array in which every element of ``data`` equal to ``a`` is ``b``.

  Args:
    data: Array of annotations; it is not modified.
    a: Value to look for.
    b: Value written wherever ``a`` was found.
    verbose: When true, print the dimensions and contents of the result.

  Returns:
    The array produced by ``np.where`` with the substitution applied.
  """
  is_match = data == a
  replaced = np.where(is_match, b, data)
  if verbose:
    print(len(replaced), len(replaced[0]))
    print(replaced)
  return replaced

# Collapse labels 2 and 3 into label 1 in both arrays.
for merged_label in (2, 3):
  traindata = substitutedata(traindata, merged_label, 1, False)
  testdata = substitutedata(testdata, merged_label, 1, False)

In [6]:
# Krippendorff's alpha (inter-annotator agreement) for each table, treating
# the labels as nominal categories. The arrays hold one row per annotator
# column, as built by transformdata().
# NOTE(review): transformdata() replaces missing/unparseable annotations with
# 0, so they are scored here as an ordinary category rather than as missing
# data -- confirm this is intended.
alpha_train = krippendorff.alpha(reliability_data=traindata, level_of_measurement='nominal')
alpha_test = krippendorff.alpha(reliability_data=testdata, level_of_measurement='nominal')

print(alpha_train)
print(alpha_test)

0.47493750626794484
0.6114656322989653
