# Estimating the Intra Rater Reliability

Using the extended Kraemer (1980) Kappa.

The statistic is calculated as the proportion of agreement between labels corrected for chance agreements:

$$ \kappa_0 = \frac{\bar{P} - P_e}{1-P_e} + \frac{(1-\bar{P})}{Nm_0(1-P_e)} $$

Where $\bar{P}$ is the average proportion of agreement pairs.

In [69]:
import pandas as pd
import numpy as np
from pathlib import Path
from swinno_bioeconomy_directionality.utils import get_project_root
from swinno_bioeconomy_directionality.swinno_helpers import split_cols, melt_table

ROOT = get_project_root()

In [89]:

df = pd.read_excel(Path(ROOT, "data/modified-data", "crude_irr.xlsx"), usecols="A:C")
df.head()

Unnamed: 0,sinno_id,innovation_type_2,innovation_type
0,6746001,211,"105, 203"
1,6933001,701,104
2,7570001,103204,204
3,7653001,"602, 501",601501
4,7770001,9,501


In [90]:
df_split_1 = split_cols(df, "innovation_type_2", ",")
df_split_1


Unnamed: 0,sinno_id,innovation_type,innovation_type_2_0,innovation_type_2_1,innovation_type_2_2
0,6746001,"105, 203",211,,
1,6933001,104,701,,
2,7570001,204,103,204.0,
3,7653001,601501,602,501.0,
4,7770001,501,9,,
5,7856001,501,9,,
6,8039001,9,9,,
7,8078001,501,9,,
8,8193001,105106,0,,
9,8484001,9,9,,


In [91]:
df_split = split_cols(df_split_1, "innovation_type", ",")
df_split = df_split.applymap(lambda x: x.strip() if isinstance(x, str) else x)
df_split

Unnamed: 0,sinno_id,innovation_type_2_0,innovation_type_2_1,innovation_type_2_2,innovation_type_0,innovation_type_1,innovation_type_2
0,6746001,211,,,105,203.0,
1,6933001,701,,,104,,
2,7570001,103,204.0,,204,,
3,7653001,602,501.0,,601,501.0,
4,7770001,9,,,501,,
5,7856001,9,,,501,,
6,8039001,9,,,9,,
7,8078001,9,,,501,,
8,8193001,0,,,105,106.0,
9,8484001,9,,,9,,


In [92]:

obs = melt_table(df_split, "sinno_id", "innovation", value_name ="labels")
obs

Unnamed: 0,sinno_id,labels
0,6746001,211
1,6746001,203
2,6746001,105
3,6933001,701
4,6933001,104
5,7570001,204
6,7570001,103
7,7570001,204
8,7653001,501
9,7653001,601


In [93]:
n_labels = obs.groupby("sinno_id").count().reset_index()["labels"]
sum_labels = sum(n_labels)

In [122]:
obs_count = obs.groupby(obs.columns.tolist()).size().reset_index()
n_agree = len(obs_count[obs_count[0] > 1])
n_labels = len(obs["labels"].unique())

In [98]:
n_agree / sum_labels

0.1896551724137931

In [10]:
p = len(count_labels[count_labels["count_labels"] > 1])

In [104]:
def possible_pairs(x, y):
    import math
    return ( math.factorial(x) / (math.factorial(y)*math.factorial(x - y)) )

In [128]:
def generate_combinations(x, n):
    if n == 1:
        return [[e] for e in x]
    elif n == len(x):
        return [x]
    else:
        result = []
        for i in range(len(x)):
            rest = x[i+1:]
            for comb in generate_combinations(rest, n-1):
                result.append([x[i]] + comb)
        return result

def generate_all_combinations(x):
    result = []
    for i in range(1, len(x)+1):
        result.extend(generate_combinations(x, i))
    return result

x = [1, 2, 3, 4]
combinations = generate_all_combinations(obs.labels.unique())
len(combinations)


131071

In [130]:
n_agree / (23 * len(combinations))

3.648868701430655e-06

Now that I have the number of concordant pairs, I need to find their average proportion.
This means I need to find the number of all possible pairs.
What are possible pairs?
The number of unique labels used, or the unique number of possible labels?
Probably the first.

 

In [12]:
obs.labels.unique()

array(['211', ' 203', '105', '701', '104', '204', '103', '501', '601',
       ' 501', '602', '9', '106', '0', '207', '101', '203', '113', '110'],
      dtype=object)

In [13]:
possible_labels = len(obs["labels"].unique())
possible_labels

19

In [119]:
from itertools import combinations

combs = combinations(obs["labels"].unique(), len(obs["labels"].unique()))

In [121]:
len(obs["labels"].unique())

17

In [15]:
def kappa(p_bar, p_e, N, m):
    return ((p_bar - p_e) / (1-p_e)) + ((1-p_bar) / N*m(1-p_e))

In [20]:
def p_e(p):
    return sum([p**2 for p in p])

In [21]:
prop_obs = obs.groupby("sinno_id").count()["labels"]
p_e(prop_obs)

168

In [44]:
from krippendorff import alpha

# Define the data
obs_1 = [[123, 234], [123, 345, 456]]
obs_2 = [[123, 234], [123]]

# Combine the labels assigned by the two raters for each observation
data = []
for i in range(len(obs_1)):
    combined_labels = obs_1[i] + obs_2[i]
    data.append(combined_labels)

# Calculate Krippendorff's alpha using the nominal metric
alpha_value = alpha(data, level_of_measurement=='nominal')

print("Krippendorff's alpha:", alpha_value)

NameError: name 'level_of_measurement' is not defined

In [46]:
# Define the data
obs_1 = [[123, 234], [123, 345, 456]]
obs_2 = [[123, 234], [123]]

# Combine the labels assigned by the two raters for each observation
data = []
for i in range(len(obs_1)):
    combined_labels = obs_1[i] + obs_2[i]
    data.append(combined_labels)


In [48]:
test_df = pd.DataFrame(data)
test_df

Unnamed: 0,0,1,2,3
0,123,234,123,234
1,123,345,456,123
