In [None]:
import pandas as pd

# Load the two annotators' tab-separated annotation files.
# NOTE(review): the two filenames differ in capitalisation
# ('Processed_A1_Annotations' vs 'processed_A2_annotations') - confirm that
# both match the names on disk, since a case-sensitive filesystem will not
# resolve a mismatched name.
file1 = pd.read_csv('Processed_A1_Annotations', delimiter='\t')
file2 = pd.read_csv('processed_A2_annotations', delimiter='\t')

# Keep only the join keys and the label from each annotator's frame.
key_columns = ['index', 'id1', 'id2']
file1_subset = file1[key_columns + ['label']]
file2_subset = file2[key_columns + ['label']]

# Inner-join on the keys so only items present in both files are compared.
# The suffixes give 'label_file1' (A1) and 'label_file2' (A2).
# NOTE(review): if a key triple occurs more than once in either file the merge
# emits one row per combination, which inflates the counts - verify the keys
# are unique per file.
merged = file1_subset.merge(file2_subset, on=key_columns, how='inner', suffixes=('_file1', '_file2'))

# Build one boolean mask per (annotator, label) pair; combining the masks
# counts each cell of the 2x2 agreement table without a Python-level row loop.
a1_is_0 = merged['label_file1'] == 0
a1_is_1 = merged['label_file1'] == 1
a2_is_0 = merged['label_file2'] == 0
a2_is_1 = merged['label_file2'] == 1

# int(...) keeps the counters as plain Python ints, as the original loop did.
count_00 = int((a1_is_0 & a2_is_0).sum())
count_11 = int((a1_is_1 & a2_is_1).sum())
count_01 = int((a1_is_0 & a2_is_1).sum())
count_10 = int((a1_is_1 & a2_is_0).sum())

print(f"Count of A1 labelling non-causal and A2 labelling non-causal: {count_00}")
print(f"Count of A1 labelling causal and A2 labelling causal: {count_11}")
print(f"Count of A1 labelling non-causal and A2 labelling causal: {count_01}")
print(f"Count of A1 labelling causal and A2 labelling non-causal: {count_10}")

# Rows whose labels are not exactly 0 or 1 (missing values, strings, other
# codes) fall into none of the four cells; report them instead of dropping
# them silently, because they shrink the sample used downstream.
uncounted_rows = len(merged) - (count_00 + count_11 + count_01 + count_10)
if uncounted_rows:
    print(f"Warning: {uncounted_rows} merged row(s) had a label other than 0/1 and were not counted")


In [None]:
def calculate_cohens_kappa(count_00, count_11, count_01, count_10):
    """Compute Cohen's kappa from the four cells of a 2x2 agreement table.

    Args:
        count_00: Number of items both raters labelled 0.
        count_11: Number of items both raters labelled 1.
        count_01: Number of items the first rater labelled 0 and the second 1.
        count_10: Number of items the first rater labelled 1 and the second 0.

    Returns:
        ``(Po - Pe) / (1 - Pe)``, where ``Po`` is the observed agreement and
        ``Pe`` the agreement expected from the raters' marginal totals.
        Returns ``float('nan')`` when kappa is undefined: when there are no
        observations, or when the expected agreement equals 1 (every
        observation lies in a single agreement cell), since both cases would
        otherwise divide by zero.
    """
    # Total number of observations
    n = count_00 + count_11 + count_01 + count_10
    if n == 0:
        # No data: neither Po nor Pe can be computed.
        return float("nan")

    # Observed agreement: share of items on the diagonal of the table.
    Po = (count_00 + count_11) / n

    # Marginal totals for each rater and label.
    first_rater_0 = count_00 + count_01
    first_rater_1 = count_11 + count_10
    second_rater_0 = count_00 + count_10
    second_rater_1 = count_11 + count_01

    # Expected agreement: chance that both raters pick the same label if each
    # labelled independently according to their own marginal rates.
    Pe = (first_rater_0 * second_rater_0 + first_rater_1 * second_rater_1) / n**2

    if Pe == 1:
        # Both raters used one and the same label for every item, so the
        # kappa formula degenerates to 0 / 0.
        return float("nan")

    # Cohen's kappa
    kappa = (Po - Pe) / (1 - Pe)

    return kappa

# Feed the four agreement-table counts into the kappa helper and report it.
kappa = calculate_cohens_kappa(
    count_00=count_00,
    count_11=count_11,
    count_01=count_01,
    count_10=count_10,
)
print(f"Cohen's kappa: {kappa}")