In [10]:
import json
import numpy as np
from collections import defaultdict
from statsmodels.stats.inter_rater import fleiss_kappa


# Load annotations from a single JSON file
def load_annotations(file_path):
    """Read one annotation export and return its parsed JSON content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (the rest of this
        notebook iterates over it as a list of annotation dicts).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's default
    # locale encoding, which would mis-decode non-ASCII paths or labels on
    # systems whose default is not UTF-8.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


# Combine annotations from multiple files
def load_multiple_files(file_paths):
    """Load every file in ``file_paths`` and concatenate their annotations.

    Args:
        file_paths: Iterable of paths, each handed to ``load_annotations``.

    Returns:
        A single flat list holding the entries of each file, in the order
        the files were given.
    """
    # Files are read one after another; each file's entries are appended in
    # their original order.
    return [
        entry
        for path in file_paths
        for entry in load_annotations(path)
    ]


# Convert the annotations to a matrix of classifications
def create_classification_matrix(annotations):
    """Build the per-image table of "truck" / "no truck" vote counts.

    Annotations are grouped by image id, where the id is the last 11
    characters of ``annotation['image']`` (presumably a fixed-width file
    name that survives differing upload prefixes -- TODO confirm against the
    export format).

    Args:
        annotations: Iterable of dicts, each with an ``'image'`` string and a
            ``'choice'`` label.

    Returns:
        An integer ``numpy`` array of shape ``(n_images, 2)``. Column 0 counts
        the choices ``'Truck'`` / ``'Trucks'``; column 1 counts
        ``'No Trucks'`` / ``'No Truck'``. Rows follow the order in which each
        image id is first seen. Empty input yields an array of shape
        ``(0, 2)`` rather than a 1-D empty array.

    Note:
        Choices that match none of the four spellings are not counted in
        either column, so such an image ends up with a smaller row total
        than the others.
    """
    # Both singular and plural spellings occur in the exports, so each
    # category accepts two labels.
    truck_labels = ('Truck', 'Trucks')
    no_truck_labels = ('No Trucks', 'No Truck')

    # Organize the raw choices by image id.
    image_classifications = defaultdict(list)
    for annotation in annotations:
        image_id = annotation['image'][-11:]
        image_classifications[image_id].append(annotation['choice'])

    # One row per image: [votes for truck, votes for no truck].
    matrix = [
        [
            sum(choice in truck_labels for choice in choices),
            sum(choice in no_truck_labels for choice in choices),
        ]
        for choices in image_classifications.values()
    ]

    # The reshape keeps the result two-dimensional even when there are no
    # images; without it np.array([]) would be a 1-D float array.
    return np.array(matrix, dtype=int).reshape(-1, 2)


# Compute Fleiss' Kappa
def compute_fleiss_kappa(matrix):
    """Return Fleiss' kappa for a table of per-item category counts.

    Args:
        matrix: Table passed unchanged to statsmodels' ``fleiss_kappa``;
            each row is one item (image) and each column holds the number
            of raters who picked that category.

    Returns:
        The value returned by ``fleiss_kappa(matrix)``.
    """
    return fleiss_kappa(matrix)


# Main function
def main(file_paths):
    """Run the full agreement pipeline and print the resulting Fleiss' kappa.

    Args:
        file_paths: Paths of the annotation JSON files, one per annotator
            export, forwarded to ``load_multiple_files``.
    """
    # Load -> count votes per image -> score agreement.
    matrix = create_classification_matrix(load_multiple_files(file_paths))
    kappa = compute_fleiss_kappa(matrix)
    print(f"Fleiss' Kappa: {kappa}")


# Example usage
if __name__ == "__main__":
    # One export per annotator -- replace with your actual file paths.
    file_paths = [
        'cv1_v1.json',
        'cv_2.json',
        'cv3.json',
    ]
    main(file_paths)



Fleiss' Kappa: 0.6666666666666665


In [11]:
from sklearn.metrics import cohen_kappa_score
from collections import defaultdict
import json

# Helper function to create a mapping from text to labels
def create_label_mapping(annotation):
    """Group the first label of every labelled span by the span's text.

    Args:
        annotation: Dict whose ``'label'`` entry is a list of dicts, each
            carrying a ``'text'`` value and a non-empty ``'labels'`` list.

    Returns:
        A ``defaultdict(list)`` mapping each text to the list of first
        labels recorded for it, in order of appearance.
    """
    labels_by_text = defaultdict(list)
    for span in annotation['label']:
        # Only the first entry of a span's 'labels' list is kept.
        bucket = labels_by_text[span['text']]
        bucket.append(span['labels'][0])
    return labels_by_text

# Function to calculate Cohen's Kappa between two annotations
def calculate_cohen_kappa(annotator1_json, annotator2_json):
    """Compute Cohen's kappa between two annotators' span-label exports.

    Only the first entry of each JSON file is compared. For every text that
    either annotator labelled, the first label each annotator gave it is
    used; a text one annotator did not label is scored as ``'None'`` for
    that annotator.

    Args:
        annotator1_json: Path to the first annotator's JSON export.
        annotator2_json: Path to the second annotator's JSON export.

    Returns:
        The value returned by ``cohen_kappa_score`` for the two aligned
        label lists.
    """
    with open(annotator1_json, 'r', encoding='utf-8') as handle:
        first_export = json.load(handle)
    with open(annotator2_json, 'r', encoding='utf-8') as handle:
        second_export = json.load(handle)

    # Each export is a list; only its first dictionary is compared.
    first_labels_by_text = create_label_mapping(first_export[0])
    second_labels_by_text = create_label_mapping(second_export[0])

    # Every text seen by at least one annotator.
    all_texts = set(first_labels_by_text.keys()).union(set(second_labels_by_text.keys()))

    # Iterating the same set twice yields the same order, so the two lists
    # stay aligned position by position.
    labels1 = [first_labels_by_text.get(text, ['None'])[0] for text in all_texts]
    labels2 = [second_labels_by_text.get(text, ['None'])[0] for text in all_texts]

    return cohen_kappa_score(labels1, labels2)

# Example usage
# Paths to the two annotators' exports -- replace with actual file paths.
annotator1_json = '/content/NEPjson1.json'
annotator2_json = '/content/nlp_v2.json'

# Score the agreement between the two exports and report it.
kappa_score = calculate_cohen_kappa(annotator1_json, annotator2_json)
print(f"Cohen's Kappa: {kappa_score}")

Cohen's Kappa: 0.6285714285714286
