In [1]:
import pandas as pd
import numpy as np
import ast
import seaborn as sns
import matplotlib.pyplot as plt
import pickle
from statsmodels.stats.proportion import proportions_chisquare_allpairs, proportions_ztest
from statsmodels.stats.multitest import multipletests
from sklearn.metrics import cohen_kappa_score
import random
import networkx as nx

# pd.options.display.max_columns=150
# pd.set_option('display.max_colwidth', None)

In [2]:
def annotation_text_to_list(text):
    """
    Parses annotation text and extracts a list of fallacies.

    The text is parsed as a Python literal
    (example: {"choices":["Loaded Language","Ad Hominem"]}). When the result is
    a dictionary with a 'choices' key, the labels stored under that key are
    returned as a list. In every other case (unparseable text, not a dictionary,
    no 'choices' key, or a 'choices' value that is neither a string nor a
    list/tuple) the original text is returned wrapped in a one-element list, so
    the return value is always a list.

    Args:
        text (str): The annotation text to be parsed.

    Returns:
        list: The fallacies extracted from the annotation text, or
              ``[text]`` when no list of choices could be extracted.
    """
    try:
        parsed = ast.literal_eval(text)
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        # literal_eval may raise any of these on malformed input; a plain label
        # such as "Ad Hominem" is not a literal and lands here as well.
        return [text]

    if isinstance(parsed, dict) and 'choices' in parsed:
        choices = parsed['choices']
        if isinstance(choices, (list, tuple)):
            return list(choices)
        if isinstance(choices, str):
            # A lone label must stay one item: joining a bare string later
            # would split it into characters.
            return [choices]

    # Not a dictionary, no 'choices' key, or an unusable 'choices' value
    return [text]

def process_annotations_csv(csv_file):
    """
    Processes an annotations CSV file exported from Label Studio. Creates one-hot encoded
    columns in a dataframe. Each binary column corresponds to a label.

    The 'annotation_id' and 'created_at' columns are dropped when present, 'new_id' is cast
    to the pandas 'string' dtype, and the 'fallacy' column is replaced by the list of labels
    produced by `annotation_text_to_list`. When the 'None of the above' label occurs in the
    data, its column is moved to the last position of the dummy variables.

    Args:
        csv_file (str): The path to the CSV file containing annotation data.

    Returns:
        tuple: A tuple containing two data frames. The first data frame is the processed data
               with additional columns for each fallacy, the second data frame contains the
               dummy variables created from the 'fallacy' column.

    Raises:
        KeyError: If the CSV has no 'new_id' or no 'fallacy' column.
    """
    none_label = 'None of the above'

    # Load annotations; the bookkeeping columns are optional in the export
    data = pd.read_csv(csv_file)
    data = data.drop(columns=['annotation_id', 'created_at'], errors='ignore')
    data['new_id'] = data['new_id'].astype('string')

    # Transform json like text to lists
    data['fallacy'] = data['fallacy'].apply(annotation_text_to_list)

    # Transform lists to dummy variables ('|' is both the join and the split separator)
    dummy_variables = data['fallacy'].str.join('|').str.get_dummies(sep='|')

    # Move 'None of the above' to the last column. The label is only a column when at
    # least one row carries it, so guard the pop instead of failing with KeyError.
    if none_label in dummy_variables.columns:
        none_column = dummy_variables.pop(none_label)
        dummy_variables.insert(dummy_variables.shape[1], none_label, none_column)

    # Add dummies to general data
    data = pd.concat([data, dummy_variables], axis=1)

    return data, dummy_variables

# Interannotator agreement

Using the following categorization of Cohen's kappa, *Hasty Generalization*, *False Dilemma*, and *Loaded Language* show moderate agreement; the other four categories (*Appeal to Fear*, *Ad Hominem*, *Appeal to Ridicule*, and *None of the above*) show substantial agreement.  

* ≤ 0: No agreement (no better than chance)
* 0.01-0.20: Slight agreement
* 0.21-0.40: Fair agreement
* 0.41-0.60: Moderate agreement
* 0.61-0.80: Substantial agreement
* 0.81-1.00: Almost perfect agreement

In [3]:
# Second-round annotations from annotator 2 (one-hot encoded on load) and the reassessment annotations
df_annotations_multiple2_annotator2, dummy_variables = process_annotations_csv(
    'datasets/annotated_datasets/annotations_multiple2_annotator2.csv'
)
df_multilabel_annotations = pd.read_csv("datasets/annotated_datasets/df_multilabel_annotations.csv")
df_multilabel_annotations['new_id'] = df_multilabel_annotations['new_id'].astype(str)

# The dummy-variable columns are the fallacy labels
fallacy_names = dummy_variables.columns.tolist()

# Interannotator agreement by fallacy

# Pair both annotators' labels per item: inner join on the ID,
# '_1' marks the reassessment labels and '_2' marks annotator 2
merged_df = df_multilabel_annotations[['new_id', *fallacy_names]].merge(
    df_annotations_multiple2_annotator2[['new_id', *fallacy_names]],
    how='inner',
    on='new_id',
    suffixes=('_1', '_2'),
)

# Cohen's kappa, one score per fallacy
kappa = []
for variable in fallacy_names:
    annotator_1_labels = merged_df[f'{variable}_1']
    annotator_2_labels = merged_df[f'{variable}_2']
    kappa.append(cohen_kappa_score(annotator_1_labels, annotator_2_labels))

# Report the mean, then tabulate the per-fallacy scores from highest to lowest
print('Average kappa', np.mean(kappa))
df_kappa = pd.DataFrame({'fallacy': fallacy_names, 'kappa': kappa})
df_kappa.sort_values('kappa', ascending=False)

Average kappa 0.666603705766611


Unnamed: 0,fallacy,kappa
1,Appeal to Fear,0.805195
0,Ad Hominem,0.7921
2,Appeal to Ridicule,0.767442
6,None of the above,0.724771
5,Loaded Language,0.562363
3,False Dilemma,0.554896
4,Hasty Generalization,0.459459
