# Movement annotation III: Testing the interrater agreement between manual and automatic annotation

In this script, we prepare data to test the interrater agreement on movement annotation. To test the robustness, we compute interrater agreement between two human annotatos (AC, GR) and between each human annotator and the automatic annotation created in the previous script (XXX). We compute IA for each tier separately.

We use EasyDIAG (REF) to compute the IA, but document the results here in the table.


In [10]:
import os
import glob
import numpy as np
import pandas as pd
import xml.etree.ElementTree as ET

curfolder = os.getcwd()

# Here we store our merged processed files
processedfolder = os.path.join(curfolder + '\\..\\03_TS_processing\\TS_merged\\')
processedfiles = glob.glob(processedfolder + '*.csv')

# Here we store annotations from the logreg model
annotatedfolder = os.path.join(curfolder + '\\TS_annotated_logreg\\')
folders = glob.glob(annotatedfolder + '*\\')

folders60 = [x for x in folders if '0_6' in x] #60percent confidence
folders80 = [x for x in folders if '0_8' in x] #80percent confidence

# Here we store manual annotations from R1 (AC)
manualfolder1 = os.path.join(curfolder + '\\ManualAnno\\R1\\')
manualfiles1 = glob.glob(manualfolder1 + '*.eaf')
manualfiles1 = [x for x in manualfiles1 if 'ELAN_tiers' in x]

# Here we store manual annotations from R2 (GR)
manualfolder2 = os.path.join(curfolder + '\\ManualAnno\\R3\\')
manualfiles2 = glob.glob(manualfolder2 + '*.eaf')
manualfiles2 = [x for x in manualfiles2 if 'ELAN_tiers' in x]

# Here we store the txt files we need for EasyDIAG
interfolder = curfolder + '\\InterAg\\'

e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\..\03_TS_processing\TS_merged\
['e:\\FLESH_ContinuousBodilyEffort\\04_TS_movementAnnotation\\..\\03_TS_processing\\TS_merged\\merged_0_1_44_p0.csv', 'e:\\FLESH_ContinuousBodilyEffort\\04_TS_movementAnnotation\\..\\03_TS_processing\\TS_merged\\merged_0_1_0_p0.csv', 'e:\\FLESH_ContinuousBodilyEffort\\04_TS_movementAnnotation\\..\\03_TS_processing\\TS_merged\\merged_0_1_10_p1.csv', 'e:\\FLESH_ContinuousBodilyEffort\\04_TS_movementAnnotation\\..\\03_TS_processing\\TS_merged\\merged_0_1_11_p1.csv', 'e:\\FLESH_ContinuousBodilyEffort\\04_TS_movementAnnotation\\..\\03_TS_processing\\TS_merged\\merged_0_1_12_p1.csv', 'e:\\FLESH_ContinuousBodilyEffort\\04_TS_movementAnnotation\\..\\03_TS_processing\\TS_merged\\merged_0_1_13_p1.csv', 'e:\\FLESH_ContinuousBodilyEffort\\04_TS_movementAnnotation\\..\\03_TS_processing\\TS_merged\\merged_0_1_14_p1.csv', 'e:\\FLESH_ContinuousBodilyEffort\\0

## Preprocessing annotations

Now we need to get both manual and automatic annotations into format that EasyDIAG requires - so simple .txt files with timestamps and annotation values. For annotations that have been created by human annotators, we need to extract the timestamps and values from the .eaf files. 

In [3]:
# Function to parse ELAN file
def parse_eaf_file(eaf_file, rel_tiers):
    tree = ET.parse(eaf_file)
    root = tree.getroot()

    time_order = root.find('TIME_ORDER')
    time_slots = {time_slot.attrib['TIME_SLOT_ID']: time_slot.attrib['TIME_VALUE'] for time_slot in time_order}

    annotations = []
    relevant_tiers = {rel_tiers}
    for tier in root.findall('TIER'):
        tier_id = tier.attrib['TIER_ID']
        if tier_id in relevant_tiers:
            for annotation in tier.findall('ANNOTATION/ALIGNABLE_ANNOTATION'):
                # Ensure required attributes are present
                if 'TIME_SLOT_REF1' in annotation.attrib and 'TIME_SLOT_REF2' in annotation.attrib:
                    ts_ref1 = annotation.attrib['TIME_SLOT_REF1']
                    ts_ref2 = annotation.attrib['TIME_SLOT_REF2']
                    # Get annotation ID if it exists, otherwise set to None
                    ann_id = annotation.attrib.get('ANNOTATION_ID', None)
                    annotation_value = annotation.find('ANNOTATION_VALUE').text.strip()
                    annotations.append({
                        'tier_id': tier_id,
                        'annotation_id': ann_id,
                        'start_time': time_slots[ts_ref1],
                        'end_time': time_slots[ts_ref2],
                        'annotation_value': annotation_value
                    })

    return annotations

# Function to write ELAN into txt file
def ELAN_into_txt(txtfile, raterID, foi):
    with open(txtfile, 'w') as f:
        for file in foi:
            print('working on ' + file)
            # Filename
            filename = file.split('\\')[-1]
            # Parse ELAN file
            annotations = parse_eaf_file(file, 'head_mov')
            # Write annotations into txt file
            for annotation in annotations:
                f.write(f"{raterID}\t{annotation['start_time']}\t{annotation['end_time']}\t{annotation['annotation_value']}\t{filename}\n")


In [5]:
foi = manualfiles1  # here we store manual annotations that we want to convert into txt files
raterIDfile = 'R1'  # this is the rater as we name it in the txt files
raterID = 'R2'      # this is the ID we need for EasyDIAG (the software always needs R1 and R2)

# These are thhe files we want to create
txtfile_head = interfolder + raterIDfile + '_Manual_head.txt'
txtfile_upper = interfolder + raterIDfile + '_Manual_upper.txt'
txtfile_lower = interfolder + raterIDfile + '_Manual_lower.txt'
txtfile_arms = interfolder + raterIDfile + '_Manual_arms.txt'

# For each tier, extract the annotations from ELAN file and save them in a txt file
ELAN_into_txt(txtfile_head, raterID, foi)
ELAN_into_txt(txtfile_upper, raterID, foi)
ELAN_into_txt(txtfile_lower, raterID, foi)
ELAN_into_txt(txtfile_arms, raterID, foi)

working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_11_p1_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_12_p1_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_13_p1_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_14_p1_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_15_p1_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_16_p1_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_17_p1_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_20_p0_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\ManualAnno\R1\0_1_21_p0_ELAN_tiers.eaf
working on e:\FLESH_ContinuousBodilyE

For automatic annotations, we need to extract the timestamps and values from the .csv files. Before doing that, we need to handle two issues that stem from the the fact that the classifier can create flickering annotations, as the confidence values continuously vary throughout each trial.

Similarly to REF, we apply two rules to handle this flickering:
- Rule 1: If there is a nomovement event between two movement events that is shorter than 200 ms, this is considered as part of the movement event. 
- Rule 2: If there is a movement event between two nomovement events that is shorter than 200 ms, this is considered as part of the nomovement event.

Afterwards, we take the first movement event and the very last movement event, and consider everything in between as a movement. 

In [None]:
# Function to get chunks of annotations
def get_chunks(anno_df):
    anno_df['chunk'] = (anno_df['anno_values'] != anno_df['anno_values'].shift()).cumsum()
    anno_df['idx'] = anno_df.index

    # Calculate start and end of each chunk, grouped by anno_values, save also the first and last index
    chunks = anno_df.groupby(['anno_values', 'chunk']).agg(
        time_ms_min=('time_ms', 'first'),
        time_ms_max=('time_ms', 'last'),
        idx_min=('idx', 'first'),
        idx_max=('idx', 'last')
    ).reset_index()

    # Order the chunks
    chunks = chunks.sort_values('idx_min').reset_index(drop=True)

    return chunks

In [11]:
foi = folders60 # set which folder (threshold) you want to process

for folder in foi:
    # get tierID
    tier = folder.split('\\')[-2].split('_')[0]

    if tier == 'head':
        tier = 'head'
    elif tier == 'upperBody':
        tier = 'upper'
    elif tier == 'lowerBody':
        tier = 'lower'

    # This is the file we want to create
    txtfile = interfolder + 'AutoAnno_' + tier + '.txt'

    # List all files in the folder
    files = glob.glob(folder + '*.csv')

    for file in files:
        print('processing: ' + file)

        # Filename
        filename = file.split('\\')[-1].split('.')[0]
        filename = filename.split('_')[2:6]
        filename = '_'.join(filename)

        # Check if we have manual file matching to this file, otherwise skip
        manualfile = [x for x in manualfiles1 if filename in x]
        if len(manualfile) == 0:
            continue

        # Now we process the annotations made by the logreg model
        anno_df = pd.read_csv(file)

        # Chunk the df to see unique annotated chunks
        chunks = get_chunks(anno_df)

        # Check for fake pauses (i.e., nomovement annotation that last for less than 200ms)
        for i in range(1, len(chunks)-1):
            if chunks.loc[i, 'anno_values'] == 'no movement' and chunks.loc[i-1, 'anno_values'] == 'movement' and chunks.loc[i+1, 'anno_values'] == 'movement':
                if chunks.loc[i, 'time_ms_max'] - chunks.loc[i, 'time_ms_min'] < 200:
                    print('found a chunk of no movement between two movement chunks that is shorter than 200 ms')
                    # Change the chunk into movement
                    anno_df.loc[chunks.loc[i, 'idx_min']:chunks.loc[i, 'idx_max'], 'anno_values'] = 'movement'

        # Calculate new chunks
        chunks = get_chunks(anno_df)

        # Now check for fake movement (i.e., movement chunk that is shorter than 200ms)
        for i in range(1, len(chunks)-1):
            if chunks.loc[i, 'anno_values'] == 'movement' and chunks.loc[i-1, 'anno_values'] == 'no movement' and chunks.loc[i+1, 'anno_values'] == 'no movement':
                if chunks.loc[i, 'time_ms_max'] - chunks.loc[i, 'time_ms_min'] < 200:
                    print('found a chunk of movement between two no movement chunks that is shorter than 250 ms')
                    # change the chunk to no movement in the original df
                    anno_df.loc[chunks.loc[i, 'idx_min']:chunks.loc[i, 'idx_max'], 'anno_values'] = 'no movement'

        
        # Now, similarly to our human annotators, we consider movement anything from the very first movement to the very last movement
        if 'movement' in anno_df['anno_values'].unique():
            # Get the first and last index of movement
            first_idx = anno_df[anno_df['anno_values'] == 'movement'].index[0]
            last_idx = anno_df[anno_df['anno_values'] == 'movement'].index[-1]
            # Change all between to movement
            anno_df.loc[first_idx:last_idx, 'anno_values'] = 'movement'

        # Calculate new chunks
        chunks = get_chunks(anno_df)

        # Rewrite "no movement" in anno_values to "nomovement" (to match the manual annotations)
        chunks['anno_values'] = chunks['anno_values'].apply(
            lambda x: 'nomovement' if x == 'no movement' else x
        )

        # Add elanID to chunks (to match the manual annotations in EasyDIAG)
        chunks['elanID']  = str(filename + '_ELAN_tiers.eaf')

        # Write to the text file
        with open(txtfile, 'a') as f:
            for _, row in chunks.iterrows():
                f.write(
                    f"Anno_R1\t{row['time_ms_min']}\t{row['time_ms_max']}\t{row['anno_values']}\t{row['elanID']}\n"
                )


arms
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\TS_annotated_logreg\arms_annotations_threshold_0_6\merged_anno_0_1_2_p0_annotated_threshold_0_6.csv
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\TS_annotated_logreg\arms_annotations_threshold_0_6\merged_anno_0_1_8_p0_annotated_threshold_0_6.csv
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\TS_annotated_logreg\arms_annotations_threshold_0_6\merged_anno_0_2_111_p1_annotated_threshold_0_6.csv
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\TS_annotated_logreg\arms_annotations_threshold_0_6\merged_anno_0_2_112_p1_annotated_threshold_0_6.csv
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\TS_annotated_logreg\arms_annotations_threshold_0_6\merged_anno_0_1_3_p0_annotated_threshold_0_6.csv
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnnotation\TS_annotated_logreg\arms_annotations_threshold_0_6\merged_anno_0_1_4_p0_annotated_threshold_0_6.csv
e:\FLESH_ContinuousBodilyEffort\04_TS_movementAnn

## Creating txt files for EasyDIAG

EasyDIAG requires a txt file that contains all annotations of a tier from both annotators we wish to compare. We therefore need to merge the files we have created above into one file for each tier.

(Note that it is better to delete old files rather than let them overwrite because that can lead to some bugs in the files for which the agreement will be messy)

In [None]:
# These tiers we want to compare
toi = ['arms', 'head', 'upper', 'lower'] 

for tier in toi:
    print('working on ' + tier)

    txtfile_auto = interfolder + 'AutoAnno_' + tier + '.txt'        # this is the automatic annotator
    txtfile_manual_r1 = interfolder + 'R3_Manual_' + tier + '.txt'  # this is manual annotator (AC)
    txtfile_manual_r3 = interfolder + 'R1_Manual_' + tier + '.txt'  # this is manual annotator (GR)

    # For us R1 is the manual annotator, R3 is second manual annotator, R2 is the automatic annotator
    # But note that manual annotator is in the txt files always as R2, and automatic annotator is always R1 

    comp1 = 'R3'    # change here who you want to compare
    comp2 = 'R2'    # with whom

    # Read in the files we want to compare
    r1_anno = pd.read_csv(txtfile_auto, sep='\t', header=None)         # change here who you want to compare
    r2_anno = pd.read_csv(txtfile_manual_r3, sep='\t', header=None)    # with whom

    # Check that both files have the same number of files (EasyDIAG will ignore this mismatch and lower the agreement)
    files_to_check_r1 = r1_anno[4].unique()
    files_to_check_r2 = r2_anno[4].unique()
    files_to_check = list(set(files_to_check_r1) & set(files_to_check_r2))

    # Adapt both
    rows_auto = r1_anno[r1_anno[4].isin(files_to_check)]
    rows_manual = r2_anno[r2_anno[4].isin(files_to_check)]

    # And concatenate
    concat_rows = pd.concat([r1_anno, r2_anno])

    # Save as new file
    txtfile_IA_arms = interfolder + 'IA_' + comp1 + '_' + comp2 + '_' + tier + '_threshold60.txt' # adapt the threshold based on what you work with

    with open(txtfile_IA_arms, 'w') as f:
        for index, row in concat_rows.iterrows():
            f.write(f"{row[0]}\t{row[1]}\t{row[2]}\t{row[3]}\t{row[4]}\n")

working on arms
working on head
working on upper
working on lower


## Interrater agreement: results

Here we report the kappa coefficients for both thresholds of confidence.

In [25]:

# Create an empty DataFrame
IAtable = pd.DataFrame(columns=['comparison', 'arms_80', 'arms_60', 'head_80', 'head_60', 'upper_80', 'upper_60', 'lower_80', 'lower_60'])

# Append rows to the DataFrame
data = [
    {'comparison': 'R1_Auto', 'arms_80': 0.74, 'arms_60': 0.76, 'head_80': 0.64, 'head_60': 0.67, 'upper_80': 0.68, 'upper_60': 0.70, 'lower_80': 0.66, 'lower_60': 0.64},
    {'comparison': 'R3_Auto', 'arms_80': 0.82, 'arms_60': 0.85, 'head_80': 0.60, 'head_60': 0.63, 'upper_80': 0.72, 'upper_60': 0.68, 'lower_80': 0.78, 'lower_60': 0.77},
]

IA_manual_arms = 0.84
IA_manual_head = 0.67
IA_manual_upper = 0.70
IA_manual_lower = 0.74

manual_IA = {'arms': IA_manual_arms, 'head': IA_manual_head, 'upper': IA_manual_upper, 'lower': IA_manual_lower}

# Convert the list of dicts into a DataFrame and concatenate
IAtable = pd.concat([IAtable, pd.DataFrame(data)], ignore_index=True)

# Calculate which tier is closer to the manual IA
for tier in manual_IA.keys():
    IAtable[f'{tier}_closest'] = IAtable.apply(
        lambda row: '80' if abs(row[f'{tier}_80'] - manual_IA[tier]) < abs(row[f'{tier}_60'] - manual_IA[tier]) else '60',
        axis=1
    )

# Calculate which tier is generally higher
for tier in manual_IA.keys():
    IAtable[f'{tier}_higher'] = IAtable.apply(
        lambda row: '80' if row[f'{tier}_80'] > row[f'{tier}_60'] else '60',
        axis=1
    )

IAtable


  IAtable = pd.concat([IAtable, pd.DataFrame(data)], ignore_index=True)


Unnamed: 0,comparison,arms_80,arms_60,head_80,head_60,upper_80,upper_60,lower_80,lower_60,arms_closest,head_closest,upper_closest,lower_closest,arms_higher,head_higher,upper_higher,lower_higher
0,R1_Auto,0.74,0.76,0.64,0.67,0.68,0.7,0.66,0.64,60,60,60,80,60,60,60,80
1,R3_Auto,0.82,0.85,0.6,0.63,0.72,0.68,0.78,0.77,60,60,60,60,60,60,80,80


The results show that 60% confidence threshold for a movement is more in agreement with the human annotators. Moreover, the kappa coefficient is generally higher for 60% threshold for each tier, with exception of lower body. This might be given by the higher amount of noise for the articulators of the lower body.

In the next script, we will....TO DECIDE

a) work with 60% for all
b) work with 60% for arms, head and upper body, and 80% for lower body