In [1]:
from openpyxl import Workbook, load_workbook
import docx
import re
from docx import Document
from string import punctuation
import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score
from nltk.tokenize import sent_tokenize, word_tokenize
from statsmodels.stats import inter_rater as irr
import matplotlib.pyplot as plt 

import os
import re
import copy
import sys

import warnings
warnings.filterwarnings('ignore')

In [2]:
root_folder = 'input_files_v3\excel annotations\samle set 05-01-2025 AS JM'

In [3]:
rater_list = []
rater_filepaths = {}
file_filepaths = {}
rater_folders = os.listdir(root_folder)

for rater_folder in rater_folders:
    rater = rater_folder.split()[1]
    rater_list.append(rater)
    # annotated_folder = os.listdir(os.path.join(root_folder, rater_folder))[0]
    annotated_folder_path = os.path.join(root_folder, rater_folder)
    annotated_files = os.listdir(annotated_folder_path)
    for annotated_file in annotated_files:
        annotated_file_path = os.path.join(annotated_folder_path, annotated_file)
        if rater not in rater_filepaths:
            rater_filepaths[rater] = []
        rater_filepaths[rater].append(annotated_file_path)

        if annotated_file not in file_filepaths:
            file_filepaths[annotated_file] = []
        file_filepaths[annotated_file].append(annotated_file_path)

In [4]:
rater_list

['AS', 'JM']

In [5]:
for rater, filepaths in rater_filepaths.items():
    print(f'{rater} - {len(filepaths)} files.')

AS - 130 files.
JM - 130 files.


In [6]:
# for file, filepaths in file_filepaths.items():
#     print(f'{file} - {len(filepaths)} annotations.')

In [7]:
def extract_ordered_annotated_text(doc_path):
    doc = Document(doc_path)
    text = "\n".join([para.text for para in doc.paragraphs])

    extracted_texts = []
    annotations = []

    # Create a regex pattern to match all annotation types
    combined_pattern = "|".join(
        f"({re.escape(start)}(.*?){re.escape(end)})" for start, end in annotation_markers.values()
    )

    matches = re.finditer(combined_pattern, text, re.DOTALL)

    for match in matches:
        matched_text = match.group(0)  # Full matched annotation
        for label, (start_tag, end_tag) in annotation_markers.items():
            if matched_text.startswith(start_tag) and matched_text.endswith(end_tag):
                content = matched_text[len(start_tag):-len(end_tag)].strip()  # Extract text inside markers
                extracted_texts.append(content)
                annotations.append(label)
                break  # Stop checking once a match is found

    final_text, final_annotation = [], []
    
    for text_segment, annotation in zip(extracted_texts, annotations):
        for text in sent_tokenize(text_segment):
            final_text.append(text)
            final_annotation.append(annotation)
            
    return final_text, final_annotation

In [8]:
# for file, filepaths in file_filepaths.items():
#     print(file)
#     for filepath in filepaths:
#         final_text, final_annotation = extract_ordered_annotated_text(filepath)
#         print(filepath, len(final_text), len(final_annotation))
#     print()

In [9]:
# for rater, filepaths in rater_filepaths.items():
#     print(rater)
#     for filepath in filepaths:
#         final_text, final_annotation = extract_ordered_annotated_text(filepath)
#         print(filepath, len(final_text), len(final_annotation))
#     print()

In [10]:
all_labels = ['irrelevent', 'thesis', 'evidence', 'reasoning', 'transition', 'maintaining progressing claim', 'summary']
all_raters = rater_list #['AS', 'CA', 'JM', 'OM'] # , 'SJ'

In [11]:
rater_consistency = {}
for rater in all_raters:
    rater_consistency[rater] = []

label_consistency = {}
for labels in all_labels:
    label_consistency[labels] = []

agreement_per_file = []

for file, filepaths in file_filepaths.items():
    df_annotations = pd.DataFrame()
    
    for filepath in filepaths:
        rater = filepath.split('\\')[3].split()[1]
        rater_annotation = pd.read_excel(filepath)
        rater_annotation.dropna(inplace=True)
        rater_annotation['label'][rater_annotation['label'].isna()] = 'irrelevent'
        df_annotations[rater] = rater_annotation['label']

    df_annotations = df_annotations.fillna('irrelevent')
    
    for i in range(len(all_raters)-1):
        rater1 = all_raters[i]
        for j in range(i+1, len(all_raters)):
            rater2 = all_raters[j]
            agg = irr.aggregate_raters(df_annotations[[rater1, rater2]])
            fleiss_kappa_score_pair = irr.fleiss_kappa(agg[0], method='fleiss')
            rater_consistency[rater1].append(fleiss_kappa_score_pair)
            rater_consistency[rater2].append(fleiss_kappa_score_pair)
    
    agg = irr.aggregate_raters(df_annotations[all_raters])
    fleiss_kappa_score = irr.fleiss_kappa(agg[0], method='fleiss')
    agreement_per_file.append(fleiss_kappa_score)
    print(f'{file} - {fleiss_kappa_score}')

    for label in all_labels:
        df_label = df_annotations[all_raters].copy(deep=True)
        for col in df_label.columns:
            df_label[col] = df_label[col] == label
        agg = irr.aggregate_raters(df_label)
        fleiss_kappa_score_label = irr.fleiss_kappa(agg[0], method='fleiss')
        if np.isnan(fleiss_kappa_score_label):
            fleiss_kappa_score_label = 1
        label_consistency[label].append(fleiss_kappa_score_label)

print()
print(f'Average Overall Consistency - {np.mean(agreement_per_file)}')
print()
print('Rater Consistency:')
for key, value in rater_consistency.items():
    print(f'  {key} - {np.mean(value)}')
print()
print('Label Consistency:')
for key, value in label_consistency.items():
    print(f'  {key} - {np.mean(value)}')

11167-FinalDraft-ndvji.xlsx - 0.5295777960719745
11168-FinalDraft-9u47i.xlsx - 0.7132011809363138
11169-FinalDraft-qfxfh.xlsx - 0.42290145985401456
11170-FinalDraft-sp64k.xlsx - 0.18040979510244884
11171-FinalDraft-lck95.xlsx - 0.47829769214482304
11172-FinalDraft-tysm7.xlsx - 0.5605815831987075
11173-FinalDraft-rt4lv.xlsx - 0.5723951285520976
11174-FinalDraft-pzez5.xlsx - 0.6005509641873279
11175-FinalDraft-htopu.xlsx - 0.6459016393442623
11178-FinalDraft-rvq27.xlsx - 0.7328155339805824
11180-FinalDraft-oa9o1.xlsx - 0.6828729281767956
11182-FinalDraft-5tv17.xlsx - 0.9237588652482271
11185-FinalDraft-16xd9.xlsx - 0.9022979270448337
11186-FinalDraft-w4mqb.xlsx - 0.5987828732884157
11187-FinalDraft-25gpc.xlsx - 0.6157049375371803
11190-FinalDraft-3cy44.xlsx - 0.45659259259259266
11191-FinalDraft-ckccd.xlsx - 0.6518634233430038
11192-FinalDraft-64wqv.xlsx - 0.4650751547303271
11193-FinalDraft-4lij5.xlsx - 0.6206594689232565
11196-FinalDraft-nx1ai.xlsx - 0.9139389481426996
11197-FinalDraft

In [12]:
for file in file_filepaths.keys():
    if len(file_filepaths[file]) < 2:
        print(file)

In [13]:
rater_filepaths['JM'][13]

'input_files_v3\\excel annotations\\samle set 05-01-2025 AS JM\\Rater JM\\11186-FinalDraft-w4mqb.xlsx'

In [14]:
temp = pd.read_excel(rater_filepaths['JM'][13])

In [15]:
temp['label'][temp['label'].isna()] = 'aaaaaa'

In [16]:
temp[temp['label'].isna()]

Unnamed: 0,text,label


In [17]:
# df_label

In [18]:
# label_consistency

In [19]:
# limit = min([len(l) for l in text_list])

# for i in range(63):
#     for l in text_list[1:3]:
#         print(l[i])
#         # print('--'*30)
#     print('+-'*30)

In [20]:
# pd.DataFrame(annotation_list).T[:][0].unique()

In [21]:
# cohen_kappa_score(annotation_list[2], annotation_list[3])

In [22]:
# for text, annotation in zip(final_text, final_annotation):
#     print(f'[{annotation}] | {text}')
#     print()

In [23]:
# def get_char_annotation(doc):
#     char_annotations = []
#     for paragraph in doc.paragraphs:
#         for run in paragraph.runs:
#             # if run.text.strip():
#             char_annotations += [int(run.font.highlight_color) if run.font.highlight_color else 99 for _ in range(len(run.text))]

#     return char_annotations

In [24]:
# def get_label_annotation(doc, label):
#     annotations = get_char_annotation(doc)
#     label_annotation = [annotation == label for annotation in annotations]
#     return label_annotation

In [25]:
# files_paths = {}

# for folder in annotation_folders:
#     folder_path = os.path.join(root_folder, folder)
#     files = os.listdir(folder_path)
#     for file in files:
#         file_path = os.path.join(folder_path, file).replace("~$", "09")

#         if file not in files_paths:
#             files_paths[file] = []

#         files_paths[file].append(file_path)

In [26]:
# files_annotations = {}

# for folder in annotation_folders:
#     folder_path = os.path.join(root_folder, folder)
#     files = os.listdir(folder_path)
#     for file in files:
#         file_path = os.path.join(folder_path, file).replace("~$", "09")
#         # print(file_path)
#         doc = Document(file_path)
#         label_annotation = get_label_annotation(doc, 7)

#         if file not in files_annotations:
#             files_annotations[file] = []

#         files_annotations[file].append(label_annotation)

In [27]:
# file_names = list(files_annotations.keys())
# for file_name in file_names:
#     if len(set([len(annotation) for annotation in files_annotations[file_name]])) != 1 or len(files_annotations[file_name]) == 1:
#         print(file_name)
#         files_annotations.pop(file_name, ' ')

In [28]:
# for file_name in files_annotations.keys():
#     print(file_name)

In [29]:
# annotation_groups = {
#     'No Statement': [],
#     'less than 0': [],
#     '0.01 – 0.20': [],
#     '0.21 – 0.40': [],
#     '0.41 – 0.60': [],
#     '0.61 – 0.80': [],
#     '0.81 – 1.00': []
# }

In [30]:
# for file_name in files_annotations.keys():
#     agg = irr.aggregate_raters(pd.DataFrame(files_annotations[file_name]).T)
#     fleiss_kappa_score = irr.fleiss_kappa(agg[0], method='fleiss')
#     if 'Norming' in file_name:
#         print(fleiss_kappa_score)
#     if fleiss_kappa_score <= 0:
#         annotation_groups['less than 0'].append(file_name)
#     elif fleiss_kappa_score <= 0.2:
#         annotation_groups['0.01 – 0.20'].append(file_name)
#     elif fleiss_kappa_score <= 0.4:
#         annotation_groups['0.21 – 0.40'].append(file_name)
#     elif fleiss_kappa_score <= 0.6:
#         annotation_groups['0.41 – 0.60'].append(file_name)
#     elif fleiss_kappa_score <= 0.8:
#         annotation_groups['0.61 – 0.80'].append(file_name)
#     elif fleiss_kappa_score <= 1.0:
#         annotation_groups['0.81 – 1.00'].append(file_name)
#     else:
#         annotation_groups['No Statement'].append(file_name)

In [31]:
# root_output_folder = 'Grouped Annotations'
# os.mkdir(root_output_folder)
# for group in annotation_groups.keys():
#     group_folder_path = os.path.join(root_output_folder, group)
#     os.mkdir(group_folder_path)
#     for file_name in annotation_groups[group]:
#         if file_name in files_paths:
#             file_folder_path = os.path.join(group_folder_path, file_name.split('.')[0])
#             os.mkdir(file_folder_path)
#             for i, file_path in enumerate(files_paths[file_name]):
#                 doc = Document(file_path)
#                 temp = file_name.split('.')
#                 temp.insert(1, f'__{str(i+1)}.')
#                 new_file_name = ''.join(temp)
#                 doc.save(os.path.join(file_folder_path, new_file_name))

In [32]:
# temp = [1,2,3,4,5]
# temp = [i==2 for i in temp]
# temp

In [33]:
# norm_file_paths = files_paths['Thesis Statement Norming Set 1.docx']
# para_annotations = [[] for _ in range(9)]

# for norm_file_path in norm_file_paths:
#     rater = norm_file_path.split('\\')[2].split()[2]
#     doc = Document(norm_file_path)
#     char_annotations = []
#     print(rater)
#     p_count = 0
#     for para in doc.paragraphs:
#         if para.text.strip():
#             char_annotations = []
#             for run in para.runs:
#                 char_annotations += [int(run.font.highlight_color) if run.font.highlight_color else 99 for _ in range(len(run.text))]
#             char_annotations = [i==7 for i in char_annotations]
#             para_annotations[p_count].append(char_annotations)
#             p_count += 1

In [34]:
# for para_annotation in para_annotations[1:]:
#     temp = pd.DataFrame(para_annotation).T
#     agg = irr.aggregate_raters(temp)
#     fleiss_kappa_score = irr.fleiss_kappa(agg[0], method='fleiss')
#     print(np.round(fleiss_kappa_score, 2))

In [35]:
# temp = pd.DataFrame([[0,0,0,1,0], [0,0,0,1,1]]).T

# print(temp)
# agg = irr.aggregate_raters(temp)
# fleiss_kappa_score = irr.fleiss_kappa(agg[0], method='fleiss')
# print(fleiss_kappa_score)