In [1]:
import math 
import string 
import re

import json 

import pandas as pd 
import sqlite3
import spacy
import numpy as np
from rouge import Rouge 
nlp = spacy.load('en_core_web_sm')

In [2]:
annotations_path = '/home/ramprasad.sa/factual_annotation_llm_summaries/annotations'


def read_files(task, annotations_dir=None):
    """Load the two annotation CSVs for one task.

    Args:
        task: File-name prefix of the task (this notebook passes 'news',
            'billsum' and 'pubmed').
        annotations_dir: Directory that holds the CSV files. When omitted, the
            module-level ``annotations_path`` is used.

    Returns:
        A tuple ``(df_aggreg, df_errors)``: ``df_aggreg`` is read from
        ``{task}_annotations.csv`` and ``df_errors`` from
        ``{task}_annotations_scores.csv``. Note the order: the
        ``_annotations.csv`` frame comes first.
    """
    base_dir = annotations_path if annotations_dir is None else annotations_dir
    df_errors = pd.read_csv(f'{base_dir}/{task}_annotations_scores.csv')
    df_aggreg = pd.read_csv(f'{base_dir}/{task}_annotations.csv')
    return df_aggreg, df_errors



In [72]:
from sklearn.metrics import cohen_kappa_score
# NOTE(review): `nlp` is already bound to this same 'en_core_web_sm' pipeline in
# the first cell; this second load looks redundant.
nlp = spacy.load('en_core_web_sm')

def agreement_label(df):
    """Summary-level agreement between the two annotators.

    Every distinct ``summary_uuid`` contributes one binary label per annotator:
    1 when that annotator's ``label_type_ann*`` value is ``'factual'``, else 0.

    Args:
        df: DataFrame with the columns ``summary_uuid``, ``label_type_ann1``
            and ``label_type_ann2``.

    Returns:
        A tuple ``(agreement_score, cohenk_score)``: the fraction of summaries
        whose two binary labels are equal, and Cohen's kappa computed on the
        same two label lists.

    Raises:
        AssertionError: if a summary does not have exactly one distinct label
            for each annotator.
        ZeroDivisionError: if ``df`` contains no summaries.
    """
    ann1_summ_labels = []
    ann2_summ_labels = []
    for summid in df['summary_uuid'].unique():
        df_summ = df[df['summary_uuid'] == summid]
        ann1_label = list(set(df_summ['label_type_ann1']))
        ann2_label = list(set(df_summ['label_type_ann2']))
        # The previous check used `&`, which binds tighter than `==` and turned
        # the condition into `len(a) == (1 & len(b)) == 1`; that let through any
        # odd number of distinct annotator-2 labels. Use a real boolean `and`.
        if not (len(ann1_label) == 1 and len(ann2_label) == 1):
            raise AssertionError(
                f'summary {summid!r} must have exactly one label per annotator, '
                f'got {len(ann1_label)} (ann1) and {len(ann2_label)} (ann2)'
            )
        ann1_summ_labels.append(1 if ann1_label[0] == 'factual' else 0)
        ann2_summ_labels.append(1 if ann2_label[0] == 'factual' else 0)

    # Both lists grow in lock-step above, so they always have the same length.
    n_agreed = sum(
        1 for first, second in zip(ann1_summ_labels, ann2_summ_labels)
        if first == second
    )
    agreement_score = n_agreed / len(ann1_summ_labels)
    cohenk_score = cohen_kappa_score(ann1_summ_labels, ann2_summ_labels)
    return agreement_score, cohenk_score
    
def agreement_sent(df):
    """Sentence-level agreement between the two annotators.

    Each distinct ``summary`` text is split into sentences with the
    module-level spaCy pipeline ``nlp``. A sentence whose text never appears in
    ``nonfactual_sentence`` counts as label 0 for both annotators. Otherwise
    every row whose ``nonfactual_sentence`` equals the sentence text adds one
    label per annotator: 1 when ``inaccuracy_severity_ann*`` holds a string,
    0 otherwise.

    Args:
        df: DataFrame with the columns ``summary``, ``nonfactual_sentence``,
            ``inaccuracy_severity_ann1`` and ``inaccuracy_severity_ann2``.

    Returns:
        A tuple ``(agreement_score, cohenk_score)``: the fraction of positions
        at which the two label lists are equal, and Cohen's kappa on the same
        lists.

    Raises:
        ZeroDivisionError: if no sentence labels are collected (e.g. empty
            ``df``).
    """
    # Built once: the set of flagged sentences is the same for every sentence,
    # so there is no need to rebuild and linearly scan a list per sentence.
    flagged_sentences = set(df['nonfactual_sentence'].values)

    ann1_summ_sent_labels = []
    ann2_summ_sent_labels = []

    for summ in set(df['summary'].values):
        for sent in nlp(summ).sents:
            sent_text = sent.text

            if sent_text not in flagged_sentences:
                ann1_summ_sent_labels.append(0)
                ann2_summ_sent_labels.append(0)
                continue

            # NOTE(review): rows are matched by sentence text across the whole
            # frame, not only within the current summary - confirm that a
            # sentence shared by several summaries should be counted this way.
            df_summ_sent = df[df['nonfactual_sentence'] == sent_text]

            # A string severity means the annotator marked an inaccuracy;
            # presumably non-strings are the NaN of an empty CSV cell.
            ann1_summ_sent_labels += [
                1 if isinstance(each, str) else 0
                for each in df_summ_sent['inaccuracy_severity_ann1'].values
            ]
            ann2_summ_sent_labels += [
                1 if isinstance(each, str) else 0
                for each in df_summ_sent['inaccuracy_severity_ann2'].values
            ]

    assert len(ann1_summ_sent_labels) == len(ann2_summ_sent_labels)
    n_agreed = sum(
        1 for first, second in zip(ann1_summ_sent_labels, ann2_summ_sent_labels)
        if first == second
    )
    agreement_score = n_agreed / len(ann1_summ_sent_labels)
    cohenk_score = cohen_kappa_score(ann1_summ_sent_labels, ann2_summ_sent_labels)
    return agreement_score, cohenk_score
                
                
                
                

def agreement_categ(df):
    """Error-category agreement between the two annotators, per sentence.

    Each distinct ``summary`` text is split into sentences with the
    module-level spaCy pipeline ``nlp``. A sentence whose text never appears in
    ``nonfactual_sentence`` counts as category 0 for both annotators. Otherwise
    every row whose ``nonfactual_sentence`` equals the sentence text adds one
    category code per annotator, taken from ``error_type_ann*``:
    non-string -> 0, 'intrinsic' -> 1, 'extrinsic' -> 2, 'other' -> 3.

    Args:
        df: DataFrame with the columns ``summary``, ``nonfactual_sentence``,
            ``error_type_ann1`` and ``error_type_ann2``.

    Returns:
        A tuple ``(agreement_score, cohenk_score)``: the fraction of positions
        at which the two category lists are equal, and Cohen's kappa on the
        same lists.

    Raises:
        KeyError: if an ``error_type_ann*`` string is not one of the three
            known categories.
        ZeroDivisionError: if no sentence categories are collected (e.g. empty
            ``df``).
    """
    categ_map = {None: 0,
                 'intrinsic': 1,
                 'extrinsic': 2,
                 'other': 3}

    # Built once: the set of flagged sentences is the same for every sentence,
    # so there is no need to rebuild and linearly scan a list per sentence.
    flagged_sentences = set(df['nonfactual_sentence'].values)

    ann1_summ_sent_categ = []
    ann2_summ_sent_categ = []

    for summ in set(df['summary'].values):
        for sent in nlp(summ).sents:
            sent_text = sent.text

            if sent_text not in flagged_sentences:
                ann1_summ_sent_categ.append(0)
                ann2_summ_sent_categ.append(0)
                continue

            # NOTE(review): rows are matched by sentence text across the whole
            # frame, not only within the current summary - confirm that a
            # sentence shared by several summaries should be counted this way.
            df_summ_sent = df[df['nonfactual_sentence'] == sent_text]

            # Non-string cells are looked up under the `None` key (code 0).
            ann1_summ_sent_categ += [
                categ_map[each if isinstance(each, str) else None]
                for each in df_summ_sent['error_type_ann1'].values
            ]
            ann2_summ_sent_categ += [
                categ_map[each if isinstance(each, str) else None]
                for each in df_summ_sent['error_type_ann2'].values
            ]

    assert len(ann1_summ_sent_categ) == len(ann2_summ_sent_categ)
    n_agreed = sum(
        1 for first, second in zip(ann1_summ_sent_categ, ann2_summ_sent_categ)
        if first == second
    )
    agreement_score = n_agreed / len(ann2_summ_sent_categ)
    cohenk_score = cohen_kappa_score(ann1_summ_sent_categ, ann2_summ_sent_categ)
    return agreement_score, cohenk_score

In [73]:
# Load both annotation CSVs for each of the three tasks.
# NOTE(review): read_files returns (df_aggreg, df_errors), i.e. the
# `{task}_annotations.csv` frame first. Each `df_errors_*` name below is therefore
# bound to the `{task}_annotations.csv` frame and each `df_aggr_*` name to the
# `{task}_annotations_scores.csv` frame - confirm this naming is intended.
df_errors_news, df_aggr_news = read_files('news')
df_errors_billsum, df_aggr_billsum= read_files('billsum')
df_errors_pubmed, df_aggr_pubmed= read_files('pubmed')


In [79]:
# Report sentence-, category- and summary-level agreement for the news frame.
print('***** NEWS*****')

for heading, scorer in (
    ('Sent Agreement (Agg/Cohen)', agreement_sent),
    ('Categ Agreement (Agg/Cohen)', agreement_categ),
    ('Summary Agreement (Agg/Cohen)', agreement_label),
):
    print(heading, scorer(df_errors_news))

***** NEWS*****
Sent Agreement (Agg/Cohen) (0.9169139465875371, 0.6547636470071712)
Categ Agreement (Agg/Cohen) (0.8635014836795252, 0.45576463979778115)
Summary Agreement (Agg/Cohen) (0.8, 0.5612110574813515)


In [80]:
# Report sentence-, category- and summary-level agreement for the billsum frame.
print('***** Billsum*****')

for heading, scorer in (
    ('Sent Agreement (Agg/Cohen)', agreement_sent),
    ('Categ Agreement (Agg/Cohen)', agreement_categ),
    ('Summary Agreement (Agg/Cohen)', agreement_label),
):
    print(heading, scorer(df_errors_billsum))

***** Billsum*****
Sent Agreement (Agg/Cohen) (0.7857142857142857, 0.17260787992495308)
Categ Agreement (Agg/Cohen) (0.7755102040816326, 0.17037923810338196)
Summary Agreement (Agg/Cohen) (0.72, 0.3738819320214669)


In [81]:
# Report sentence-, category- and summary-level agreement for the pubmed frame.
print('***** Pubmed *****')

for heading, scorer in (
    ('Sent Agreement (Agg/Cohen)', agreement_sent),
    ('Categ Agreement (Agg/Cohen)', agreement_categ),
    ('Summary Agreement (Agg/Cohen)', agreement_label),
):
    print(heading, scorer(df_errors_pubmed))

***** Pubmed *****
Sent Agreement (Agg/Cohen) (0.9273182957393483, 0.10545032856590641)
Categ Agreement (Agg/Cohen) (0.9223057644110275, 0.052256532066508155)
Summary Agreement (Agg/Cohen) (0.85, 0.1573033707865169)


In [71]:
# Ad-hoc check: the same call as the 'Categ Agreement' line of the Billsum cell.
agreement_categ(df_errors_billsum)

(0.7755102040816326, 0.17037923810338196)