In [1]:
import json, random, os
import pandas as pd
import numpy as np
import math
from collections import Counter

In this notebook, we compare the annotations produced by different annotators. We deliberately do not compute an inter-annotator agreement statistic such as Cohen's kappa. Instead, we list the results on which the annotators disagree, so that they can revisit those annotations and decide together whether each result is actually relevant.

In [2]:
def read_csv_annotations(annotations_df, query_lookup=None):
    """Group the annotated rows of an annotation sheet by query.

    A row whose ``query`` cell holds a string starts a new group; following
    rows without a query string are added to the most recent group. Rows whose
    ``page_nr`` cell is missing are skipped before the query cell is looked at.

    Parameters
    ----------
    annotations_df : pandas.DataFrame
        Frame with the columns ``query``, ``page_nr``, ``text`` and
        ``relevance``.
    query_lookup : iterable of str, optional
        Candidate canonical query keys. If exactly one candidate contains the
        row's query as a substring, that candidate becomes the group key;
        otherwise the key is the query followed by a newline. When omitted, a
        notebook-level ``queries_and_result_dict`` is used if one exists.

    Returns
    -------
    dict
        Maps each query key to a list of ``{"text", "relevance", "row_idx"}``
        dicts, where ``row_idx`` is the 0-based position of the row in
        ``annotations_df`` (skipped rows are still counted).

    Raises
    ------
    ValueError
        If a row with a page number appears before any row with a query string.
    """
    if query_lookup is None:
        # The lookup table is optional: earlier versions read this global
        # directly and relied on a bare ``except`` to hide the NameError.
        query_lookup = globals().get("queries_and_result_dict")
    known_queries = list(query_lookup) if query_lookup is not None else []

    annotations_dict = {}
    q_key = None
    for idx, (_, row) in enumerate(annotations_df.iterrows()):
        # pd.isna also copes with non-float cells, where math.isnan raises
        if pd.isna(row['page_nr']):
            continue

        # keep track of the query
        query = row['query']
        if isinstance(query, str):
            try:
                # unpacking enforces "exactly one match"
                [q_key] = [q for q in known_queries if query in q]
            except (TypeError, ValueError):
                q_key = query + "\n"
        elif q_key is None:
            raise ValueError(
                f"row {idx} has a page number but no query has been seen yet"
            )

        annotations_dict.setdefault(q_key, []).append(
            {"text": row['text'],
             "relevance": row['relevance'],
             "row_idx": idx}
        )
    return annotations_dict

In [3]:
def call_comparison(blue_csv, red_csv):
    """Compare one annotation CSV against one or several others.

    NOTE(review): this calls ``compare_csv_annotations``, which is not defined
    in the visible cells of this notebook; confirm it exists before using this
    function.

    Parameters
    ----------
    blue_csv : str
        Path of the CSV that every other file is compared against.
    red_csv : str or list of str
        A single CSV path, or a list (or tuple) of CSV paths.

    Returns
    -------
    The result of the last comparison that was run. When several red files are
    given, the results of the earlier comparisons are only printed.

    Raises
    ------
    TypeError
        If ``red_csv`` is neither a string nor a list/tuple.
    ValueError
        If ``red_csv`` is an empty list/tuple.
    """
    if isinstance(red_csv, str):
        red_csvs = [red_csv]
    elif isinstance(red_csv, (list, tuple)):
        red_csvs = list(red_csv)
    else:
        raise TypeError(
            f"red_csv must be a path or a list of paths, got {type(red_csv).__name__}"
        )
    if not red_csvs:
        # previously this fell through to an UnboundLocalError at the return
        raise ValueError("red_csv must contain at least one path")

    inconsistencies = None
    for single_red_csv in red_csvs:
        print(f"=== Results for {blue_csv} vs \n\t\t\t{single_red_csv} ")
        inconsistencies = compare_csv_annotations(blue_csv, single_red_csv)
    return inconsistencies

In [4]:
def compare_interannotations(blue_corner, red_corner):
    """Compare two annotation CSVs position by position and collect disagreements.

    For every query in the blue file, the i-th blue result is paired with the
    i-th red result of the same query. A pair counts as a disagreement when
    both hold the same text but different relevance labels.

    NOTE: a later cell defines ``compare_interannotations`` again; when the
    cells run in order, that later definition replaces this one.

    Parameters
    ----------
    blue_corner, red_corner : str
        Paths of the two annotation CSVs.

    Returns
    -------
    dict
        ``"files"`` holds the two paths. ``"queries"``, ``"texts"``,
        ``"relevances"`` and ``"rows"`` are parallel lists with one entry per
        disagreement; the last three hold ``[blue, red]`` pairs.
    """
    inconsistencies = {"files": [blue_corner, red_corner],
                       "queries": [],
                       "texts": [],
                       "relevances": [],
                       "rows": []}

    # load csvs
    blue_dict = read_csv_annotations(pd.read_csv(blue_corner))
    red_dict = read_csv_annotations(pd.read_csv(red_corner))

    for q_key, blue_results in blue_dict.items():
        # a query that is missing from the red file has nothing to compare
        red_results = red_dict.get(q_key, [])

        # zip stops at the shorter list, so unequal result counts no longer
        # raise an IndexError
        for blue_result, red_result in zip(blue_results, red_results):
            if blue_result["text"] != red_result["text"]:
                continue

            blue_rel = blue_result["relevance"]
            red_rel = red_result["relevance"]
            # NaN != NaN, so two blank labels must be treated as equal explicitly
            if pd.isna(blue_rel) and pd.isna(red_rel):
                continue
            if blue_rel == red_rel:
                continue

            # inconsistent annotation, grab the rows
            print("Inconsistent")
            inconsistencies["queries"].append(q_key)
            inconsistencies["texts"].append([blue_result["text"], red_result["text"]])
            inconsistencies["relevances"].append([blue_rel, red_rel])
            # +2: presumably turns the 0-based data position into a 1-based
            # sheet row below a header line -- TODO confirm
            inconsistencies["rows"].append([int(blue_result["row_idx"]) + 2,
                                            int(red_result["row_idx"]) + 2])

    # the count is the number of disagreements, not the number of queries
    print("Number of inconsistencies found: ", len(inconsistencies["rows"]))

    return inconsistencies

In [5]:
def compare_interannotations(blue_corner, red_corner):
    """Compare two annotation CSVs and collect conflicting relevance labels.

    For every query in the blue file, each blue result is compared with every
    red result of the same query. A pair counts as a disagreement when both
    hold the same text but different relevance labels, so the order of the
    results within a query does not matter.

    Parameters
    ----------
    blue_corner, red_corner : str
        Paths of the two annotation CSVs.

    Returns
    -------
    dict
        ``"files"`` holds the two paths. ``"queries"``, ``"texts"``,
        ``"relevances"`` and ``"rows"`` are parallel lists with one entry per
        disagreement; the last three hold ``[blue, red]`` pairs.
    """
    inconsistencies = {"files": [blue_corner, red_corner],
                       "queries": [],
                       "texts": [],
                       "relevances": [],
                       "rows": []}

    # load csvs
    blue_dict = read_csv_annotations(pd.read_csv(blue_corner))
    red_dict = read_csv_annotations(pd.read_csv(red_corner))

    for q_key, blue_results in blue_dict.items():
        # a query that is missing from the red file has nothing to compare;
        # indexing red_dict directly raised a KeyError here
        red_results = red_dict.get(q_key, [])

        for blue_result in blue_results:
            for red_result in red_results:
                if blue_result["text"] != red_result["text"]:
                    continue

                blue_rel = blue_result["relevance"]
                red_rel = red_result["relevance"]
                # NaN != NaN, so two blank labels must be treated as equal
                # explicitly
                if pd.isna(blue_rel) and pd.isna(red_rel):
                    continue
                if blue_rel == red_rel:
                    continue

                # inconsistent annotation, grab the rows
                print("Inconsistent")
                inconsistencies["queries"].append(q_key)
                inconsistencies["texts"].append([blue_result["text"], red_result["text"]])
                inconsistencies["relevances"].append([blue_rel, red_rel])
                # +2: presumably turns the 0-based data position into a
                # 1-based sheet row below a header line -- TODO confirm
                inconsistencies["rows"].append([int(blue_result["row_idx"]) + 2,
                                                int(red_result["row_idx"]) + 2])

    # the count is the number of disagreements, not the number of queries
    print("Number of inconsistencies found: ", len(inconsistencies["rows"]))

    return inconsistencies

In [6]:
def inter_annotator_comparison(blue_csv, red_csv):
    """Run ``compare_interannotations`` for one blue file against each red file.

    Parameters
    ----------
    blue_csv : str
        Path of the CSV that every other file is compared against.
    red_csv : str or list of str
        A single CSV path, or a list (or tuple) of CSV paths.

    Returns
    -------
    dict
        The inconsistencies of the last comparison that was run. When several
        red files are given, the earlier comparisons are only printed.

    Raises
    ------
    TypeError
        If ``red_csv`` is neither a string nor a list/tuple.
    ValueError
        If ``red_csv`` is an empty list/tuple.
    """
    if isinstance(red_csv, str):
        red_csvs = [red_csv]
    elif isinstance(red_csv, (list, tuple)):
        red_csvs = list(red_csv)
    else:
        raise TypeError(
            f"red_csv must be a path or a list of paths, got {type(red_csv).__name__}"
        )
    if not red_csvs:
        # previously this fell through to an UnboundLocalError at the return
        raise ValueError("red_csv must contain at least one path")

    inconsistencies = None
    for single_red_csv in red_csvs:
        print(f"=== Results for {blue_csv} vs \n\t\t\t{single_red_csv} ")
        inconsistencies = compare_interannotations(blue_csv, single_red_csv)
    return inconsistencies

In [7]:
# File that is compared against each of the files in `red_corner`.
blue_corner = "data/annotated/annotated_rw.csv"
red_corner = [
    "data/annotated/annotated_f.csv",
    "data/annotated/annotated_rk.csv",
]

In [8]:
# Compare the blue file against every red file in turn. Each comparison is
# printed, but only the result of the last one is returned and kept here.
inconsistencies = inter_annotator_comparison(blue_corner, red_corner)


=== Results for data/annotated/annotated_rw.csv vs 
			data/annotated/annotated_f.csv 
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Number of query keys found:  50
=== Results for data/annotated/annotated_rw.csv vs 
			data/annotated/annotated_rk.csv 
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent
Inconsistent


In [9]:
# [blue, red] relevance labels, one pair per disagreement
inconsistencies['relevances']

[['y', 'n'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['y', 'n'],
 ['y', 'n'],
 ['n', 'y'],
 ['h', 'y'],
 ['h', 'n'],
 ['n', 'y'],
 ['y', 'n'],
 ['y', 'n'],
 ['y', 'n'],
 ['n', 'y'],
 ['n', 'y'],
 ['y', 'n'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['y', 'n'],
 ['y', 'n'],
 ['y', 'n'],
 ['y', 'n'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['y', 'n'],
 ['y', 'n'],
 ['n', 'y'],
 ['y', 'n'],
 ['y', 'n'],
 ['y', 'n'],
 ['h', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['y', 'n'],
 ['h', 'n'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['y', 'n'],
 ['h', 'n'],
 ['n', 'y'],
 ['n', 'y'],
 ['y', 'n'],
 ['n', 'y'],
 ['y', 'n'],
 ['n', 'y'],
 ['h', 'n'],
 ['h', 'n'],
 ['h', 'y'],
 ['h', 'n'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['n', 'y'],
 ['h', 'y'],
 ['y', 'n'],
 ['n', 'y'],
 ['y', 'n'],
 ['y', 'n'],
 ['n', 'y'],
 ['h', 'n'],
 ['y', 'n'],
 ['n', 'y'],

In [10]:
# [blue, red] row numbers (row_idx + 2), one pair per disagreement
inconsistencies['rows']

[[2, 2],
 [4, 4],
 [7, 7],
 [8, 8],
 [9, 9],
 [11, 11],
 [23, 23],
 [24, 24],
 [26, 26],
 [27, 27],
 [33, 33],
 [34, 34],
 [35, 35],
 [36, 36],
 [37, 37],
 [42, 42],
 [43, 43],
 [44, 44],
 [46, 46],
 [47, 47],
 [50, 50],
 [52, 52],
 [53, 53],
 [54, 54],
 [55, 55],
 [57, 57],
 [58, 58],
 [60, 60],
 [61, 61],
 [62, 62],
 [65, 65],
 [68, 68],
 [71, 71],
 [72, 72],
 [74, 74],
 [77, 77],
 [79, 79],
 [85, 85],
 [88, 88],
 [89, 89],
 [91, 91],
 [92, 92],
 [93, 93],
 [94, 94],
 [95, 95],
 [96, 96],
 [99, 99],
 [100, 100],
 [102, 102],
 [103, 103],
 [105, 105],
 [109, 109],
 [111, 111],
 [113, 113],
 [120, 120],
 [121, 121],
 [122, 122],
 [127, 127],
 [128, 128],
 [130, 130],
 [131, 131],
 [132, 132],
 [133, 133],
 [134, 134],
 [136, 136],
 [137, 137],
 [139, 139],
 [141, 141],
 [143, 143],
 [144, 144],
 [145, 145],
 [146, 146],
 [147, 147],
 [148, 148],
 [149, 149],
 [151, 151],
 [152, 152],
 [153, 153],
 [157, 157],
 [161, 161],
 [167, 167],
 [169, 169],
 [170, 170],
 [175, 175],
 [177, 177],

In [11]:
# Build the agreement column: a header entry, then one entry per row number.
# Rows default to 'same'; rows with a disagreement get the [blue, red] labels.
# The list is sized from the largest blue row number (not simply the last one),
# and an empty list of inconsistencies yields just the header.
same_list = ['agreement'] + ['same'] * max(
    (int(rows[0]) for rows in inconsistencies['rows']), default=0
)
for annotation, idx in zip(inconsistencies["relevances"], inconsistencies["rows"]):
    # idx[0] is the blue row number; entry 0 of same_list is the header
    same_list[idx[0]-1] = annotation

In [12]:
# Agreement column to add to the csv for annotation
# (one entry per line; no throwaway list of None values is built)
print(*same_list, sep="\n")

agreement
['y', 'n']
same
['n', 'y']
same
same
['n', 'y']
['n', 'y']
['y', 'n']
same
['y', 'n']
same
same
same
same
same
same
same
same
same
same
same
['n', 'y']
['h', 'y']
same
['h', 'n']
['n', 'y']
same
same
same
same
same
['y', 'n']
['y', 'n']
['y', 'n']
['n', 'y']
['n', 'y']
same
same
same
same
['y', 'n']
['n', 'y']
['n', 'y']
same
['n', 'y']
['y', 'n']
same
same
['y', 'n']
same
['y', 'n']
['y', 'n']
['n', 'y']
['n', 'y']
same
['n', 'y']
['y', 'n']
same
['y', 'n']
['n', 'y']
['y', 'n']
same
same
['y', 'n']
same
same
['y', 'n']
same
same
['h', 'y']
['n', 'y']
same
['n', 'y']
same
same
['n', 'y']
same
['y', 'n']
same
same
same
same
same
['h', 'n']
same
same
['n', 'y']
['n', 'y']
same
['n', 'y']
['n', 'y']
['n', 'y']
['n', 'y']
['n', 'y']
['n', 'y']
same
same
['y', 'n']
['h', 'n']
same
['n', 'y']
['n', 'y']
same
['y', 'n']
same
same
same
['n', 'y']
same
['y', 'n']
same
['n', 'y']
same
same
same
same
same
same
['h', 'n']
['h', 'n']
['h', 'y']
same
same
same
same
['h', 'n']
['n', 'y']
s

''