# Confusion Matrices

***
**Table of Contents**

[I.](#prep) Prepare the Data

[II.](#agg) Inter-Annotator Agreement (IAA) Matrices with Aggregated Data
  * [Functions](#functions)
  * [Data Creation](#data)

In [1]:
import pandas as pd
from intervaltree import Interval, IntervalTree
import json
import pprint

<a id="prep"></a>
## I. Prepare the Data

In [6]:
# Directory that holds every input CSV read by this notebook (relative to the notebook's location)
dirpath = "../annot-post/data/"
# Annotation data - 1 CSV per annotator, includes annotator's notes for each label, where provided
ann0 = "ann0_labels_notes.csv"
ann1 = "ann1_labels_notes.csv"
ann2 = "ann2_labels_notes.csv"
ann3 = "ann3_labels_notes.csv"
ann4 = "ann4_labels_notes.csv"
# Position in this list == annotator number; the loading cell indexes into it
ann_files = [ann0, ann1, ann2, ann3, ann4]
# Aggregated dataset; it is read below with its first column as the index and supplies
# the `file` and `offsets` columns used later in the notebook
agg = "aggregated_with_eadid_descid_cols.csv"

In [8]:
# Load one DataFrame per annotator, in the same order as ann_files (annotator 0 .. 4)
df0, df1, df2, df3, df4 = (pd.read_csv(dirpath + csv_name) for csv_name in ann_files)
# The aggregated dataset keeps its first CSV column as the index
dfagg = pd.read_csv(f"{dirpath}{agg}", index_col=0)

In [36]:
# Ad-hoc check: display annotator 0's rows whose label is exactly "Non-binary".
# NOTE(review): the `labels` list defined later in this notebook spells this label "Nonbinary" —
# confirm which spelling the data actually uses.
df0.loc[df0.label == "Non-binary"]

Unnamed: 0,annotator,file,entity,label,start,end,text,category,note


In [9]:
def splitOffsets(df):
    """Split the `offsets` column into separate `start` and `end` columns.

    Each value of `df.offsets` is a string whose first and last characters are
    delimiters wrapping two comma-separated numbers; the delimiters are stripped
    and the first two comma-separated fields become `start` and `end`.

    Side effect: the input `df` itself gains `start` and `end` columns holding the
    raw (string) fields.

    Returns:
        A new DataFrame equal to `df` with `start` and `end` cast to int.
    """
    # Strip the surrounding delimiters, then split each value on the comma
    fields_per_row = [raw[1:-1].split(",") for raw in df.offsets]
    df["start"] = [fields[0] for fields in fields_per_row]
    df["end"] = [fields[1] for fields in fields_per_row]
    return df.astype({"start": int, "end": int})
# Bind the result back to dfagg so the aggregated frame carries integer `start`/`end`
# columns (the result was previously assigned to the misspelled name `dffagg`, leaving
# dfagg with only the string-valued columns splitOffsets adds to its input).
dfagg = splitOffsets(dfagg)

In [10]:
# Replace NaN with "None provided"
# Assign the filled column back to the frame: calling fillna(..., inplace=True) on the
# attribute-accessed column is chained assignment, which is not guaranteed to update the
# parent DataFrame (it never does under pandas Copy-on-Write).
df0["note"] = df0["note"].fillna("None provided")
df1["note"] = df1["note"].fillna("None provided")
df2["note"] = df2["note"].fillna("None provided")
df3["note"] = df3["note"].fillna("None provided")
df4["note"] = df4["note"].fillna("None provided")

In [11]:
# Annotator 0 labeled every category, so partition their rows to line up with the annotator pairs:
#   ann1 and ann2 -> Person-Name, Linguistic (everything that is not Contextual)
#   ann3 and ann4 -> Contextual
df0PL = df0[df0.category.ne("Contextual")]
df0C = df0[df0.category.eq("Contextual")]

In [12]:
def findCommonFiles(df_a, df_b):
    """Return the file names that appear in the `file` column of both DataFrames.

    The result is a list of unique names; its order follows set iteration and is
    therefore not meaningful.
    """
    files_b = set(df_b.file)
    return [name for name in set(df_a.file) if name in files_b]

In [15]:
# AGGREGATED ONLY: every unique file in the aggregated dataset
commonagg = list(set(list(dfagg.file)))
# ALL LABELS
common0agg = findCommonFiles(df0, dfagg)
# PERSON NAME & LINGUISTIC
# Among annotators
common0PL1 = findCommonFiles(df0PL, df1)
common0PL2 = findCommonFiles(df0PL, df2)
common12 = findCommonFiles(df1, df2)
# With aggregated dataset
# Use annotator 0's Person-Name/Linguistic subset here (was the full df0, which made
# this identical to common0agg)
common0PLagg = findCommonFiles(df0PL, dfagg)
common1agg = findCommonFiles(df1, dfagg)
common2agg = findCommonFiles(df2, dfagg)
# CONTEXTUAL
# Among annotators
common0C3 = findCommonFiles(df0C, df3)
print("Total Files Annotators 0 & 3 labeled:",len(common0C3))
print("Sample:",common0C3[:5])
common0C4 = findCommonFiles(df0C, df4)
common34 = findCommonFiles(df3, df4)
# With aggregated dataset
common0Cagg = findCommonFiles(df0C, dfagg)
# Each list must come from its own annotator's frame (df3/df4 were swapped), because
# the lists are later used to subset df3 and df4 respectively
common3agg = findCommonFiles(df3, dfagg)
common4agg = findCommonFiles(df4, dfagg)

Total Files Annotators 0 & 3 labeled: 485
Sample: ['Coll-1291_00100.ann', 'Coll-1052_00100.ann', 'Coll-1061_00300.ann', 'Coll-1091_00100.ann', 'Coll-1434_05900.ann']


<a id="agg"></a>
## II. Inter-Annotator Agreement (IAA) Matrices with Aggregated Data

In [16]:
# For every annotator, keep only the rows for files that both the annotator and the
# aggregated dataset cover; the aggregated frame is subset the same way per annotator.
ann0withagg = df0[df0.file.isin(common0agg)]
aggwithann0 = dfagg[dfagg.file.isin(common0agg)]
# PERSON-NAME & LINGUISTIC (annotators 1 and 2)
ann1withagg = df1[df1.file.isin(common1agg)]
aggwithann1 = dfagg[dfagg.file.isin(common1agg)]
ann2withagg = df2[df2.file.isin(common2agg)]
aggwithann2 = dfagg[dfagg.file.isin(common2agg)]
# CONTEXTUAL (annotators 3 and 4)
ann3withagg = df3[df3.file.isin(common3agg)]
aggwithann3 = dfagg[dfagg.file.isin(common3agg)]
ann4withagg = df4[df4.file.isin(common4agg)]
aggwithann4 = dfagg[dfagg.file.isin(common4agg)]

<a id="functions"></a>
### Functions

In [17]:
def createIntervalTree(df, filename, labelname):
    """Build an IntervalTree of (start, end) offsets for one file and one label.

    Only rows of `df` whose `file` equals `filename` and whose `label` equals
    `labelname` contribute; `start` and `end` are cast to int first.
    """
    wanted = (df.file == filename) & (df.label == labelname)
    rows = df[wanted].astype({"start": int, "end": int})
    spans = list(zip(rows.start, rows.end))
    return IntervalTree.from_tuples(spans)

In [18]:
def createIntervalTreeAllLabels(df, filename):
    """Build an IntervalTree of (start, end) offsets for one file, regardless of label.

    Every row of `df` whose `file` equals `filename` contributes; `start` and `end`
    are cast to int first.
    """
    rows = df[df.file == filename].astype({"start": int, "end": int})
    spans = list(zip(rows.start, rows.end))
    return IntervalTree.from_tuples(spans)

In [19]:
def falsePerFile(tree_exp, tree_pred):
    """Count the spans in `tree_exp` that no span in `tree_pred` matches or overlaps.

    `tree_exp` holds the text spans for one label in the expected dataset and
    `tree_pred` holds the predicted dataset's spans (whatever their label).

    A set difference of the two trees would only discard spans with identical
    offsets, so a merely overlapping span would still be counted as unmatched;
    each expected span is therefore tested for overlap explicitly.
    """
    return sum(
        1 for span in tree_exp
        if not tree_pred.overlaps(span.begin, span.end)
    )

In [20]:
def falseAcrossFiles(df_exp, df_pred, commonfiles, labelname):
    """Total the falsePerFile counts for `labelname` over every file in `commonfiles`.

    For each file, `df_exp` is restricted to `labelname` while `df_pred` contributes
    its spans for all labels. The total is:
      * the false negatives (misses) when `labelname` is the expected label, or
      * the false positives (shouldn't have been labeled) when the caller passes the
        predicted data as `df_exp` and `labelname` is the predicted label.
    """
    return sum(
        falsePerFile(
            createIntervalTree(df_exp, filename, labelname),
            createIntervalTreeAllLabels(df_pred, filename),
        )
        for filename in commonfiles
    )

In [21]:
def iaaPerFile(tree_exp, tree_pred):
    """Count true positives between two trees.

    For every annotation in `tree_exp`, adds the number of entries `tree_pred.overlap`
    returns for it, so exactly matching, overlapping, and enveloping annotations all count.
    """
    return sum(len(tree_pred.overlap(span)) for span in tree_exp)

In [22]:
def iaaAcrossFiles(df_exp, df_pred, commonfiles, exp_labelname, pred_labelname):
    """Return the per-file true positive counts for one (expected, predicted) label pair.

    For each file in `commonfiles`, the expected spans labeled `exp_labelname` are
    compared against the predicted spans labeled `pred_labelname` with iaaPerFile.
    The result has one count per file, in `commonfiles` order.
    """
    return [
        iaaPerFile(
            createIntervalTree(df_exp, filename, exp_labelname),
            createIntervalTree(df_pred, filename, pred_labelname),
        )
        for filename in commonfiles
    ]

In [23]:
def initMatrix(labels):
    """Create a zero-count cell for every (expected, predicted) pair of labels.

    Cells are ordered row by row: all predicted labels for the first expected label,
    then all for the second, and so on. Each cell is a dictionary with the keys
    "expected", "predicted", and "count".
    """
    return [
        {"expected": row_label, "predicted": col_label, "count": 0}
        for row_label in labels
        for col_label in labels
    ]

In [24]:
def fillMatrix(matrix_dict, exp, pred, common):
    """Fill in the count of every (expected, predicted) label pair.

    INPUT:  matrix_dict - list of dictionaries of expected and predicted label pairs
                          (counts are added to whatever value is already there)
            exp         - DataFrame of expected labels
            pred        - DataFrame of predicted labels
            common      - list of files common to both DataFrames
    OUTPUT: the same list of dictionaries (updated in place) with the counts filled in

    A predicted label of "None" gets the false negatives for the expected label, an
    expected label of "None" gets the false positives for the predicted label, and
    every other pair gets the summed true positives across the common files.
    """
    for pair in matrix_dict:
        exp_labelname = pair["expected"]
        pred_labelname = pair["predicted"]
        if pred_labelname == "None":
            # False negatives: expected spans the predicted data never touched
            pair["count"] += falseAcrossFiles(exp, pred, common, exp_labelname)
        elif exp_labelname == "None":
            # False positives: predicted spans the expected data never touched.
            # (Roles of exp/pred are swapped on purpose; the result of THIS call is
            # what gets added, not a leftover false-negative count.)
            pair["count"] += falseAcrossFiles(pred, exp, common, pred_labelname)
        else:
            tp_list = iaaAcrossFiles(exp, pred, common, exp_labelname, pred_labelname)
            pair["count"] += sum(tp_list)
    return matrix_dict

In [25]:
# Every label that can appear on either axis of the matrix; "None" stands for
# "no annotation" and is what fillMatrix keys its false negative/positive cells on
labels = [
    "Occupation", "Omission", "Stereotype", "Empowering",
    "Unknown", "Masculine", "Feminine", "Nonbinary",
    "Gendered-Role", "Gendered-Pronoun", "Generalization",
    "None",
]
matrix_dict = initMatrix(labels)
# Numeric code of each label = its position in `labels`
label_no = list(range(len(labels)))
label_no_dict = {name: number for number, name in enumerate(labels)}

In [26]:
def getMatrixOfCounts(labels, matrix_dict):
    """Reshape the flat list of label-pair cells into rows of counts.

    The "count" values are taken in order and cut into consecutive rows of
    len(labels) entries each; a trailing group shorter than a full row is dropped.
    """
    width = len(labels)
    counts = [pair["count"] for pair in matrix_dict]
    if width == 0:
        # No row can ever be completed without labels
        return []
    full_rows = len(counts) // width
    return [counts[r * width:(r + 1) * width] for r in range(full_rows)]

In [27]:
def writeJSON(matrix_data, filepath):
    """Serialize `matrix_data` as JSON, write it to `filepath`, and print a confirmation.

    The data is serialized before the file is opened, so a serialization error
    leaves any existing file untouched. The file is opened in a `with` block so the
    handle is closed even if the write fails.
    """
    json_data = json.dumps(matrix_data)
    with open(filepath, "w") as json_file:
        json_file.write(json_data)
    print("Finished writing "+filepath+"!")

In [28]:
def reformatForViz(matrix_dict):
    """Reformat the label-pair counts for visualization (https://observablehq.com/d/3eab142ea747ae66).

    Every pair with a positive count becomes `count` entries of the form
    {"gt": <expected label number>, "pt": <predicted label number>}, using the
    module-level `label_no_dict` to turn label names into numbers. Pairs with a
    count of zero (or less) contribute nothing.
    """
    points = []
    for cell in matrix_dict:
        truth = label_no_dict[cell["expected"]]
        guess = label_no_dict[cell["predicted"]]
        repeats = cell["count"]
        if repeats > 0:
            points.extend([{"gt": truth, "pt": guess}] * repeats)
    return points

<a id="data"></a>
### Data Creation

* **Expected:** Aggregated

* **Predicted:** Annotators 0, 1, 2, 3, & 4

In [31]:
# PRED: ANNOTATOR 0
# Build the label-pair count matrix with the aggregated data as expected and annotator 0
# as predicted, then write it out twice: as label-pair dictionaries and in the numeric
# per-occurrence form produced by reformatForViz.
# NOTE(review): this cell writes under "../annot/data/data_iaa/" while the other
# data-creation cells write under "data/data_iaa/" — confirm which directory is intended.
exp = aggwithann0    # subset DataFrame with labels only for files in common with predicted DataFrame
pred = ann0withagg   # subset DataFrame with labels only for files in common with expected DataFrame
common = common0agg  # list of files both expected and predicted DataFrames have labels (rows) for
matrix = initMatrix(labels)
# fillMatrix updates `matrix` in place and returns that same list
matrix_dict = fillMatrix(matrix, exp, pred, common)
writeJSON(matrix_dict, "../annot/data/data_iaa/0_with_agg.json")
# Rows-of-counts form of the matrix; not used again in this cell
matrix_counts = getMatrixOfCounts(labels, matrix_dict)
matrix_numeric = reformatForViz(matrix_dict)
writeJSON(matrix_numeric, "../annot/data/data_iaa/0_with_agg_numeric.json")
# print(matrix_dict)

Finished writing ../annot/data/data_iaa/0_with_agg.json!
Finished writing ../annot/data/data_iaa/0_with_agg_numeric.json!


In [None]:
# PRED: ANNOTATOR 1
# Build the label-pair count matrix with the aggregated data as expected and annotator 1
# as predicted, then write it out twice: as label-pair dictionaries and in the numeric
# per-occurrence form produced by reformatForViz.
# NOTE(review): output goes to "data/data_iaa/", unlike the annotator 0 cell, which
# writes to "../annot/data/data_iaa/" — confirm which directory is intended.
exp = aggwithann1    # subset DataFrame with labels only for files in common with predicted DataFrame
pred = ann1withagg   # subset DataFrame with labels only for files in common with expected DataFrame
common = common1agg  # list of files both expected and predicted DataFrames have labels (rows) for
matrix = initMatrix(labels)
# fillMatrix updates `matrix` in place and returns that same list
matrix_dict = fillMatrix(matrix, exp, pred, common)
writeJSON(matrix_dict, "data/data_iaa/1_with_agg.json")
# matrix_counts = getMatrixOfCounts(labels, matrix_dict)
matrix_numeric = reformatForViz(matrix_dict)
writeJSON(matrix_numeric, "data/data_iaa/1_with_agg_numeric.json")

In [None]:
# PRED: ANNOTATOR 2
# Build the label-pair count matrix with the aggregated data as expected and annotator 2
# as predicted, then write it out twice: as label-pair dictionaries and in the numeric
# per-occurrence form produced by reformatForViz.
# NOTE(review): output goes to "data/data_iaa/", unlike the annotator 0 cell, which
# writes to "../annot/data/data_iaa/" — confirm which directory is intended.
exp = aggwithann2    # subset DataFrame with labels only for files in common with predicted DataFrame
pred = ann2withagg   # subset DataFrame with labels only for files in common with expected DataFrame
common = common2agg  # list of files both expected and predicted DataFrames have labels (rows) for
matrix = initMatrix(labels)
# fillMatrix updates `matrix` in place and returns that same list
matrix_dict = fillMatrix(matrix, exp, pred, common)
writeJSON(matrix_dict, "data/data_iaa/2_with_agg.json")
# matrix_counts = getMatrixOfCounts(labels, matrix_dict)
matrix_numeric = reformatForViz(matrix_dict)
writeJSON(matrix_numeric, "data/data_iaa/2_with_agg_numeric.json")

In [None]:
# PRED: ANNOTATOR 3
# Build the label-pair count matrix with the aggregated data as expected and annotator 3
# as predicted, then write it out twice: as label-pair dictionaries and in the numeric
# per-occurrence form produced by reformatForViz.
# NOTE(review): output goes to "data/data_iaa/", unlike the annotator 0 cell, which
# writes to "../annot/data/data_iaa/" — confirm which directory is intended.
exp = aggwithann3    # subset DataFrame with labels only for files in common with predicted DataFrame
pred = ann3withagg   # subset DataFrame with labels only for files in common with expected DataFrame
common = common3agg  # list of files both expected and predicted DataFrames have labels (rows) for
matrix = initMatrix(labels)
# fillMatrix updates `matrix` in place and returns that same list
matrix_dict = fillMatrix(matrix, exp, pred, common)
writeJSON(matrix_dict, "data/data_iaa/3_with_agg.json")
# matrix_counts = getMatrixOfCounts(labels, matrix_dict)
matrix_numeric = reformatForViz(matrix_dict)
writeJSON(matrix_numeric, "data/data_iaa/3_with_agg_numeric.json")

In [None]:
# PRED: ANNOTATOR 4
# Build the label-pair count matrix with the aggregated data as expected and annotator 4
# as predicted, then write it out twice: as label-pair dictionaries and in the numeric
# per-occurrence form produced by reformatForViz.
# NOTE(review): output goes to "data/data_iaa/", unlike the annotator 0 cell, which
# writes to "../annot/data/data_iaa/" — confirm which directory is intended.
exp = aggwithann4    # subset DataFrame with labels only for files in common with predicted DataFrame
pred = ann4withagg   # subset DataFrame with labels only for files in common with expected DataFrame
common = common4agg  # list of files both expected and predicted DataFrames have labels (rows) for
matrix = initMatrix(labels)
# fillMatrix updates `matrix` in place and returns that same list
matrix_dict = fillMatrix(matrix, exp, pred, common)
writeJSON(matrix_dict, "data/data_iaa/4_with_agg.json")
# matrix_counts = getMatrixOfCounts(labels, matrix_dict)
matrix_numeric = reformatForViz(matrix_dict)
writeJSON(matrix_numeric, "data/data_iaa/4_with_agg_numeric.json")

* **Expected:** Aggregated

* **Predicted:** Aggregated

(To study how often matching or overlapping text spans have multiple labels.)

In [None]:
# PRED: AGGREGATED
# Compare the aggregated dataset against itself over all of its files, then write the
# result out twice: as label-pair dictionaries and in the numeric per-occurrence form
# produced by reformatForViz.
# NOTE(review): output goes to "data/data_iaa/", unlike the annotator 0 cell, which
# writes to "../annot/data/data_iaa/" — confirm which directory is intended.
exp = dfagg
pred = dfagg        # same frame on both sides of the comparison
common = commonagg  # list of unique files
matrix = initMatrix(labels)
# fillMatrix updates `matrix` in place and returns that same list
matrix_dict = fillMatrix(matrix, exp, pred, common)
writeJSON(matrix_dict, "data/data_iaa/agg_with_agg.json")
# matrix_counts = getMatrixOfCounts(labels, matrix_dict)
matrix_numeric = reformatForViz(matrix_dict)
writeJSON(matrix_numeric, "data/data_iaa/agg_with_agg_numeric.json")

*Visit [Observable](https://observablehq.com/d/3eab142ea747ae66) for confusion matrices visualizing the data files created in this Notebook!*