In [1]:
import json
import os
import numpy as np
import sys
from sklearn.metrics import f1_score

In [2]:
def is_aligned(actual, pred):
    """Tell whether the first gold mention and the first predicted mention start together.

    Only the start offsets are compared; the end offsets of the mentions are
    ignored, so spans of different lengths still count as aligned.

    Args:
        actual: Sequence of gold mentions; each mention is indexable and holds
            its start offset at position 0.
        pred: Sequence of predicted mentions with the same layout.

    Returns:
        True when ``actual[0][0] == pred[0][0]``, otherwise False.

    Raises:
        IndexError: If either sequence, or its first mention, is empty.
    """
    # The end offsets were previously unpacked but never used; reading only
    # the starts keeps the result identical without the dead locals.
    return actual[0][0] == pred[0][0]

In [3]:
def process_f1(path):
    """Compute the F1 score of first-cluster alignment for one results file.

    The file at ``path`` is parsed as JSON and iterated as a collection of
    examples. For each example the first cluster under "clusters" is the gold
    cluster and the first cluster under "predicted_clusters" (if any) is the
    prediction. Both are sorted by mention start, and the example counts as a
    hit when ``is_aligned`` accepts the pair. An example whose prediction is
    missing, null, empty, or has an empty first cluster counts as a miss.

    Every gold label is True, so the score is computed against an all-positive
    reference.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The value returned by ``sklearn.metrics.f1_score``.

    Raises:
        KeyError: If an example has no "clusters" entry.
        IndexError: If an example's "clusters" list is empty, or its first
            gold cluster is empty while a prediction is present.
    """
    # JSON text is UTF-8; be explicit so the platform default cannot interfere.
    with open(path, "r", encoding="utf-8") as f:
        examples = json.load(f)

    pred_arr = []
    for example in examples:
        # Read unconditionally so a malformed gold annotation fails loudly.
        gold = example["clusters"][0]

        # A null or absent entry is handled like an empty prediction list.
        predicted_clusters = example.get("predicted_clusters")
        predicted = predicted_clusters[0] if predicted_clusters else None
        if not predicted:
            # Nothing usable was predicted for this example: count it as a miss.
            pred_arr.append(False)
            continue

        # Order mentions by start offset so index 0 is the earliest mention.
        gold = sorted(gold, key=lambda x: x[0])
        predicted = sorted(predicted, key=lambda x: x[0])
        pred_arr.append(bool(is_aligned(gold, predicted)))

    # Flat (1-D) reference labels, matching the shape of the prediction list.
    gold_arr = np.ones(len(pred_arr), dtype=bool)
    return f1_score(gold_arr, pred_arr)

In [5]:
# Results for each WB type are stored in a .json file in scores/
def process_f1_trial(ckpt):
    """Score one checkpoint on its four result files.

    Args:
        ckpt: Checkpoint name used as the file-name prefix inside ``scores/``.

    Returns:
        A list of four ``process_f1`` results, ordered type1_anti, type1_pro,
        type2_anti, type2_pro.
    """
    splits = ("type1_anti", "type1_pro", "type2_anti", "type2_pro")
    return [process_f1(f"scores/{ckpt}-{split}.json") for split in splits]

In [10]:
# Score the "bert" checkpoint; `scores` is ordered T1 anti, T1 pro, T2 anti, T2 pro.
scores = process_f1_trial("bert")
# One print call, one line per metric. F1 values are shown as percentages and
# each TPR line is the pro-minus-anti gap for that type.
print(
    f"T1A: {round(scores[0]*100, 2)}",
    f"T1P: {round(scores[1]*100, 2)}",
    f"T2A: {round(scores[2]*100, 2)}",
    f"T2P: {round(scores[3]*100, 2)}",
    f"TPR1: {round((scores[1] - scores[0])*100, 2)}",
    f"TPR2: {round((scores[3] - scores[2])*100, 2)}",
    sep="\n",
)

T1A: 53.06
T1P: 86.21
T2A: 81.44
T2P: 93.41
TPR1: 33.15
TPR2: 11.97


In [11]:
# Score the "mabel" checkpoint; `scores` is ordered T1 anti, T1 pro, T2 anti, T2 pro.
scores = process_f1_trial("mabel")
# One print call, one line per metric. F1 values are shown as percentages and
# each TPR line is the pro-minus-anti gap for that type.
print(
    f"T1A: {round(scores[0]*100, 2)}",
    f"T1P: {round(scores[1]*100, 2)}",
    f"T2A: {round(scores[2]*100, 2)}",
    f"T2P: {round(scores[3]*100, 2)}",
    f"TPR1: {round((scores[1] - scores[0])*100, 2)}",
    f"TPR2: {round((scores[3] - scores[2])*100, 2)}",
    sep="\n",
)

T1A: 62.5
T1P: 83.19
T2A: 93.12
T2P: 95.38
TPR1: 20.69
TPR2: 2.26
