In [36]:
from code.coref_utils.conll import evaluate_conll
from code.coref_utils.utils import get_mention_to_cluster
from code.coref_utils.metrics import CorefEvaluator
from os import path
import os
import glob
import json

import logging

logging.basicConfig(format='%(asctime)s - %(message)s', level=logging.INFO)


In [37]:
def load_jsonl(file_path):
    data = []
    with open(file_path, 'r') as f:
        for line in f.readlines():
            data.append(json.loads(line.strip()))
    
    return data

In [38]:
ontonotes_log_dir="/home/shtoshni/Research/litbank_coref/models/ontonotes_logs"
conll_dir = "/home/shtoshni/Research/litbank_coref/data/ontonotes/conll"
conll_scorer = "/home/shtoshni/Research/litbank_coref/resources/reference-coreference-scorers/scorer.pl"

split = 'test'
if split == 'test':
    ontonotes_files = glob.glob(path.join(ontonotes_log_dir, "*test.jsonl"))
else:
    ontonotes_files = glob.glob(path.join(ontonotes_log_dir, "*.jsonl"))
print(ontonotes_files)

['/home/shtoshni/Research/litbank_coref/models/ontonotes_logs/ontonotes_unbounded.test.jsonl', '/home/shtoshni/Research/litbank_coref/models/ontonotes_logs/ontonotes_learned_20.test.jsonl', '/home/shtoshni/Research/litbank_coref/models/ontonotes_logs/ontonotes_learned_10.test.jsonl', '/home/shtoshni/Research/litbank_coref/models/ontonotes_logs/ontonotes_lru_10.test.jsonl']


In [39]:
def log_file_to_model(log_file):
    model_name = path.basename(log_file).split(".")[0]
#     print(model_name)
    return model_name

In [46]:
for log_file in ontonotes_files:
    model_name = log_file_to_model(log_file)
    data = load_jsonl(log_file)
    coref_predictions, subtoken_maps = {}, {}
    for example in data:
        predicted_clusters, mention_to_predicted =\
            get_mention_to_cluster(example["predicted_clusters"], threshold=2)
        gold_clusters, mention_to_gold =\
            get_mention_to_cluster(example["clusters"], threshold=2)
        
        coref_predictions[example["doc_key"]] = predicted_clusters
        subtoken_maps[example["doc_key"]] = example["subtoken_map"]
        
    split='test'
    gold_path = path.join(conll_dir, f'{split}.conll')
    prediction_file = path.join(ontonotes_log_dir, f'{model_name}.conll')
    if split == 'test':
        prediction_file = path.join(ontonotes_log_dir, f'{model_name}.{split}.conll')
#     print(prediction_file)
#     print(coref_predictions.keys())
    conll_results = evaluate_conll(
        conll_scorer, gold_path, coref_predictions, subtoken_maps, prediction_file, all_metrics=True)
    average_f1 = sum(results["f"] for results in conll_results.values()) / len(conll_results)
    logging.info("%s (CoNLL) F-score : %.1f, MUC: %.1f %.1f %.1f, Bcub:  %.1f %.1f %.1f, CEAFE:  %.1f %.1f %.1f"
                 % (model_name, average_f1, 
                    conll_results["muc"]["p"], conll_results["muc"]["r"], conll_results["muc"]["f"], 
                    conll_results['bcub']["p"], conll_results['bcub']["r"], conll_results['bcub']["f"],
                    conll_results['ceafe']["p"], conll_results['ceafe']["r"], conll_results['ceafe']["f"]))
    

2020-08-13 00:41:36,972 - ontonotes_unbounded (CoNLL) F-score : 77.4, MUC: 82.1 85.4 83.7, Bcub:  73.8 78.3 76.0, CEAFE:  72.2 72.6 72.4
2020-08-13 00:41:40,905 - ontonotes_learned_20 (CoNLL) F-score : 77.3, MUC: 83.3 84.4 83.8, Bcub:  75.2 76.9 76.0, CEAFE:  73.1 71.1 72.1
2020-08-13 00:41:44,936 - ontonotes_learned_10 (CoNLL) F-score : 76.2, MUC: 83.9 82.2 83.0, Bcub:  76.2 73.8 75.0, CEAFE:  72.1 69.4 70.7
2020-08-13 00:41:48,936 - ontonotes_lru_10 (CoNLL) F-score : 75.0, MUC: 83.6 80.8 82.2, Bcub:  76.3 71.1 73.6, CEAFE:  69.3 68.8 69.0
