In [1]:
import os
from os import path
import numpy as np
import json

from metrics import CorefEvaluator
from scipy.stats import spearmanr

In [2]:
# Directory holding the per-model prediction logs (one JSONL file per
# configuration). Set the ONTONOTES_LOG_DIR environment variable to run the
# notebook on another machine; the original location remains the default.
log_dir = os.environ.get(
    "ONTONOTES_LOG_DIR",
    "/home/shtoshni/Research/litbank_coref/models/ontonotes_logs",
)

# Model variants to analyse and, for every variant other than "unbounded",
# the cell-count suffixes that appear in the log file names.
models = ["unbounded", "learned", "lru"]
num_cells = ["5", "10", "20"]


In [3]:
def load_jsonl(file_path):
    """Read a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of the file to read. Every non-blank line must hold
            one JSON document.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(file_path, 'r') as f:
        # Iterate over the file object directly so the whole file is not
        # materialised as a list of lines first.
        for line in f:
            line = line.strip()
            # Blank lines (e.g. an extra newline at the end of the file)
            # carry no record, and json.loads("") would raise on them.
            if not line:
                continue
            data.append(json.loads(line))

    return data


def mention_to_cluster(clusters, threshold=2):
    """Freeze clusters into tuples and index every mention by its cluster.

    Args:
        clusters: Iterable of clusters, each a sized collection of mentions
            (every mention being an iterable, e.g. a [start, end] list).
        threshold: Minimum number of mentions a cluster needs to be kept.

    Returns:
        A pair ``(kept, lookup)``: ``kept`` is the list of retained clusters,
        each a tuple of mention tuples; ``lookup`` maps every mention tuple
        to the cluster tuple that contains it. When a mention occurs in
        several retained clusters, the last such cluster wins.
    """
    kept = []
    lookup = {}
    for cluster in clusters:
        # Clusters below the size threshold are dropped entirely.
        if len(cluster) < threshold:
            continue
        frozen = tuple(tuple(mention) for mention in cluster)
        kept.append(frozen)
        for mention in frozen:
            lookup[mention] = frozen
    return kept, lookup

In [4]:
# Load the prediction logs of every model configuration.
# Keys are 'unbounded' or '<model>_<num_cell>'.

model_to_logs = {}
for model in models:
    if model == 'unbounded':
        # The unbounded variant has a single log file without a cell suffix.
        model_to_logs['unbounded'] = load_jsonl(
            path.join(log_dir, "ontonotes_unbounded.jsonl"))
        continue

    # Every other variant has one log file per cell count.
    for num_cell in num_cells:
        log_name = "ontonotes_{}_{}.jsonl".format(model, num_cell)
        model_to_logs['{}_{}'.format(model, num_cell)] = load_jsonl(
            path.join(log_dir, log_name))

In [5]:
# Quick sanity check: list the configurations whose logs were loaded.
loaded_configs = model_to_logs.keys()
print(loaded_configs)

dict_keys(['unbounded', 'learned_5', 'learned_10', 'learned_20', 'lru_5', 'lru_10', 'lru_20'])


## Overwrites

In [6]:
# For every model, count the predicted actions labelled 'o' in each document
# and report the maximum and mean of that count across documents.
for model, logs in model_to_logs.items():
    # Bounded models carry their cell count as the key suffix (e.g. 'lru_10');
    # the per-document count is capped at that value. The limit lives in its
    # own name so the `num_cells` list from the configuration cell is not
    # overwritten (which would break a re-run of the log-loading cell).
    cell_limit = None if 'unbounded' in model else int(model.split('_')[-1])

    mem_usage = []
    for log in logs:
        over_action = sum(1 for action in log["pred_actions"] if action[1] == 'o')
        if cell_limit is not None:
            over_action = min(over_action, cell_limit)
        mem_usage.append(over_action)

    print('{}, max: {}, avg: {:.1f}'.format(model, max(mem_usage), np.mean(mem_usage)))

unbounded, max: 87, avg: 16.3
learned_5, max: 5, avg: 4.6
learned_10, max: 10, avg: 8.1
learned_20, max: 20, avg: 12.4
lru_5, max: 5, avg: 4.6
lru_10, max: 10, avg: 8.1
lru_20, max: 20, avg: 12.4


## Ignored Mentions

In [7]:
# For every model, count the predicted actions labelled 'n' in each document
# (the mentions this section's heading calls "ignored") and report the maximum
# and mean of that count across documents.
# No per-model cap is applied here, so the cell count is deliberately not
# parsed from the key; doing so under the name `num_cells` would overwrite the
# list defined in the configuration cell.
for model, logs in model_to_logs.items():
    ignored_counts = [
        sum(1 for action in log["pred_actions"] if action[1] == 'n')
        for log in logs
    ]

    print('{}, max: {}, avg: {:.1f}'.format(
        model, max(ignored_counts), np.mean(ignored_counts)))

unbounded, max: 0, avg: 0.0
learned_5, max: 21, avg: 0.5
learned_10, max: 1, avg: 0.0
learned_20, max: 1, avg: 0.0
lru_5, max: 79, avg: 5.4
lru_10, max: 24, avg: 0.7
lru_20, max: 3, avg: 0.0


## Spearman Correlation

In [8]:
# Per-document statistics for every model, stored as tuples of
# (last subtoken_map entry + 1, number of gold clusters kept, F-score * 100).
model_perf_per_example = {}
for model, log_data in model_to_logs.items():
    perf_list = []
    for example in log_data:
        # A fresh evaluator per document, so the score covers this document only.
        evaluator = CorefEvaluator()

        predicted_clusters, mention_to_predicted = mention_to_cluster(
            example["predicted_clusters"], threshold=2)
        gold_clusters, mention_to_gold = mention_to_cluster(
            example["clusters"], threshold=2)

        evaluator.update(predicted_clusters, gold_clusters,
                         mention_to_predicted, mention_to_gold)

        doc_len = example["subtoken_map"][-1] + 1
        num_ents = len(gold_clusters)
        # Third element of get_prf(), scaled to a percentage.
        example_fscore = evaluator.get_prf()[2] * 100.0

        perf_list.append((doc_len, num_ents, example_fscore))

    model_perf_per_example[model] = perf_list

# Rank correlation of the per-document score with document length and with
# the number of gold clusters, for each model.
for model, perf_list in model_perf_per_example.items():
    doc_len_list, num_ent_list, fscore = zip(*perf_list)

    doc_len_corr = spearmanr(doc_len_list, fscore)[0]
    print('{} doc len {:.2f}'.format(model, doc_len_corr))
    num_ent_corr = spearmanr(num_ent_list, fscore)[0]
    print('{} num ent {:.2f}'.format(model, num_ent_corr))

unbounded doc len -0.31
unbounded num ent -0.27
learned_5 doc len -0.38
learned_5 num ent -0.39
learned_10 doc len -0.37
learned_10 num ent -0.35
learned_20 doc len -0.30
learned_20 num ent -0.27
lru_5 doc len -0.42
lru_5 num ent -0.47
lru_10 doc len -0.36
lru_10 num ent -0.37
lru_20 doc len -0.33
lru_20 num ent -0.30


## Bucketing examples

In [9]:
# Document lengths are taken from the examples in the 'unbounded' logs.
data = model_to_logs['unbounded']

# (doc_key, length) pairs in order of increasing length, where length is the
# last subtoken_map entry + 1. sorted() is stable, so ties keep log order.
doc_key_len_list = sorted(
    ((example["doc_key"], example["subtoken_map"][-1] + 1) for example in data),
    key=lambda pair: pair[1],
)

In [10]:
# Placeholder for the bucketed examples; nothing in the visible cells fills it yet.
partitions = []

# (low, high) length buckets; the last one is open-ended.
# NOTE(review): presumably inclusive bounds on document length -- no visible
# cell consumes them yet, so confirm when the bucketing is implemented.
len_constraints = [(0, 128), (129, 256), (257, 512), (513, 768), (769, 1152), (1153, np.inf)]

# Backward-compatible alias for the original, misspelled name.
len_constrainst = len_constraints



In [11]:



# NOTE(review): this cell repeats the computation from the "Spearman
# Correlation" section verbatim and rebuilds `model_perf_per_example`.
# Each entry is (last subtoken_map entry + 1, number of gold clusters kept,
# F-score * 100) for one document.
model_perf_per_example = {}
for model in model_to_logs:
    perf_list = []
    for example in model_to_logs[model]:
        # One evaluator per document keeps the score document-local.
        evaluator = CorefEvaluator()

        predicted_clusters, mention_to_predicted = mention_to_cluster(
            example["predicted_clusters"], threshold=2)
        gold_clusters, mention_to_gold = mention_to_cluster(
            example["clusters"], threshold=2)
        evaluator.update(predicted_clusters, gold_clusters,
                         mention_to_predicted, mention_to_gold)

        # Third element of get_prf(), expressed as a percentage.
        example_fscore = evaluator.get_prf()[2] * 100.0
        perf_list.append(
            (example["subtoken_map"][-1] + 1, len(gold_clusters), example_fscore))

    model_perf_per_example[model] = perf_list

# Rank correlation of the per-document score with each of the two size measures.
for model, perf_list in model_perf_per_example.items():
    doc_len_list, num_ent_list, fscore = zip(*perf_list)

    for label, sizes in (('doc len', doc_len_list), ('num ent', num_ent_list)):
        print('{} {} {:.2f}'.format(model, label, spearmanr(sizes, fscore)[0]))

unbounded doc len -0.31
unbounded num ent -0.27
learned_5 doc len -0.38
learned_5 num ent -0.39
learned_10 doc len -0.37
learned_10 num ent -0.35
learned_20 doc len -0.30
learned_20 num ent -0.27
lru_5 doc len -0.42
lru_5 num ent -0.47
lru_10 doc len -0.36
lru_10 num ent -0.37
lru_20 doc len -0.33
lru_20 num ent -0.30
