In [1]:
import os
from os import path
import numpy as np
import json

from metrics import CorefEvaluator
from scipy.stats import spearmanr

In [2]:
# Directory containing the per-model prediction logs that the cells below read
# as "<model>_<num_cells>_dev.jsonl". Set the LITBANK_PRED_DIR environment
# variable to run the notebook against another location; the default is
# unchanged.
log_dir = os.environ.get(
    "LITBANK_PRED_DIR",
    "/home/shtoshni/Research/litbank_coref/models/litbank_preds",
)

# Model variants to compare, and the memory sizes evaluated for the bounded
# ones. Sizes are strings because they are interpolated into file names and
# dictionary keys when the logs are loaded.
models = ["unbounded", "learned", "lru"]
num_cells = ["5", "10", "20"]


In [3]:
def load_jsonl(file_path):
    """Read a JSON Lines file into a list of decoded objects.

    Args:
        file_path: Path of a text file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    # Iterate the file handle directly so lines are streamed rather than all
    # held in memory at once.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank lines (for example a trailing newline at end of file)
            # hold no record and would make json.loads fail.
            if line:
                data.append(json.loads(line))

    return data

In [4]:
# Read the dev-set prediction log of every model configuration.
# Keys are "unbounded" for the unbounded model and "<model>_<num_cells>"
# for each bounded model / memory-size combination.
model_to_logs = {}
for model in models:
    if model != 'unbounded':
        for num_cell in num_cells:
            model_to_logs[f"{model}_{num_cell}"] = load_jsonl(
                path.join(log_dir, f"{model}_{num_cell}_dev.jsonl"))
        continue

    # 20 is just the default num_cells that appears in the unbounded file name
    model_to_logs['unbounded'] = load_jsonl(
        path.join(log_dir, f"{model}_20_dev.jsonl"))
        

In [5]:
from collections import Counter

# Tally how often each action label is predicted by each model.
for model, log_data in model_to_logs.items():
    action_counter = Counter()
    for example in log_data:
        # Every entry of "pred_actions" is a pair whose second element is the
        # action label. Iterating the pairs directly also copes with a
        # document that has no predicted actions, where unpacking
        # zip(*[]) into two names would raise ValueError.
        action_counter.update(action for _, action in example["pred_actions"])

    print(model, action_counter)
        

unbounded Counter({'i': 43444, 'c': 19947, 'o': 8079})
learned_5 Counter({'i': 44326, 'c': 18844, 'o': 7853, 'n': 447})
learned_10 Counter({'i': 44189, 'c': 19919, 'o': 7362})
learned_20 Counter({'i': 43868, 'c': 19985, 'o': 7617})
lru_5 Counter({'i': 43852, 'c': 17382, 'n': 7000, 'o': 3236})
lru_10 Counter({'i': 44019, 'c': 19065, 'o': 6964, 'n': 1422})
lru_20 Counter({'i': 43425, 'c': 19899, 'o': 8105, 'n': 41})


### Mentions Ignored - Learned vs LRU

In [6]:
# Per-model average number of 'n' actions predicted per document.
# Nothing is assigned to `num_cells` here: that name is the configuration
# list used when loading the logs, and overwriting it with an int would break
# a re-run of the loading cell.
for model, logs in model_to_logs.items():
    ignored_counts = [
        sum(1 for action in log["pred_actions"] if action[1] == 'n')
        for log in logs
    ]

    print ('{}, avg: {:.1f}'.format(model, np.mean(ignored_counts)))

unbounded, avg: 0.0
learned_5, avg: 4.5
learned_10, avg: 0.0
learned_20, avg: 0.0
lru_5, avg: 70.0
lru_10, avg: 14.2
lru_20, avg: 0.4


### Number of Entities in Memory 

In [7]:
# Per-model memory usage per document, measured as the number of 'o' actions
# and capped at the memory size for bounded models.
for model, logs in model_to_logs.items():
    # Bounded models are keyed "<name>_<cells>"; the unbounded model has no
    # cap. A dedicated local name is used so the configuration list
    # `num_cells` is not overwritten with an int.
    cell_limit = None if 'unbounded' in model else int(model.split('_')[-1])

    mem_usage = []
    for log in logs:
        used_cells = sum(1 for action in log["pred_actions"] if action[1] == 'o')
        if cell_limit is not None:
            used_cells = min(used_cells, cell_limit)
        mem_usage.append(used_cells)

    print ('{}, max: {}, avg: {:.1f}'.format(model, max(mem_usage), np.mean(mem_usage)))
    


unbounded, max: 160, avg: 80.8
learned_5, max: 5, avg: 5.0
learned_10, max: 10, avg: 10.0
learned_20, max: 20, avg: 20.0
lru_5, max: 5, avg: 5.0
lru_10, max: 10, avg: 10.0
lru_20, max: 20, avg: 20.0


## Spearman Correlations

In [8]:
def mention_to_cluster(clusters, threshold=1):
    """Convert clusters to hashable tuples and index mentions by cluster.

    Args:
        clusters: Sequence of clusters; each cluster is a sized sequence of
            mentions and each mention is an iterable that is turned into a
            tuple.
        threshold: Minimum number of mentions a cluster must have to be kept.

    Returns:
        A pair ``(kept, lookup)``: ``kept`` is the list of retained clusters,
        each a tuple of mention tuples, and ``lookup`` maps every mention
        tuple to the cluster tuple containing it. When a mention appears in
        several retained clusters, the last such cluster wins.
    """
    kept = []
    for cluster in clusters:
        if len(cluster) < threshold:
            continue
        kept.append(tuple(tuple(mention) for mention in cluster))

    lookup = {mention: members for members in kept for mention in members}
    return kept, lookup

# Score each document on its own so that the per-document F-score can be
# compared with document-level properties in the following cells.
# Every entry is (doc_len, num_ents, fscore, example).
model_perf_per_example = {}
for model, log_data in model_to_logs.items():
    perf_list = []
    for example in log_data:
        predicted_clusters, mention_to_predicted = mention_to_cluster(
            example["predicted_clusters"], threshold=1)
        gold_clusters, mention_to_gold = mention_to_cluster(
            example["clusters"], threshold=1)

        # A fresh evaluator per document, so the score reflects this
        # document alone.
        evaluator = CorefEvaluator()
        evaluator.update(predicted_clusters, gold_clusters,
                         mention_to_predicted, mention_to_gold)

        # Document length is taken as the last entry of subtoken_map plus one.
        doc_len = example["subtoken_map"][-1] + 1
        num_ents = len(gold_clusters)
        # Index 2 of get_prf() is used as the F-score, scaled to a percentage.
        example_fscore = evaluator.get_prf()[2] * 100.0

        perf_list.append((doc_len, num_ents, example_fscore, example))

    model_perf_per_example[model] = perf_list
        

### Correlations with Document Length and Number of Entities

In [9]:
# Spearman rank correlation of the per-document F-score with document length
# and with the number of gold clusters, reported per model.
for model, perf_list in model_perf_per_example.items():
    doc_len_list, num_ent_list, fscore, _ = zip(*perf_list)

    # spearmanr returns (correlation, p-value); only the correlation is shown.
    rho_doc_len = spearmanr(doc_len_list, fscore)[0]
    rho_num_ent = spearmanr(num_ent_list, fscore)[0]

    print('{} doc len {:.2f} num ent {:.2f}'.format(model, rho_doc_len, rho_num_ent))

unbounded doc len -0.07 num ent -0.36
learned_5 doc len 0.01 num ent -0.23
learned_10 doc len -0.06 num ent -0.35
learned_20 doc len -0.03 num ent -0.32
lru_5 doc len -0.00 num ent -0.41
lru_10 doc len 0.01 num ent -0.36
lru_20 doc len -0.02 num ent -0.33


In [10]:
# For each memory size, list the documents on which the learned and the LRU
# variants differ most in F-score, in each direction.
# The loop variable is deliberately not called `num_cells`, so the
# configuration list of that name is left intact for re-runs of earlier cells.
for cell_count in [5, 10, 20]:
    perf_learned_list = model_perf_per_example[f'learned_{cell_count}']
    perf_lru_list = model_perf_per_example[f'lru_{cell_count}']

    # Documents are paired by position, so both logs must cover the same set.
    assert len(perf_learned_list) == len(perf_lru_list)

    learned_better = []
    lru_better = []
    for perf_learned, perf_lru in zip(perf_learned_list, perf_lru_list):
        # Entries are (doc_len, num_ents, fscore, example).
        assert perf_learned[3]['doc_key'] == perf_lru[3]['doc_key']

        diff_learned = perf_learned[2] - perf_lru[2]
        if diff_learned > 0:
            learned_better.append((diff_learned, perf_learned[3]))
        elif diff_learned < 0:
            # Store the margin as a positive number; ties appear in neither list.
            lru_better.append((-diff_learned, perf_lru[3]))

    print(f'Num cells {cell_count}')
    print('Learned better docs')
    learned_better = sorted(learned_better, key=lambda x: x[0], reverse=True)
    for (diff_score, example) in learned_better[:3]:
        print(f'{example["doc_key"]}, {diff_score:.1f}')
    print('LRU better docs')
    lru_better = sorted(lru_better, key=lambda x: x[0], reverse=True)
    for (diff_score, example) in lru_better[:3]:
        print(f'{example["doc_key"]}, {diff_score:.1f}')
    print()

Num cells 5
Learned better docs
8867_the_magnificent_ambersons_brat_0, 56.6
6593_history_of_tom_jones_a_foundling_brat_0, 37.7
24_o_pioneers_brat_0, 28.2
LRU better docs
940_the_last_of_the_mohicans_a_narrative_of_1757_brat_0, 1.4
11231_bartleby_the_scrivener_a_story_of_wallstreet_brat_0, 1.2

Num cells 10
Learned better docs
1260_jane_eyre_an_autobiography_brat_0, 16.4
36_the_war_of_the_worlds_brat_0, 15.5
41_the_legend_of_sleepy_hollow_brat_0, 15.1
LRU better docs
829_gullivers_travels_into_several_remote_nations_of_the_world_brat_0, 3.6
974_the_secret_agent_a_simple_tale_brat_0, 2.0
5348_ragged_dick_or_street_life_in_new_york_with_the_bootblacks_brat_0, 1.5

Num cells 20
Learned better docs
1206_the_flying_u_ranch_brat_0, 6.9
599_vanity_fair_brat_0, 6.6
1260_jane_eyre_an_autobiography_brat_0, 6.5
LRU better docs
5230_the_invisible_man_a_grotesque_romance_brat_0, 5.3
1023_bleak_house_brat_0, 4.8
77_the_house_of_the_seven_gables_brat_0, 4.7

