In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
import logging
from collections import defaultdict

import dill
import numpy as np
import pymongo
import pandas as pd
from sklearn.linear_model import LogisticRegression
from typing import Any

from CrossValidation import cross_validation
from Settings import Settings
from cost_functions import *
from crel_helper import get_cr_tags
from function_helpers import get_function_names, get_functions_by_name
from results_procesor import ResultsProcessor, __MICRO_F1__
from searn_parser import SearnModelTemplateFeatures
from template_feature_extractor import *
from window_based_tagger_config import get_config
from wordtagginghelper import merge_dictionaries
from searn_parser_breadth_first import ParseActionResult, SearnModelBreadthFirst

In [3]:
# Data Set Partition
CV_FOLDS = 5
MIN_FEAT_FREQ = 5

# Global settings
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"

coref_root = root_folder + "CoReference/"
coref_output_folder = coref_root + "CRel/"

config = get_config(training_folder)

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/


In [4]:
train_fname = coref_output_folder + "training_crel_anatagged_essays_most_recent_code.dill"
with open(train_fname, "rb") as f:
    pred_tagged_essays_train = dill.load(f)

test_fname = coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill"
with open(test_fname, "rb") as f:
    pred_tagged_essays_test = dill.load(f)

len(pred_tagged_essays_train),len(pred_tagged_essays_test)

(902, 226)

In [5]:
EMPTY = "Empty"
from BrattEssay import ANAPHORA

def to_is_valid_crel(tags):
    filtered = set()
    for t in tags:
        t_lower = t.lower()
        if "rhetorical" in t_lower or "change" in t_lower or "other" in t_lower:
            continue
        if "->" in t and ANAPHORA not in t:
            filtered.add(t)
    return filtered

def get_crel_tags_by_sent(essays_a):
    crels_by_sent = []
    for ea in essays_a:
        for asent in ea.sentences:
            all_atags = set()
            for awd, atags in asent:
                all_atags.update(to_is_valid_crel(atags))
            crels_by_sent.append(all_atags)
    return crels_by_sent

In [6]:
cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test)
cr_tags[0:10]

['Causer:5->Result:50',
 'Causer:7->Result:50',
 'Causer:3->Result:4',
 'Causer:13->Result:50',
 'Causer:1->Result:50',
 'Causer:11->Result:50',
 'Causer:6->Result:50',
 'Causer:3->Result:5',
 'Causer:4->Result:14',
 'Causer:3->Result:1']

In [7]:
set_cr_tags = set(cr_tags)

In [8]:
def evaluate_model_essay_level(
        collection_prefix: str,
        folds: List[Tuple[Any, Any]],
        extractor_fn_names_lst: List[str],
        cost_function_name: str,
        beta: float,
        ngrams: int,
        stemmed: bool,
        max_epochs: int,
        down_sample_rate=1.0) -> float:

    if down_sample_rate < 1.0:
        new_folds = []  # type: List[Tuple[Any, Any]]
        for i, (essays_TD, essays_VD) in enumerate(folds):
            essays_TD = essays_TD[:int(down_sample_rate * len(essays_TD))]
            essays_VD = essays_VD[:int(down_sample_rate * len(essays_VD))]
            new_folds.append((essays_TD, essays_VD))
        folds = new_folds  # type: List[Tuple[Any, Any]]

    serial_results = [
        model_train_predict_essay_level(essays_TD, essays_VD, extractor_fn_names_lst, cost_function_name, ngrams, stemmed, beta, max_epochs)
        for essays_TD, essays_VD in folds
    ]

    cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag = defaultdict(list), defaultdict(list)
    cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = defaultdict(list), defaultdict(list)

    # record the number of features in each fold
    number_of_feats = []

    # Parallel is almost 5X faster!!!
    parser_models = []
    for (model, num_feats,
         sent_td_ys_bycode, sent_vd_ys_bycode,
         sent_td_pred_ys_bycode, sent_vd_pred_ys_bycode) in serial_results:
        number_of_feats.append(num_feats)

        parser_models.append(model)
        merge_dictionaries(sent_td_ys_bycode, cv_sent_td_ys_by_tag)
        merge_dictionaries(sent_vd_ys_bycode, cv_sent_vd_ys_by_tag)
        merge_dictionaries(sent_td_pred_ys_bycode, cv_sent_td_predictions_by_tag)
        merge_dictionaries(sent_vd_pred_ys_bycode, cv_sent_vd_predictions_by_tag)

    # print(processor.results_to_string(sent_td_objectid, CB_SENT_TD, sent_vd_objectid, CB_SENT_VD, "SENTENCE"))
    return parser_models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag

In [9]:
def add_labels(observed_tags, ys_bytag_sent):
    global set_cr_tags
    for tag in set_cr_tags:
        if tag in observed_tags:
            ys_bytag_sent[tag].append(1)
        else:
            ys_bytag_sent[tag].append(0)
            
def get_label_data_essay_level(tagged_essays):
    global set_cr_tags
    # outputs
    ys_bytag_essay = defaultdict(list)

    for essay in tagged_essays:
        unique_cr_tags = set()
        for sentence in essay.sentences:
            for word, tags in sentence:
                unique_cr_tags.update(set_cr_tags.intersection(tags))
        add_labels(unique_cr_tags, ys_bytag_essay)
    return ys_bytag_essay

In [10]:
def metrics_to_df(metrics):
    import Rpfa

    rows = []
    for k,val in metrics.items():
        if type(val) == Rpfa.rpfa:
            d = dict(val.__dict__) # convert obj to dict
        elif type(val) == dict:
            d = dict(val)
        else:
            d = dict()
        d["code"] = k
        rows.append(d)
    return pd.DataFrame(rows)

def get_micro_metrics(df):
    return df[df.code == "MICRO_F1"][["accuracy", "f1_score", "recall", "precision"]]

def predict_essay_level(parser, essays):
    pred_ys_by_sent = defaultdict(list)
    for essay_ix, essay in enumerate(essays):
        unq_pre_relations = set()
        for sent_ix, taggged_sentence in enumerate(essay.sentences):
            predicted_tags = essay.pred_tagged_sentences[sent_ix]
            pred_relations = parser.predict_sentence(taggged_sentence, predicted_tags)
            unq_pre_relations.update(pred_relations)
        # Store predictions for evaluation
        add_labels(unq_pre_relations, pred_ys_by_sent)
    return pred_ys_by_sent

In [11]:
LINE_WIDTH = 80

# other settings
DOWN_SAMPLE_RATE = 1.0  # For faster smoke testing the algorithm
BASE_LEARNER_FACT = None
COLLECTION_PREFIX = "CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_MOST_RECENT_CODE"

# some of the other extractors aren't functional if the system isn't able to do a basic parse
# so the base extractors are the MVP for getting to a basic parser, then additional 'meta' parse
# features from all_extractors can be included
base_extractors = [
    single_words,
    word_pairs,
    three_words,
    between_word_features
]

all_extractor_fns = base_extractors + [
    word_distance,
    valency,
    unigrams,
    third_order,
    label_set,
    size_features
]

all_cost_functions = [
    micro_f1_cost,
    micro_f1_cost_squared,
    micro_f1_cost_plusone,
    micro_f1_cost_plusepsilon,
    binary_cost,
    inverse_micro_f1_cost,
    uniform_cost
]

all_extractor_fn_names = get_function_names(all_extractor_fns)
base_extractor_fn_names = get_function_names(base_extractors)
all_cost_fn_names = get_function_names(all_cost_functions)

ngrams = 1
stemmed = True
cost_function_name = micro_f1_cost_plusepsilon.__name__
dual = True
fit_intercept = True
beta = 0.5
max_epochs = 2
C = 0.5
penalty = "l2"

In [12]:
# Note these also differ for SC dataset
BASE_LEARNER_FACT = lambda : LogisticRegression(dual=dual, C=C, penalty=penalty, fit_intercept=fit_intercept)
best_extractor_names = ['single_words', 'between_word_features', 'label_set',
                                    'three_words', 'third_order', 'unigrams'] # type: List[str]

In [13]:
def model_train_predict_essay_level(essays_TD, essays_VD, extractor_names, cost_function_name, ngrams, stemmed, beta, max_epochs):
    extractors = get_functions_by_name(extractor_names, all_extractor_fns)
    # get single cost function
    cost_fn = get_functions_by_name([cost_function_name], all_cost_functions)[0]
    assert cost_fn is not None, "Cost function look up failed"
    # Ensure all extractors located
    assert len(extractors) == len(extractor_names), "number of extractor functions does not match the number of names"

    template_feature_extractor = NonLocalTemplateFeatureExtractor(extractors=extractors)
    if stemmed:
        ngram_extractor = NgramExtractorStemmed(max_ngram_len=ngrams)
    else:
        ngram_extractor = NgramExtractor(max_ngram_len=ngrams)
    parse_model = SearnModelBreadthFirst(feature_extractor=template_feature_extractor,
                                             cost_function=cost_fn,
                                             min_feature_freq=MIN_FEAT_FREQ,
                                             ngram_extractor=ngram_extractor, cr_tags=cr_tags,
                                             base_learner_fact=BASE_LEARNER_FACT,
                                             beta=beta,
                                             # log_fn=lambda s: print(s))
                                             log_fn=lambda s: None)

    parse_model.train(essays_TD, max_epochs=max_epochs)

    num_feats = template_feature_extractor.num_features()

    sent_td_ys_bycode = get_label_data_essay_level(essays_TD)
    sent_vd_ys_bycode = get_label_data_essay_level(essays_VD)

    sent_td_pred_ys_bycode = predict_essay_level(parse_model, essays_TD)
    sent_vd_pred_ys_bycode = predict_essay_level(parse_model, essays_VD)

    return parse_model, num_feats, sent_td_ys_bycode, sent_vd_ys_bycode, sent_td_pred_ys_bycode, sent_vd_pred_ys_bycode

In [14]:
test_folds     = [(pred_tagged_essays_train, pred_tagged_essays_test)]  # type: List[Tuple[Any,Any]]

## Essay Level Results

In [15]:
result_test_essay_level = evaluate_model_essay_level(
    collection_prefix=COLLECTION_PREFIX,
    folds=test_folds,
    extractor_fn_names_lst=best_extractor_names,
    cost_function_name=cost_function_name,
    ngrams=ngrams,
    beta=beta,
    stemmed=stemmed,
    down_sample_rate=DOWN_SAMPLE_RATE,
    max_epochs=max_epochs)

## Train

In [16]:
models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_td_preds_by_sent, \
    cv_sent_vd_ys_by_tag = result_test_essay_level
    
mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag)
get_micro_metrics(metrics_to_df(mean_metrics))

Unnamed: 0,accuracy,f1_score,recall,precision
95,0.985929,0.782936,0.760219,0.807052


## Test

In [17]:
models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, \
    cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = result_test_essay_level
    
mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag)
get_micro_metrics(metrics_to_df(mean_metrics))

Unnamed: 0,accuracy,f1_score,recall,precision
95,0.98585,0.744063,0.773309,0.716949


In [18]:
from itertools import combinations

def get_possible_crels(predicted_tags):
    if len(predicted_tags) < 2:
        return set()
    predicted_tags = sorted(predicted_tags)
    pred_crels = set()
    for a,b in combinations(predicted_tags, 2):
        pred_crels.add("Causer:{a}->Result:{b}".format(a=a, b=b))
        pred_crels.add("Causer:{b}->Result:{a}".format(a=a, b=b))
    return pred_crels

def to_canonical_parse(crels):
    return tuple(sorted(crels))

def get_crels(parse):
    crels = set()
    p = parse
    while p:
        if p.relations:
            crels.update(p.relations)
        p = p.parent_action
    return crels

In [19]:
%%time
model = models[0]
parses = []
sent2parse = dict()
for eix, essay in enumerate(pred_tagged_essays_test):
    for sent_ix, taggged_sentence in enumerate(essay.sentences):
        predicted_tags = essay.pred_tagged_sentences[sent_ix]
        unique_cr_tags = set()
        for word, tags in taggged_sentence:
            unique_cr_tags.update(set_cr_tags.intersection(tags))
        unq_ptags = set([p for p in predicted_tags if p != EMPTY])
        possible_rels = get_possible_crels(unq_ptags)
        best_parse = to_canonical_parse(possible_rels.intersection(unique_cr_tags))
        gold_parse = to_canonical_parse(unique_cr_tags)
        if len(best_parse) > 0:
            pred_parses = model.generate_all_potential_parses_for_sentence(
                tagged_sentence=taggged_sentence, predicted_tags=predicted_tags, 
                top_n=500)
            parses.append((eix, sent_ix, pred_parses, best_parse, gold_parse))
            sent2parse[(eix,sent_ix)] = (pred_parses, best_parse, gold_parse)
            #print("parses:", len(pred_parses))

CPU times: user 2min 12s, sys: 984 ms, total: 2min 12s
Wall time: 2min 13s


In [21]:
pred_ranks = []
missed = []
for eix, essay in enumerate(pred_tagged_essays_test):
    for sent_ix, taggged_sentence in enumerate(essay.sentences):
        key = (eix, sent_ix)
        if key in sent2parse:
            predicted_tags = essay.pred_tagged_sentences[sent_ix]        
            pred_crels = model.predict_sentence(tagged_sentence=taggged_sentence, predicted_tags=predicted_tags)
            
            pred_parses, best_parse, gold_parse = sent2parse[(eix, sent_ix)]
            top_rank = -1
            for rank, pp in enumerate(pred_parses):            
                parse_crels = get_crels(pp)
                if parse_crels == pred_crels:
                    top_rank = rank
                    break
            pred_ranks.append(top_rank)
            if top_rank != 0:
                missed.append((eix, sent_ix, top_rank, pred_parses, pred_crels))

In [23]:
max(pred_ranks), min(pred_ranks), len(missed)

(0, 0, 0)

In [37]:
# import inspect
# lines = inspect.getsource(SearnModelBreadthFirst)
# print(lines)

In [27]:
len(missed)

0

In [29]:
len([pred_ranks for p in pred_ranks if p != 0]), len(pred_ranks)

(2, 413)

In [33]:
[p for p in pred_ranks if p != 0]

[2, 2]

In [31]:
eix, sent_ix, top_rank, pred_parses, pred_crels = missed[0]
eix, sent_ix

(47, 4)

In [40]:
print(pred_crels)
print()
for pp in pred_parses[0:3]:
    a_crels = get_crels(pp)
    print(a_crels)
    acts = pp.get_action_sequence()
    print(len(acts))
    for a in acts:
        print(a)
    print()

{'Causer:1->Result:3'}

{'Causer:3->Result:3', 'Causer:1->Result:3'}
7
Shift:0.9836 	 :-1.0000
Shift:0.9674 	 :-1.0000
LArc:0.9008 	 CAUSE_EFFECT:0.8463
LArc:0.9861 	 CAUSE_EFFECT:0.9617
Shift:0.9989 	 :-1.0000
LArc:0.9900 	 CAUSE_EFFECT:0.4881
Shift:0.9856 	 :-1.0000

{'Causer:3->Result:3', 'Causer:1->Result:3'}
7
Shift:0.9836 	 :-1.0000
Shift:0.9674 	 :-1.0000
LArc:0.9008 	 CAUSE_EFFECT:0.8463
LArc:0.9861 	 CAUSE_EFFECT:0.9617
Shift:0.9989 	 :-1.0000
LArc:0.9900 	 CAUSE_EFFECT:0.4881
Skip:0.0101 	 :-1.0000

{'Causer:1->Result:3'}
7
Shift:0.9836 	 :-1.0000
Shift:0.9674 	 :-1.0000
LArc:0.9008 	 CAUSE_EFFECT:0.8463
LArc:0.9861 	 CAUSE_EFFECT:0.9617
Shift:0.9989 	 :-1.0000
LArc:0.9900 	 REJECT:0.4694
Shift:0.9836 	 :-1.0000



In [44]:
type(crels), type(pred_crels)

(set, set)

In [25]:
print(pred_crels)
print()
print()
for rank, pp in enumerate(pred_parses):
    crels = get_crels(pp)
    print(rank)
    print(crels)
    if crels == pred_crels:
        print(rank)

{'Causer:1->Result:3'}


0
{'Causer:3->Result:3', 'Causer:1->Result:3'}
1
{'Causer:3->Result:3', 'Causer:1->Result:3'}
2
{'Causer:1->Result:3'}
2
3
{'Causer:1->Result:3'}
3
4
{'Causer:3->Result:3', 'Causer:1->Result:3'}
5
{'Causer:3->Result:3', 'Causer:1->Result:3'}
6
{'Causer:3->Result:3', 'Causer:1->Result:3'}
7
{'Causer:3->Result:3', 'Causer:1->Result:3'}
8
{'Causer:1->Result:3'}
8
9
{'Causer:1->Result:3'}
9
10
{'Causer:3->Result:3', 'Causer:1->Result:3'}
11
{'Causer:1->Result:3'}
11
12
{'Causer:3->Result:3', 'Causer:1->Result:3'}
13
{'Causer:3->Result:3', 'Causer:1->Result:3'}
14
{'Causer:1->Result:3'}
14
15
{'Causer:1->Result:3'}
15
16
{'Causer:1->Result:3'}
16
17
{'Causer:1->Result:3'}
17
18
{'Causer:3->Result:3', 'Causer:1->Result:3'}
19
{'Causer:3->Result:3', 'Causer:1->Result:3'}
20
{'Causer:1->Result:3'}
20
21
{'Causer:1->Result:3'}
21
22
{'Causer:3->Result:3', 'Causer:1->Result:3'}
23
{'Causer:3->Result:3', 'Causer:1->Result:3'}
24
{'Causer:3->Result:3', 'Causer:1->Result:3'

{'Causer:1->Result:3',
 'Causer:1->Result:6',
 'Causer:3->Result:1',
 'Causer:50->Result:6',
 'Causer:6->Result:1',
 'Causer:6->Result:50'}

In [35]:
len([p for p in pred_ranks if p != -1]), len(pred_ranks)

(393, 413)

In [36]:
pred_ranks[0:30]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 -1,
 0,
 0,
 0,
 0,
 0,
 0,
 5,
 -1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 7,
 0,
 0,
 0]

In [56]:
for rank, p in enumerate(srtd):
    if get_crels(p) == pred_crels:
        print(rank)

0
1
10
70
71
77
80
81
82
135


In [450]:
pp_best = pred_parses[3]
get_parse_actions(pp_best)

[('Shift', 0.996526847521099, set(), None),
 ('Rarc ', 0.6183561896146336, {'Causer:6->Result:50'}, 'EFFECT_CAUSE'),
 ('LArc ', 0.6848744134207962, {'Causer:6->Result:14'}, 'CAUSE_EFFECT'),
 ('LArc ', 0.964686993674217, {'Causer:14->Result:50'}, 'EFFECT_CAUSE'),
 ('Shift', 0.8856456131967131, set(), None),
 ('LArc ', 0.5860199657329614, {'Causer:14->Result:50'}, 'CAUSE_EFFECT'),
 ('Shift', 0.9980560375272031, set(), None),
 ('LArc ', 0.9727310340889151, {'Causer:7->Result:50'}, 'EFFECT_CAUSE'),
 ('Shift', 0.9555659616960438, set(), None)]

In [442]:
pred_crels - get_crels(pp)

{'Causer:6->Result:50'}

In [38]:
len(pred_parses)

500

In [37]:
eix, sent_ix, top_rank, pred_parses, pred_crels = missed[3]
print(eix, sent_ix)
print(to_canonical_parse(pred_crels))
print()
for pp in pred_parses[0:10]:
    print(to_canonical_parse(get_crels(pp))) #, pred_crels - get_crels(pp))

4 4
('Causer:1->Result:3', 'Causer:3->Result:50')

('Causer:3->Result:1', 'Causer:3->Result:50', 'Causer:50->Result:3')
('Causer:3->Result:1', 'Causer:3->Result:50', 'Causer:50->Result:1', 'Causer:50->Result:3')
('Causer:3->Result:50', 'Causer:50->Result:3')
('Causer:3->Result:50', 'Causer:50->Result:3')
('Causer:3->Result:50', 'Causer:50->Result:1', 'Causer:50->Result:3')
('Causer:3->Result:50', 'Causer:50->Result:1', 'Causer:50->Result:3')
('Causer:3->Result:50', 'Causer:50->Result:3')
('Causer:3->Result:50', 'Causer:50->Result:3')
('Causer:3->Result:1', 'Causer:3->Result:50', 'Causer:50->Result:3')
('Causer:3->Result:1', 'Causer:3->Result:50', 'Causer:50->Result:1')


In [393]:
def get_relations(parse):
    crels = []
    p = parse
    while p:
        if p.relations:
            crels.append((p.prob, p.relations))
        p = p.parent_action
    return crels

eix, sent_ix, p, best_cr, gold_cr =  parses[9]
print(best_cr)
print(gold_cr)
for pix, parse in enumerate(p[0:10]):    
    relations = get_relations(parse)
    if relations:
        print(pix, parse.cum_prob, relations)

('Causer:1->Result:50', 'Causer:3->Result:50')
('Causer:1->Result:50', 'Causer:3->Result:50')
0 0.9699260628959838 [(0.9773570349012927, {'Causer:1->Result:50'}), (0.9213480687170373, {'Causer:3->Result:50'})]
1 0.9339234788672464 [(0.8580638349196954, {'Causer:1->Result:50'}), (0.9213480687170373, {'Causer:3->Result:50'})]
2 0.9040129135038413 [(0.9921987471015425, {'Causer:1->Result:50'}), (0.9213480687170373, {'Causer:3->Result:50'})]
3 0.8424664659685894 [(0.820068861551706, {'Causer:1->Result:50'}), (0.9213480687170373, {'Causer:3->Result:50'})]
4 0.6641545332755303 [(0.13580977516546924, {'Causer:3->Result:50'})]
5 0.6378868134772913 [(0.9739531250396251, {'Causer:1->Result:50'}), (0.12019366565821911, {'Causer:1->Result:3'}), (0.9213480687170373, {'Causer:3->Result:50'})]
6 0.6171575694215653 [(0.13580977516546924, {'Causer:3->Result:50'})]
7 0.5854511587771295 [(0.9773570349012927, {'Causer:1->Result:50'}), (0.9213480687170373, {'Causer:3->Result:50'})]
8 0.5492400640439942 [(0

In [435]:
def get_unique_relations(parse):
    crels = set()
    p = parse
    while p:
        if p.relations:
            crels.update(p.relations)
        p = p.parent_action
    return crels

def get_parse_actions(parse):
    actions = []
    p = parse
    while p:
        actions.append((p.action, p.prob, p.relations, p.lr_action_probs))
        p = p.parent_action
    return actions[::-1]

ranks = []
top_probs = []
for (eix,sent_ix), (pred_parses, best_parse, gold_parse) in sent2parse.items():  
    top_rank = -1
    top_rank_prob = -1
    for rank, parse in enumerate(pred_parses):
#         print(rank, parse.cum_prob, parse)
        pred_parse = to_canonical_parse(get_unique_relations(parse))
        if pred_parse == best_parse:
            top_rank = rank
            top_rank_prob = parse.cum_prob
            break
    ranks.append(top_rank)
    top_probs.append(top_rank_prob)

In [375]:
len([r for r in ranks if r != -1]) / len(ranks)

0.9975786924939467

In [382]:
# % in top_n
len([r for r in ranks if r != -1 and r < 300]) / len(ranks), np.mean([r for r in ranks if r != -1 and r < 500])

(0.9951573849878934, 4.915048543689321)

In [387]:
tp = [p for p in top_probs if p != -1]
np.mean(tp), np.max(tp), np.min(tp), np.percentile(tp,1)

(0.8489996304789936,
 0.9907129055552866,
 0.06311890298667518,
 0.2717343285392599)

In [388]:
ranks_incl = []
has_all_rels = []
missing = []
for (eix,sent_ix), (pred_parses, best_parse, gold_parse) in sent2parse.items():  
    top_rank = -1
    set_best_parse = set(best_parse)
    all_preds = set()
    
    for rank, parse in enumerate(pred_parses):
        set_pred_parse = get_unique_relations(parse)
        all_preds.update(set_pred_parse)
        if len(set_best_parse - set_pred_parse) == 0:
            top_rank = rank
            break
    ranks_incl.append(top_rank)
    if len(set_best_parse - all_preds) == 0:
        has_all_rels.append(1)
    else:
        has_all_rels.append(0)
        missing.append((eix, sent_ix, pred_parses, all_preds, best_parse, gold_parse))

In [389]:
len([r for r in ranks_incl if r != -1]) / len(ranks_incl)

0.9975786924939467

In [390]:
np.mean(has_all_rels)

0.9975786924939467

In [392]:
len(missing)

1

In [391]:
eix, sent_ix, pred_parses, all_preds, best_parse, gold_parse = missing[3]
e = pred_tagged_essays_test[eix]
set(e.pred_tagged_sentences[sent_ix])

IndexError: list index out of range

In [None]:


def print_dict(d):
    s = ""
    for k,v in sorted(d.items(), key = lambda tpl: -tpl[-1]):
        s +="{key}:{val:.3f}, ".format(key=k, val=v)
    return s.strip()[:-1]

print(best_parse)
print([t for t in e.pred_tagged_sentences[sent_ix] if t != EMPTY])
print()
for p in pred_parses:
    actions = get_parse_actions(p)
    for act in actions:
        print(act[0:2], str(act[2]).ljust(25), act[3])
    print()