In [2]:
import datetime
import logging
from collections import defaultdict

import dill
import numpy as np
import pymongo
import pandas as pd
from sklearn.linear_model import LogisticRegression
from typing import Any

from CrossValidation import cross_validation
from Settings import Settings
from cost_functions import *
from crel_helper import get_cr_tags
from function_helpers import get_function_names, get_functions_by_name
from results_procesor import ResultsProcessor, __MICRO_F1__
from searn_parser import SearnModelTemplateFeatures
from template_feature_extractor import *
from window_based_tagger_config import get_config
from wordtagginghelper import merge_dictionaries

In [3]:
# Data Set Partition
CV_FOLDS = 5
MIN_FEAT_FREQ = 5

# Global settings
settings = Settings()
root_folder = settings.data_directory + "CoralBleaching/Thesis_Dataset/"
training_folder = root_folder + "Training" + "/"
test_folder = root_folder + "Test" + "/"

coref_root = root_folder + "CoReference/"
coref_output_folder = coref_root + "CRel/"

config = get_config(training_folder)

Results Dir: /Users/simon.hughes/Google Drive/Phd/Results/
Data Dir:    /Users/simon.hughes/Google Drive/Phd/Data/
Root Dir:    /Users/simon.hughes/GitHub/NlpResearch/
Public Data: /Users/simon.hughes/GitHub/NlpResearch/Data/PublicDatasets/


In [4]:
train_fname = coref_output_folder + "training_crel_anatagged_essays_most_recent_code.dill"
with open(train_fname, "rb") as f:
    pred_tagged_essays_train = dill.load(f)

test_fname = coref_output_folder + "test_crel_anatagged_essays_most_recent_code.dill"
with open(test_fname, "rb") as f:
    pred_tagged_essays_test = dill.load(f)

len(pred_tagged_essays_train),len(pred_tagged_essays_test)

(902, 226)

In [5]:
EMPTY = "Empty"
from BrattEssay import ANAPHORA

def to_is_valid_crel(tags):
    filtered = set()
    for t in tags:
        t_lower = t.lower()
        if "rhetorical" in t_lower or "change" in t_lower or "other" in t_lower:
            continue
        if "->" in t and ANAPHORA not in t:
            filtered.add(t)
    return filtered

def get_crel_tags_by_sent(essays_a):
    crels_by_sent = []
    for ea in essays_a:
        for asent in ea.sentences:
            all_atags = set()
            for awd, atags in asent:
                all_atags.update(to_is_valid_crel(atags))
            crels_by_sent.append(all_atags)
    return crels_by_sent

In [6]:
cr_tags = get_cr_tags(train_tagged_essays=pred_tagged_essays_train, tag_essays_test=pred_tagged_essays_test)
cr_tags[0:10]

['Causer:5->Result:50',
 'Causer:7->Result:50',
 'Causer:3->Result:4',
 'Causer:13->Result:50',
 'Causer:1->Result:50',
 'Causer:11->Result:50',
 'Causer:6->Result:50',
 'Causer:3->Result:5',
 'Causer:4->Result:14',
 'Causer:3->Result:1']

In [7]:
set_cr_tags = set(cr_tags)

In [8]:
def evaluate_model_essay_level(
        collection_prefix: str,
        folds: List[Tuple[Any, Any]],
        extractor_fn_names_lst: List[str],
        cost_function_name: str,
        beta: float,
        ngrams: int,
        stemmed: bool,
        max_epochs: int,
        down_sample_rate=1.0) -> float:

    if down_sample_rate < 1.0:
        new_folds = []  # type: List[Tuple[Any, Any]]
        for i, (essays_TD, essays_VD) in enumerate(folds):
            essays_TD = essays_TD[:int(down_sample_rate * len(essays_TD))]
            essays_VD = essays_VD[:int(down_sample_rate * len(essays_VD))]
            new_folds.append((essays_TD, essays_VD))
        folds = new_folds  # type: List[Tuple[Any, Any]]

    serial_results = [
        model_train_predict_essay_level(essays_TD, essays_VD, extractor_fn_names_lst, cost_function_name, ngrams, stemmed, beta, max_epochs)
        for essays_TD, essays_VD in folds
    ]

    cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag = defaultdict(list), defaultdict(list)
    cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = defaultdict(list), defaultdict(list)

    # record the number of features in each fold
    number_of_feats = []

    # Parallel is almost 5X faster!!!
    parser_models = []
    for (model, num_feats,
         sent_td_ys_bycode, sent_vd_ys_bycode,
         sent_td_pred_ys_bycode, sent_vd_pred_ys_bycode) in serial_results:
        number_of_feats.append(num_feats)

        parser_models.append(model)
        merge_dictionaries(sent_td_ys_bycode, cv_sent_td_ys_by_tag)
        merge_dictionaries(sent_vd_ys_bycode, cv_sent_vd_ys_by_tag)
        merge_dictionaries(sent_td_pred_ys_bycode, cv_sent_td_predictions_by_tag)
        merge_dictionaries(sent_vd_pred_ys_bycode, cv_sent_vd_predictions_by_tag)

    # print(processor.results_to_string(sent_td_objectid, CB_SENT_TD, sent_vd_objectid, CB_SENT_VD, "SENTENCE"))
    return parser_models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag

In [9]:
def add_labels(observed_tags, ys_bytag_sent):
    global set_cr_tags
    for tag in set_cr_tags:
        if tag in observed_tags:
            ys_bytag_sent[tag].append(1)
        else:
            ys_bytag_sent[tag].append(0)
            
def get_label_data_essay_level(tagged_essays):
    global set_cr_tags
    # outputs
    ys_bytag_essay = defaultdict(list)

    for essay in tagged_essays:
        unique_cr_tags = set()
        for sentence in essay.sentences:
            for word, tags in sentence:
                unique_cr_tags.update(set_cr_tags.intersection(tags))
        add_labels(unique_cr_tags, ys_bytag_essay)
    return ys_bytag_essay

In [10]:
def metrics_to_df(metrics):
    import Rpfa

    rows = []
    for k,val in metrics.items():
        if type(val) == Rpfa.rpfa:
            d = dict(val.__dict__) # convert obj to dict
        elif type(val) == dict:
            d = dict(val)
        else:
            d = dict()
        d["code"] = k
        rows.append(d)
    return pd.DataFrame(rows)

def get_micro_metrics(df):
    return df[df.code == "MICRO_F1"][["accuracy", "f1_score", "recall", "precision"]]

def predict_essay_level(parser, essays):
    pred_ys_by_sent = defaultdict(list)
    for essay_ix, essay in enumerate(essays):
        unq_pre_relations = set()
        for sent_ix, taggged_sentence in enumerate(essay.sentences):
            predicted_tags = essay.pred_tagged_sentences[sent_ix]
            pred_relations = parser.predict_sentence(taggged_sentence, predicted_tags)
            unq_pre_relations.update(pred_relations)
        # Store predictions for evaluation
        add_labels(unq_pre_relations, pred_ys_by_sent)
    return pred_ys_by_sent

In [11]:
from collections import defaultdict
from typing import Set, List

from StructuredLearning.SEARN.stack import Stack
from oracle import Oracle
from shift_reduce_helper import *
from shift_reduce_parser import ShiftReduceParser

from Classifiers.StructuredLearning.SEARN.searn_parser import SearnModelTemplateFeatures

In [286]:
class ParseActionResult(object):
    def __init__(self, action, relations, prob, cause2effects, effect2causers, oracle, tag_ix, ctx, parent_action, lr_action_probs):
        self.action = action
        self.relations = relations
        self.prob = prob
        self.cause2effects = cause2effects
        self.effect2causers = effect2causers
        self.oracle = oracle
        self.current_tag_ix = tag_ix # store for reference / debugging
        self.tag_ix = tag_ix
        self.ctx = ctx
        self.parent_action = parent_action
        self.lr_action_probs = lr_action_probs

        self.probs = [self.prob]
        if parent_action is not None:
            self.probs = parent_action.probs + self.probs
        self.cum_prob = np.product(self.probs)
        self.__execute__()

    def __execute__(self):
        buffer_tag_pair = self.ctx.pos_ptag_seq[self.tag_ix]
        if not self.oracle.execute(self.action, self.oracle.tos(), buffer_tag_pair) or self.oracle.is_stack_empty():
            # increment tag_ix
            self.tag_ix += 1

    def is_terminal(self):
        return self.tag_ix >= len(self.ctx.pos_ptag_seq)

class ParseContext(object):
    def __init__(self, pos_ptag_seq, tag2span, tag2words, words):
        self.pos_ptag_seq = pos_ptag_seq
        self.tag2span = tag2span
        self.tag2words = tag2words
        self.words = words

In [287]:
class SearnModelBreadthFirst(SearnModelTemplateFeatures):
    def __init__(self, *args, **kwargs):
        super(SearnModelBreadthFirst, self).__init__(*args, **kwargs)

    def build_parse_context(self, tagged_sentence, predicted_tags):
        pos_ptag_seq, _, tag2span, all_predicted_rtags, _ = self.get_tags_relations_for(
            tagged_sentence, predicted_tags, self.cr_tags)

        if len(all_predicted_rtags) == 0:
            return None

        # tags without positional info
        rtag_seq = [t for t, i in pos_ptag_seq if t[0].isdigit()]
        # if not at least 2 concept codes, then can't parse
        if len(rtag_seq) < 2:
            return None

        words = [wd for wd, tags in tagged_sentence]

        tag2words = defaultdict(list)
        for ix, tag_pair in enumerate(pos_ptag_seq):
            bstart, bstop = tag2span[tag_pair]
            tag2words[tag_pair] = self.ngram_extractor.extract(words[bstart:bstop + 1])  # type: List[str]

        ctx = ParseContext(pos_ptag_seq=pos_ptag_seq, tag2span=tag2span, tag2words=tag2words, words=words)
        return ctx

    def generate_all_potential_parses_for_sentence(self, tagged_sentence, predicted_tags, top_n):

        ctx = self.build_parse_context(tagged_sentence, predicted_tags)
        if not ctx:
            return []

        terminal_actions = []
        actions_queue = [None]
        while True:
            current_actions_queue = list(actions_queue)
            actions_queue = []
            for act in current_actions_queue:
                if act and act.is_terminal():
                    terminal_actions.append(act)
                actions_queue.extend(self.get_next_actions(act, ctx))

            if len(actions_queue) == 0:
                break
            # trim to top_n
            actions_queue = sorted(actions_queue,   key=lambda act: -act.cum_prob)[:top_n]

        terminal_actions = sorted(terminal_actions, key=lambda act: -act.cum_prob)
        return terminal_actions[:top_n]

    def get_next_actions(self, parse_action, ctx):
        next_actions = []
        if parse_action is None:
            # Initialize stack, basic parser and oracle
            oracle = self.create_oracle()
            tag_ix = 0
            cause2effects, effect2causers = defaultdict(set), defaultdict(set)
        else:
            if parse_action.is_terminal():
                return []
            oracle = parse_action.oracle
            tag_ix = parse_action.tag_ix
            cause2effects, effect2causers = parse_action.cause2effects, parse_action.effect2causers

            if tag_ix >= len(ctx.pos_ptag_seq):
                return next_actions

        return self.get_parse_action_results(cause2effects, effect2causers, oracle, tag_ix, ctx, parse_action)

    def get_parse_action_results(self, cause2effects, effect2causers, oracle, tag_ix, ctx, parent_action):
        buffer_tag_pair = ctx.pos_ptag_seq[tag_ix]
        buffer_tag = buffer_tag_pair[0]
        bstart, bstop = ctx.tag2span[buffer_tag_pair]
        remaining_buffer_tags = ctx.pos_ptag_seq[tag_ix:]

        tos_tag_pair = oracle.tos()
        tos_tag = tos_tag_pair[0]
        # Returns -1,-1 if TOS is ROOT
        if tos_tag == ROOT:
            tstart, tstop = -1, -1
        else:
            tstart, tstop = ctx.tag2span[tos_tag_pair]
        # Note that the end ix in tag2span is always the last index, not the last + 1
        btwn_start, btwn_stop = min(tstop + 1, len(ctx.words)), max(0, bstart)
        btwn_word_seq = ctx.words[btwn_start:btwn_stop]
        distance = len(btwn_word_seq)
        btwn_word_ngrams = self.ngram_extractor.extract(btwn_word_seq)  # type: List[str]
        feats = self.feat_extractor.extract(stack_tags=oracle.parser.stack.contents(), buffer_tags=remaining_buffer_tags,
                                            tag2word_seq=ctx.tag2words,
                                            between_word_seq=btwn_word_ngrams, distance=distance,
                                            cause2effects=cause2effects, effect2causers=effect2causers,
                                            positive_val=self.positive_val)

        action_probabilities = self.predict_parse_action_probabilities(feats=feats,
                                           tos=tos_tag,
                                           models=self.parser_models[-1],
                                           vectorizer=self.parser_feature_vectorizers[-1])

        parse_action_results = []
        for action, prob in action_probabilities.items():
            # Decide the direction of the causal relation
            new_relations = set()
            new_cause2effects = self.clone_default_dict(cause2effects)
            new_effect2causers = self.clone_default_dict(effect2causers)

            lr_action_probs = dict()
            if action in [LARC, RARC]:
                feats_copy = dict(feats)  # don't modify feats as we iterate through possibilities
                cause_effect, effect_cause = self.update_feats_with_action(action, buffer_tag, feats_copy, tos_tag)
                lr_action_probs = self.predict_parse_action_probabilities(feats=feats_copy,
                                                     model=self.crel_models[-1],
                                                     vectorizer=self.crel_feat_vectorizers[-1])

                lr_action = max(lr_action_probs.keys(), key = lambda k: lr_action_probs[k])
                new_relations = self.update_cause_effects(buffer_tag_pair,
                                                          new_cause2effects, cause_effect,
                                                          new_effect2causers, effect_cause,
                                                          lr_action, tos_tag_pair)

            parse_action_result = ParseActionResult(
                action, new_relations, prob, new_cause2effects, new_effect2causers, oracle.clone(), tag_ix, ctx, parent_action, lr_action_probs)
            parse_action_results.append(parse_action_result)
        return parse_action_results

    def update_cause_effects(self, buffer_tag_pair, cause2effects, cause_effect, effect2causers, effect_cause,
                             lr_action, tos_tag_pair):
        new_relations = set()
        if lr_action == CAUSE_AND_EFFECT:
            new_relations.add(cause_effect)
            new_relations.add(effect_cause)

            cause2effects[tos_tag_pair].add(buffer_tag_pair)
            effect2causers[buffer_tag_pair].add(tos_tag_pair)

            cause2effects[buffer_tag_pair].add(tos_tag_pair)
            effect2causers[tos_tag_pair].add(buffer_tag_pair)

        elif lr_action == CAUSE_EFFECT:
            new_relations.add(cause_effect)

            cause2effects[tos_tag_pair].add(buffer_tag_pair)
            effect2causers[buffer_tag_pair].add(tos_tag_pair)

        elif lr_action == EFFECT_CAUSE:
            new_relations.add(effect_cause)

            cause2effects[buffer_tag_pair].add(tos_tag_pair)
            effect2causers[tos_tag_pair].add(buffer_tag_pair)

        elif lr_action == REJECT:
            pass
        else:
            raise Exception("Invalid CREL type")
        return new_relations

    def clone_default_dict(self, d):
        new_dd = defaultdict(d.default_factory)
        new_dd.update(d)
        return new_dd

    def create_oracle(self):
        parser = ShiftReduceParser(Stack(verbose=False))
        parser.stack.push((ROOT, 0))
        # needs to be a tuple
        return Oracle([], parser)

    def predict_parse_action_probabilities(self, feats, tos, models, vectorizer):

        xs = vectorizer.transform(feats)
        prob_by_label = {}
        for action in self.randomize_actions():
            if not allowed_action(action, tos):
                continue

            prob_by_label[action] = models[action].predict_proba(xs)[0][-1]
        return prob_by_label

    def update_feats_with_action(self, action, buffer_tag, feats, tos_tag):
        c_e_pair = (tos_tag, buffer_tag)
        # Convert to a string Causer:{l}->Result:{r}
        cause_effect = denormalize_cr(c_e_pair)
        e_c_pair = (buffer_tag, tos_tag)
        # Convert to a string Causer:{l}->Result:{r}
        effect_cause = denormalize_cr(e_c_pair)
        # Add additional features
        # needs to be before predict below
        crel_feats = self.crel_features(action, tos_tag, buffer_tag)
        feats.update(crel_feats)
        return cause_effect, effect_cause

In [288]:
def model_train_predict_essay_level(essays_TD, essays_VD, extractor_names, cost_function_name, ngrams, stemmed, beta, max_epochs):
    extractors = get_functions_by_name(extractor_names, all_extractor_fns)
    # get single cost function
    cost_fn = get_functions_by_name([cost_function_name], all_cost_functions)[0]
    assert cost_fn is not None, "Cost function look up failed"
    # Ensure all extractors located
    assert len(extractors) == len(extractor_names), "number of extractor functions does not match the number of names"

    template_feature_extractor = NonLocalTemplateFeatureExtractor(extractors=extractors)
    if stemmed:
        ngram_extractor = NgramExtractorStemmed(max_ngram_len=ngrams)
    else:
        ngram_extractor = NgramExtractor(max_ngram_len=ngrams)
    parse_model = SearnModelBreadthFirst(feature_extractor=template_feature_extractor,
                                             cost_function=cost_fn,
                                             min_feature_freq=MIN_FEAT_FREQ,
                                             ngram_extractor=ngram_extractor, cr_tags=cr_tags,
                                             base_learner_fact=BASE_LEARNER_FACT,
                                             beta=beta,
                                             # log_fn=lambda s: print(s))
                                             log_fn=lambda s: None)

    parse_model.train(essays_TD, max_epochs=max_epochs)

    num_feats = template_feature_extractor.num_features()

    sent_td_ys_bycode = get_label_data_essay_level(essays_TD)
    sent_vd_ys_bycode = get_label_data_essay_level(essays_VD)

    sent_td_pred_ys_bycode = predict_essay_level(parse_model, essays_TD)
    sent_vd_pred_ys_bycode = predict_essay_level(parse_model, essays_VD)

    return parse_model, num_feats, sent_td_ys_bycode, sent_vd_ys_bycode, sent_td_pred_ys_bycode, sent_vd_pred_ys_bycode

In [289]:
LINE_WIDTH = 80

# other settings
DOWN_SAMPLE_RATE = 1.0  # For faster smoke testing the algorithm
BASE_LEARNER_FACT = None
COLLECTION_PREFIX = "CR_CB_SHIFT_REDUCE_PARSER_TEMPLATED_MOST_RECENT_CODE"

# some of the other extractors aren't functional if the system isn't able to do a basic parse
# so the base extractors are the MVP for getting to a basic parser, then additional 'meta' parse
# features from all_extractors can be included
base_extractors = [
    single_words,
    word_pairs,
    three_words,
    between_word_features
]

all_extractor_fns = base_extractors + [
    word_distance,
    valency,
    unigrams,
    third_order,
    label_set,
    size_features
]

all_cost_functions = [
    micro_f1_cost,
    micro_f1_cost_squared,
    micro_f1_cost_plusone,
    micro_f1_cost_plusepsilon,
    binary_cost,
    inverse_micro_f1_cost,
    uniform_cost
]

all_extractor_fn_names = get_function_names(all_extractor_fns)
base_extractor_fn_names = get_function_names(base_extractors)
all_cost_fn_names = get_function_names(all_cost_functions)

### Note that these are different for Skin Cancer dataset

In [290]:
ngrams = 1
stemmed = True
cost_function_name = micro_f1_cost_plusepsilon.__name__
dual = True
fit_intercept = True
beta = 0.5
max_epochs = 2
C = 0.5
penalty = "l2"

In [291]:
# Note these also differ for SC dataset
BASE_LEARNER_FACT = lambda : LogisticRegression(dual=dual, C=C, penalty=penalty, fit_intercept=fit_intercept)
best_extractor_names = ['single_words', 'between_word_features', 'label_set',
                                    'three_words', 'third_order', 'unigrams'] # type: List[str]

In [292]:
test_folds     = [(pred_tagged_essays_train, pred_tagged_essays_test)]  # type: List[Tuple[Any,Any]]

## Essay Level Results

In [293]:
result_test_essay_level = evaluate_model_essay_level(
    collection_prefix=COLLECTION_PREFIX,
    folds=test_folds,
    extractor_fn_names_lst=best_extractor_names,
    cost_function_name=cost_function_name,
    ngrams=ngrams,
    beta=beta,
    stemmed=stemmed,
    down_sample_rate=DOWN_SAMPLE_RATE,
    max_epochs=max_epochs)

## Train

In [294]:
models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, cv_td_preds_by_sent, \
    cv_sent_vd_ys_by_tag = result_test_essay_level
    
mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag)
get_micro_metrics(metrics_to_df(mean_metrics))

Unnamed: 0,accuracy,f1_score,recall,precision
95,0.986136,0.785121,0.758759,0.81338


## Test

In [295]:
models, cv_sent_td_ys_by_tag, cv_sent_td_predictions_by_tag, \
    cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag = result_test_essay_level
    
mean_metrics = ResultsProcessor.compute_mean_metrics(cv_sent_vd_ys_by_tag, cv_sent_vd_predictions_by_tag)
get_micro_metrics(metrics_to_df(mean_metrics))

Unnamed: 0,accuracy,f1_score,recall,precision
95,0.986094,0.748239,0.776965,0.721562


In [296]:
from itertools import combinations

def get_possible_crels(predicted_tags):
    if len(predicted_tags) < 2:
        return set()
    predicted_tags = sorted(predicted_tags)
    pred_crels = set()
    for a,b in combinations(predicted_tags, 2):
        pred_crels.add("Causer:{a}->Result:{b}".format(a=a, b=b))
        pred_crels.add("Causer:{b}->Result:{a}".format(a=a, b=b))
    return pred_crels

def to_canonical_parse(crels):
    return tuple(sorted(crels))

In [297]:
%%time
model = models[0]
parses = []
sent2parse = dict()
for eix, essay in enumerate(pred_tagged_essays_test):
    for sent_ix, taggged_sentence in enumerate(essay.sentences):
        predicted_tags = essay.pred_tagged_sentences[sent_ix]
        unique_cr_tags = set()
        for word, tags in taggged_sentence:
            unique_cr_tags.update(set_cr_tags.intersection(tags))
        unq_ptags = set([p for p in predicted_tags if p != EMPTY])
        possible_rels = get_possible_crels(unq_ptags)
        best_parse = to_canonical_parse(possible_rels.intersection(unique_cr_tags))
        gold_parse = to_canonical_parse(unique_cr_tags)
        if len(best_parse) > 0:
            pred_parses = model.generate_all_potential_parses_for_sentence(
                tagged_sentence=taggged_sentence, predicted_tags=predicted_tags, 
                top_n=500)
            parses.append((eix, sent_ix, pred_parses, best_parse, gold_parse))
            sent2parse[(eix,sent_ix)] = (pred_parses, best_parse, gold_parse)
            #print("parses:", len(pred_parses))


TypeError: predict_parse_action_probabilities() got an unexpected keyword argument 'model'

In [298]:
def get_relations(parse):
    crels = []
    p = parse
    while p:
        if p.relations:
            crels.append((p.prob, p.relations))
        p = p.parent_action
    return crels

eix, sent_ix, p, best_cr, gold_cr =  parses[9]
print(best_cr)
print(gold_cr)
for pix, parse in enumerate(p):    
    relations = get_relations(parse)
    if relations:
        print(pix, parse.cum_prob, relations)

IndexError: list index out of range

In [None]:
def get_unique_relations(parse):
    crels = set()
    p = parse
    while p:
        if p.relations:
            crels.update(p.relations)
        p = p.parent_action
    return crels

ranks = []
top_probs = []
for (eix,sent_ix), (pred_parses, best_parse, gold_parse) in sent2parse.items():  
    top_rank = -1
    top_rank_prob = -1
    for rank, parse in enumerate(pred_parses):
#         print(rank, parse.cum_prob, parse)
        pred_parse = to_canonical_parse(get_unique_relations(parse))
        if pred_parse == best_parse:
            top_rank = rank
            top_rank_prob = parse.cum_prob
            break
    ranks.append(top_rank)
    top_probs.append(top_rank_prob)

In [None]:
len([r for r in ranks if r != -1]) / len(ranks)

In [None]:
# % in top_n
len([r for r in ranks if r != -1 and r < 500]) / len(ranks)

np.mean([r for r in ranks if r != -1 and r < 500])

In [None]:
tp = [p for p in top_probs if p != -1]
np.mean(tp), np.max(tp), np.min(tp), np.percentile(tp,10)

In [None]:
ranks_incl = []
has_all_rels = []
missing = []
for (eix,sent_ix), (pred_parses, best_parse, gold_parse) in sent2parse.items():  
    top_rank = -1
    set_best_parse = set(best_parse)
    all_preds = set()
    
    for rank, parse in enumerate(pred_parses):
        set_pred_parse = get_unique_relations(parse)
        all_preds.update(set_pred_parse)
        if len(set_best_parse - set_pred_parse) == 0:
            top_rank = rank
            break
    ranks_incl.append(top_rank)
    if len(set_best_parse - all_preds) == 0:
        has_all_rels.append(1)
    else:
        has_all_rels.append(0)
        missing.append((eix, sent_ix, pred_parses, all_preds, best_parse, gold_parse))

In [None]:
len([r for r in ranks_incl if r != -1]) / len(ranks_incl)

In [None]:
np.mean(has_all_rels)

In [None]:
eix, sent_ix, pred_parses, all_preds, best_parse, gold_parse = missing[0]
e = pred_tagged_essays_test[eix]
set(e.pred_tagged_sentences[sent_ix])

In [None]:
best_parse

In [None]:
def get_parse_actions(parse):
    actions = []
    p = parse
    while p:
        actions.append((p.action, p.prob, p.relations, p.lr_action_probs))
        p = p.parent_action
    return actions[::-1]

In [None]:
for p in pred_parses:
    print(get_parse_actions(p))
    print()