In [40]:
from pprint import pprint

In [2]:
class Stack(object):
    def __init__(self, verbose=False):    
        self.stack = []
        self.verbose = verbose
    
    def tos(self):
        if self.len() == 0:
            return None
        #assert self.len() > 0, "Can't peek when stack is empty"
        return self.stack[-1]
    
    def pop(self):
        assert self.len() > 0, "Can't pop when stack is empty"
        item = self.stack.pop()
        if self.verbose:
            print("POPPING: %s" % item)
            print("LEN:     %i" % len(self.stack))
        return item
    
    def push(self, item):
        self.stack.append(item)
        if self.verbose:
            print("PUSHING: %s" % item)
            print("LEN:     %i" % len(self.stack))
    
    def len(self):
        return len(self.stack)

    def contains(self, item):
        return item in self.stack
    
    def __repr__(self):
        return "|".join(self.stack)

In [112]:
ROOT = "root"

def norm_arc(arc):
    return tuple(sorted(arc))

def norm_arcs(arcs):
    return set(map(norm_arc, arcs))

class Parser(object):
    def __init__(self, stack):
        self.stack = stack
        self.arcs = []
        self.normed_arcs = set()
        # nodes with heads
        self.children = set()
        self.actions = []

    def get_dependencies(self):
        return [(l, r) for (l, r) in self.arcs if r != ROOT and l != ROOT]

    def left_arc(self, buffer):
        tos = self.stack.pop()
        # Pre-condition
        # assert self.has_head(tos) == False
        arc = (tos, buffer)
        n_arc = norm_arc(arc)
        assert n_arc not in self.normed_arcs, "Arc already processed %s" % str(n_arc)
        self.arcs.append(arc)
        self.normed_arcs.add(arc)
        self.children.add(tos)
        self.actions.append("L ARC   : " + tos + "->" + buffer)

    def right_arc(self, buffer):
        tos = self.stack.tos()
        # normalize arc
        arc = (buffer, tos)
        n_arc = norm_arc(arc)
        assert n_arc not in self.normed_arcs, "Arc already processed %s" % str(n_arc)
        self.arcs.append(arc)
        self.normed_arcs.add(n_arc)
        self.actions.append("R ARC   : " + tos + "<-" + buffer)
        self.children.add(buffer)
        self.stack.push(buffer)

    def reduce(self):
        tos = self.stack.pop()
        # assert self.has_head(tos) == True
        self.actions.append("REDUCE  : Pop  %s" % tos)

    def shift(self, buffer):
        self.stack.push(buffer)
        self.actions.append("SHIFT   : Push %s" % buffer)

    def skip(self, buffer):
        self.actions.append("SKIP    : item %s" % buffer)

    def has_head(self, item):
        return item in self.children

    def in_stack(self, item):
        return self.stack.contains(item)

    def clone(self):
        cloney = Parser(self.stack.clone())
        cloney.arcs = list(self.arcs)
        cloney.normed_arcs = set(self.normed_arcs)
        # nodes with heads
        cloney.children = set(self.children)
        cloney.actions = list(self.actions)
        return cloney

In [113]:
from collections import defaultdict

SHIFT = "Shift"
REDUCE = "Reduce"
LARC = "LArc"
RARC = "Rarc"
SKIP = "Skip"

class Oracle(object):
    def __init__(self, crels, parser):
        self.parser = parser
        self.raw_crels = crels
        self.crels = norm_arcs(crels)  # type: Set[Tuple[str,str]]
        self.mapping = self.build_mappings(crels)

    def build_mappings(self, pairs):
        mapping = defaultdict(set)
        for c, res in pairs:
            mapping[c].add(res)
            mapping[res].add(c)
        return mapping

    def should_continue(self, action):
        # continue parsing if REDUCE or LARC
        return action in (REDUCE, LARC)

    def remove_relation(self, a, b):
        # as we can force it to execute actions that are invalid, we have to see if this is a valid relation to remove
        if a in self.mapping and b in self.mapping[a]:
            self.mapping[a].remove(b)
            if len(self.mapping[a]) == 0:
                del self.mapping[a]
            self.mapping[b].remove(a)
            if len(self.mapping[b]) == 0:
                del self.mapping[b]

    def consult(self, tos, buffer):
        """
        Performs optimal decision for parser
        If true, continue processing, else Consume Buffer
        """
        parser = self.parser
        a, b = norm_arc((tos, buffer))
        if (a, b) in self.crels:
            # TOS has arcs remaining? If so, we need RARC, else LARC
            if len(self.mapping[tos]) == 1:
                return LARC
            else:
                return RARC
        else:
            if buffer not in self.mapping:
                return SKIP
            # If the buffer has relations further down in the stack, we need to POP the TOS
            for item in self.mapping[buffer]:
                if item == tos:
                    continue
                if parser.in_stack(item):
                    return REDUCE
            # end for
            # ELSE
            return SHIFT

    def execute(self, action, tos, buffer):
        """
        Performs optimal decision for parser
        If true, continue processing, else Consume Buffer
        """
        parser = self.parser
        if action == LARC:
            parser.left_arc(buffer)
            self.remove_relation(tos, buffer)
        elif action == RARC:
            parser.right_arc(buffer)
            self.remove_relation(tos, buffer)
        elif action == REDUCE:
            parser.reduce()
        elif action == SHIFT:
            parser.shift(buffer)
        elif action == SKIP:
            parser.skip(buffer)
        else:
            raise Exception("Unknown parsing action %s" % action)
        return self.should_continue(action)

    def tos(self):
        return self.parser.stack.tos()

    def is_stack_empty(self):
        return self.parser.stack.len() == 0

    def clone(self):
        cloney = Oracle(set(self.raw_crels), self.parser.clone())
        # Need to ensure a deep clone of the mappings dict
        cloney.mapping = defaultdict(set)
        for key, set_vals in self.mapping.items():
            cloney.mapping[key].update(set_vals)
        return cloney

In [114]:
def test_oracle(codes, crels, orcl_fact, verbose=False):
    
    crels = set(crels)
    if verbose:
        prn_fun = lambda s="": print(s)
    else:
        prn_fun = lambda s="": None
    
    stack = Stack(False)
    stack.push(ROOT)
    parser = Parser(stack)
    oracle = orcl_fact(crels, parser)

    prn_fun("DEPS")
    for crel in sorted(crels):
        prn_fun("\t" + str(crel))
    prn_fun()

    PAD = 20
    LINE = PAD + len(ROOT) + 2 * len(codes) + 1

    for buffer in codes:
        prn_fun("-" * LINE)
        prn_fun(buffer)
        prn_fun("-" * LINE)

        while True:
            tos = stack.tos()
            action = oracle.consult(tos, buffer)
            if not oracle.execute(action, tos, buffer):
                prn_fun(parser.actions[-1].ljust(PAD) + " || STACK : " + str(stack))
                break

            prn_fun(parser.actions[-1].ljust(PAD) + " || STACK : " + str(stack))
            if stack.len() == 0:
                prn_fun("Empty stack, stopping")
                break

    prn_fun()
    prn_fun("*" * LINE)
    prn_fun("Stack")
    prn_fun("\t" + str(stack))
    deps = parser.get_dependencies()
    prn_fun("DEPS Actual")
    for crel in sorted(crels):
        prn_fun("\t" + str(crel))
    prn_fun("DEPS Pred")
    for dep in sorted(deps):
        prn_fun("\t" + str(dep))
    prn_fun("Actions")
    for a in parser.actions:
        prn_fun("\t" + a)
    prn_fun()
    prn_fun("Ordered Match?    " + str(set(deps) == crels))

    ndeps = norm_arcs(deps)
    ncrels = norm_arcs(crels)
    diff = (ndeps - ncrels).union(ncrels - ndeps)
    success = (len(diff) == 0)
    prn_fun("Un Ordered Match? " + str(success))
    if diff:
        prn_fun(diff)
    return success

In [99]:
test_pairs = []

test_pairs.append([
    ("A","B"),
])
test_pairs.append([
    ("A","B"),
    ("B","C"),
])
#C->B->A
test_pairs.append([
    ("C","B"),
    ("B","A"),
])
test_pairs.append([
    ("A","C"),
    ("B","C"),
])
test_pairs.append([
    ("A","B"),
    ("C","B"),
])
test_pairs.append([
    ("B","A"),
    ("B","C"),
])
test_pairs.append([
    ("A","C"),
    ("C","B"),
])

# Hard - has to flip relation
test_pairs.append([
    ("A","D"),
    ("D","B"),
    ("B","C"),
])
test_pairs.append([
    ("D","A"),
    ("D","B"),
    ("B","C"),
])
test_pairs.append([
    ("D","A"),
    ("B","D"),
    ("B","C"),
])

test_pairs.append([
    ("A","E"),
    ("E","B"),
    ("B","D"),
    ("D","C"),
])
test_pairs.append([
    ("A","D"),
    ("D","B"),
    ("B","C"),
    ("A", "F"),
    ("A", "E"),
])

test_pairs.append([
    ("A","D"),
    ("D","B"),
    ("B","C"),
    ("A", "F"),
    ("E", "F"),
])

oracle_fact = Oracle
for pairs in test_pairs:
    try:
        success = test_oracle("ABCDEF", pairs, oracle_fact, verbose=False)
    except:
        success = False
        
    if not success:
        print("Error for relations:")
        pprint(pairs)
        print()
        success = test_oracle("ABCDEF", pairs, oracle_fact, verbose=True)

## Visualize Parse for Tricker Graphs

### <span style="color:red">Doesn't Handle Cycles</span>
- So we remove the condition about only having a single parent

In [100]:
#[('1', '3'), ('1', '50'), ('3', '50')]
#['50', '1', '3']
pairs =[
    ("B","A"),
    ("B","C"),
    ("C","A"),
]
test_oracle("ABCDEF", pairs, Oracle, verbose=True)

DEPS
	('B', 'A')
	('B', 'C')
	('C', 'A')

-------------------------------------
A
-------------------------------------
SHIFT   : Push A     || STACK : root|A
-------------------------------------
B
-------------------------------------
R ARC   : A<-B       || STACK : root|A|B
-------------------------------------
C
-------------------------------------
L ARC   : B->C       || STACK : root|A
L ARC   : A->C       || STACK : root
L ARC   : A->C       || STACK : root
-------------------------------------
D
-------------------------------------
L ARC   : A->C       || STACK : root
-------------------------------------
E
-------------------------------------
L ARC   : A->C       || STACK : root
-------------------------------------
F
-------------------------------------
L ARC   : A->C       || STACK : root

*************************************
Stack
	root
DEPS Actual
	('B', 'A')
	('B', 'C')
	('C', 'A')
DEPS Pred
	('A', 'C')
	('B', 'A')
	('B', 'C')
Actions
	SHIFT   : Push A
	R ARC   : A<-B

True

In [101]:
pairs =[
    ("A","D"),
    ("D","B"),
    ("B","C"),
]
test_oracle("ABCDEF", pairs, Oracle, verbose=True)

DEPS
	('A', 'D')
	('B', 'C')
	('D', 'B')

-------------------------------------
A
-------------------------------------
SHIFT   : Push A     || STACK : root|A
-------------------------------------
B
-------------------------------------
SHIFT   : Push B     || STACK : root|A|B
-------------------------------------
C
-------------------------------------
R ARC   : B<-C       || STACK : root|A|B|C
-------------------------------------
D
-------------------------------------
REDUCE  : Pop  C     || STACK : root|A|B
L ARC   : B->D       || STACK : root|A
L ARC   : A->D       || STACK : root
L ARC   : A->D       || STACK : root
-------------------------------------
E
-------------------------------------
L ARC   : A->D       || STACK : root
-------------------------------------
F
-------------------------------------
L ARC   : A->D       || STACK : root

*************************************
Stack
	root
DEPS Actual
	('A', 'D')
	('B', 'C')
	('D', 'B')
DEPS Pred
	('A', 'D')
	('B', 'D')
	('C',

True

## Non Projective Parse Should Fail Test

In [102]:
pairs =[
    ("A","C"),
    ("B","E"),
]
try:
    success = test_oracle("ABCDEF", pairs, Oracle, verbose=True)
except Exception as e:
    success = False
    raise e
assert success == False

DEPS
	('A', 'C')
	('B', 'E')

-------------------------------------
A
-------------------------------------
SHIFT   : Push A     || STACK : root|A
-------------------------------------
B
-------------------------------------
SHIFT   : Push B     || STACK : root|A|B
-------------------------------------
C
-------------------------------------
REDUCE  : Pop  B     || STACK : root|A
L ARC   : A->C       || STACK : root
L ARC   : A->C       || STACK : root
-------------------------------------
D
-------------------------------------
L ARC   : A->C       || STACK : root
-------------------------------------
E
-------------------------------------
SHIFT   : Push E     || STACK : root|E
-------------------------------------
F
-------------------------------------
SHIFT   : Push E     || STACK : root|E

*************************************
Stack
	root|E
DEPS Actual
	('A', 'C')
	('B', 'E')
DEPS Pred
	('A', 'C')
Actions
	SHIFT   : Push A
	SHIFT   : Push B
	REDUCE  : Pop  B
	L ARC   : A->C
	SHIF

## Test on Real Causal Relations (Limit to 2 or More Relations in a Sentence)

In [103]:
def normalize(code):
    return code.replace("Causer:","").replace("Result:","")

def normalize_cr(cr):
    return tuple(normalize(cr).split("->"))

In [104]:
normalize("Causer:14"),normalize("Result:50")

('14', '50')

In [105]:
normalize_cr('Causer:14->Result:50')

('14', '50')

In [49]:
import pickle 

training_pickled = "/Users/simon.hughes/Google Drive/Phd/Data/CoralBleaching/Thesis_Dataset/training.pl"
with open(training_pickled, "rb+") as f:
    tagged_essays = pickle.load(f)
len(tagged_essays)

902

In [50]:
from collections import defaultdict

tag_freq = defaultdict(int)
unique_words = set()
for essay in tagged_essays:
    for sentence in essay.sentences:
        for word, tags in sentence:
            unique_words.add(word)
            for tag in tags:
                tag_freq[tag] += 1

EMPTY_TAG = "Empty"
#TODO - don't ignore Anaphor, other and rhetoricals here
cr_tags  = list((t for t in tag_freq.keys() if ( "->" in t) and not "Anaphor" in t and not "other" in t and not "rhetorical" in t))
reg_tags = set((t for t in tag_freq.keys() if ( "->" not in t) and (t == "explicit" or t[0].isdigit())))

In [87]:
from pprint import pprint

relations = []
unq_cr_tags = set(cr_tags)
skipped_sent = 0
skipped_crels = 0
num_sents = 0
num_csl = 0

diffs = []
for essay_ix, essay in enumerate(tagged_essays):
    for sent_ix, taggged_sentence in enumerate(essay.sentences):
        num_sents += 1
        tag_seq = []
        un_tags = set()
        un_csl  = set()
        crel2tags = defaultdict(set)

        def add_tag(tag, crel):
            un_tags.add(tag)
            crel2tags[crel].add(tag)
        
        has_causal = False
        last_tag = None
        for i, (wd,tags) in enumerate(taggged_sentence):
            rtags = set([normalize(t) for t in tags])
            rtags = rtags.intersection(reg_tags)
            # Get tag seq
            tag = None
            if rtags:
                tag = max(rtags, key = lambda t: tag_freq[t])
                if tag != last_tag and (not tag_seq or tag_seq[-1] != tag):
                    tag_seq.append(tag)
            last_tag = tag
            
            csl = unq_cr_tags.intersection(tags)
            if not csl:
                continue
            has_causal = True
            for crel in csl:
                un_csl.add(crel)
                l_causer, r_effect = crel.split("->")
                l,r = normalize_cr(crel)
                if l_causer in tags:
                    add_tag(l, crel)
                if r_effect in tags:
                    add_tag(r, crel)                
                if l in tags:
                    add_tag(l, crel)
                if r in tags:
                    add_tag(r, crel)
                    
        
        num_csl += len(un_csl)
        # Don't count sentences without any relations as skipped
        if has_causal:        
            supported_causal = set()
            supported_codes = set()
            for crel, tags in crel2tags.items():
                if len(tags) < 2:
                    l,r = normalize_cr(crel)
                    # if l == r then we want to keep these
                    if l != r or len(tags) == 0:
                        skipped_crels += 1
                        continue
                supported_causal.add(crel)
                supported_codes.update(tags)

            if not supported_causal:
                skipped_sent += 1
                continue
            # filter out any tags that were only part of unsupported causal relations
            #tag_seq = [tag for tag in tag_seq if tag in supported_codes]
            relations.append((essay_ix,sent_ix,supported_causal,tag_seq))
            
            if len(supported_causal) != len(un_csl):
                diffs.append((essay_ix, sent_ix, un_csl, supported_causal))
        else:
            if un_csl:
                diffs.append((essay_ix, sent_ix, un_csl, set()))
        
num_sents, len(relations), num_csl #skipped_sent, skipped_crels, 
#(8292, 2217, 3006)

(8292, 2217, 3006)

In [88]:
2217/3006

0.7375249500998003

## For the Unsupported Relations, are the Missing Tags in the Previous or Subsequent Sentence?

In [89]:
from pprint import pprint
def print_sentence(sentence):
    for wd, tags in sentence:
        print(wd.ljust(20), [t for t in tags if t[0].isdigit()])

for essay_ix, sent_ix, un_csl, supported_causal in diffs[0:10]:
    sentence = tagged_essays[essay_ix].sentences[sent_ix]
    pprint(un_csl)
    pprint(supported_causal)
    print_sentence(sentence)
    if sent_ix > 0:
        print("--Previous--")    
        print_sentence(tagged_essays[essay_ix].sentences[sent_ix-1])
    if sent_ix < len(tagged_essays[essay_ix].sentences)-1:
        print("--Next--")    
        print_sentence(tagged_essays[essay_ix].sentences[sent_ix+1])

    print() 

{'Causer:4->Result:14', 'Causer:3->Result:4'}
{'Causer:4->Result:14'}
the                  []
amount               ['4']
of                   ['4']
co2                  ['4']
can                  []
threaten             ['14']
the                  ['14']
health               ['14']
of                   ['14']
the                  ['14']
coral                ['14']
.                    []
--Previous--
as                   []
the                  []
temperature          ['3']
of                   ['3']
water                ['3']
increases            ['3']
.                    []

{'Causer:3->Result:6', 'Causer:1->Result:3'}
{'Causer:3->Result:6'}
corals               ['6']
stress               ['6']
from                 []
sea                  ['3']
temperatures         ['3']
increasing           ['3']
.                    []
--Previous--
plus                 []
,                    []
climate              []
change               []
affects              []
coral                ['50']
ble

## TODO 
- Re-train tagging model, adding tags where reg tag is missing but is included in a causer or result tag. 
- Also include explicit in the predicted tags.
- Need to handle relations where same code -> same code

## 4 Errors Below Look Are from Non-Projective Parses
**NOTES**
With only 4 errors as 4 missed relations, hardly worth worrying about. 
One solution would be to train a forward and a backward parser, parse the sentence in both directions and merge the deps. In each case that would pick up all deps.

In [106]:
for i, (essay_ix, sent_ix, supported_causal, tag_seq) in enumerate(relations[:]):
    supported_causal = sorted(supported_causal)
    crels = [normalize_cr(crel) for crel in supported_causal]
    for l,r in crels:
        if l == r:
            print(i, l,r)
            print(relations[i])
            print()

37 50 50
(24, 13, {'Causer:1->Result:50', 'Causer:50->Result:50'}, ['explicit', '1', '50', 'explicit', '50'])

493 50 50
(189, 4, {'Causer:13->Result:50', 'Causer:50->Result:50'}, ['explicit', '13', 'explicit', '50', 'explicit', '50'])

527 50 50
(197, 10, {'Causer:5b->Result:50', 'Causer:50->Result:50'}, ['5b', 'explicit', '50', 'explicit', '50'])

766 11 11
(276, 12, {'Causer:11->Result:11', 'Causer:3->Result:4'}, ['4', '3', 'explicit', '11'])



In [107]:
#Why is the last one missing the 11->11 relation?
tagged_essays[276].sentences[12]
#looks to be an unsupported relation

[('balance', set()),
 ('between', set()),
 ('co2', {'4', 'Causer:3->Result:4', 'Result', 'Result:4'}),
 ('and', {'Causer:3->Result:4'}),
 ('water', {'3', 'Causer', 'Causer:3', 'Causer:3->Result:4'}),
 ('temperature', {'3', 'Causer', 'Causer:3', 'Causer:3->Result:4'}),
 ('is', set()),
 ('also', set()),
 ('threaten', {'explicit'}),
 ('by', {'explicit'}),
 ('extreme',
  {'11',
   'Causer',
   'Causer:11',
   'Causer:11->Result:11',
   'Result',
   'Result:11'}),
 ('storms',
  {'11',
   'Causer',
   'Causer:11',
   'Causer:11->Result:11',
   'Result',
   'Result:11'}),
 ('.', set())]

In [124]:
errors = 0
exs = []
for e_ix,s_ix, supported_causal, tag_seq in relations[:]:
    supported_causal = sorted(supported_causal)
    crels = [normalize_cr(crel) for crel in supported_causal]

    try:
        success = test_oracle(tag_seq, crels, Oracle, verbose=False)
    except Exception as e:
        exs.append(e)
        success = False
        
    if not success:
        errors += 1
        print("Error for relations:", e_ix, ",", s_ix)
        pprint(crels)
        pprint(tag_seq)
        #print()
        #success = test_oracle(tag_seq, crels, Oracle, verbose=True)
        #break

Error for relations: 24 , 13
[('1', '50'), ('50', '50')]
['explicit', '1', '50', 'explicit', '50']
Error for relations: 33 , 3
[('1', '50'), ('1', '7'), ('3', '50'), ('3', '7')]
['1', '3', 'explicit', '50', 'explicit', '1', '3', '7']
Error for relations: 68 , 1
[('6', '50'), ('6', '7')]
['6', 'explicit', '50', 'explicit', '6', 'explicit', '7']
Error for relations: 75 , 2
[('11', '12'), ('11', '13'), ('12', '13'), ('13', '14')]
['13', 'explicit', '11', 'explicit', '12', 'explicit', '13', 'explicit', '14']
Error for relations: 129 , 11
[('1', '3'), ('1', '50')]
['1', 'explicit', '50', '1', 'explicit', '3']
Error for relations: 145 , 2
[('1', '3'), ('3', '4'), ('4', '14')]
['explicit',
 '50',
 'explicit',
 '1',
 'explicit',
 '3',
 'explicit',
 '3',
 'explicit',
 '4',
 'explicit',
 '14']
Error for relations: 158 , 1
[('1', '2')]
['1', 'explicit', '1', '2']
Error for relations: 174 , 4
[('1', '3'), ('3', '7'), ('7', '50')]
['1', 'explicit', '1', '3', 'explicit', '7', 'explicit', '50']
Error

In [159]:
def get_tags_relations_for(tagged_sentence, tag_freq, reg_tags, cr_tags):    
    most_common_tag = [None] # seed with None
    tag_seq = []
    
    crel_child_tags = defaultdict(set)    
    for i, (wd,tags) in enumerate(taggged_sentence):
        rtags = set([normalize(t) for t in tags])
        rtags = rtags.intersection(reg_tags)
        # Get tag seq
        tag = None
        if rtags:
            tag = max(rtags, key = lambda t: tag_freq[t])
            if tag != most_common_tag[-1]:
                # often there are single word gaps, skip over these
                if len(most_common_tag) < 3 or tag != most_common_tag[-2]: # at least 2 regular tags                    
                    tag_seq.append((tag,i))
        most_common_tag.append(tag)
        
        # to have child tags, need a tag sequence and a current valid regular tag
        if not tag or len(tag_seq) == 0:
            continue
            
        crels = cr_tags.intersection(tags)
        for crel in crels:
            l,r = normalize_cr(crel) 
            last_pair = tag_seq[-1]
            last_tag, tag_ix = last_pair
            if last_tag != tag:
                raise Exception("Tags don't match % s" % str((i,last_tag,tag)))                
            if tag in (l,r):
                crel_child_tags[crel].add(last_pair)
    return tag_seq, crel_child_tags

In [164]:
#normalize_cr("Causer:5->Result:10")

e_ix = 72
s_ix = 2
sentence = tagged_essays[e_ix].sentences[s_ix]
tag_seq, crel_children = get_tags_relations_for(sentence, tag_freq, reg_tags, set(cr_tags))
for pair in tag_seq:
    print(pair)
print("*" * 30)
for crel, kids in crel_children.items():
    print(crel)
    for k in kids:
        print(str(k))
    print()

('50', 1)
('explicit', 6)
('7', 9)
******************************
Causer:7->Result:50
('7', 9)
('50', 1)



In [146]:
def print_sentence_tags(tagged_sentence, tag_filter):
    for i, (wd,tags) in enumerate(taggged_sentence):
        tags = tags.intersection(tag_filter)
        stags = ""
        if tags:
            stags = ",".join(tags)
        print(str(i).ljust(3),wd.ljust(30), stags)

In [147]:
print_sentence_tags(sentence, reg_tags)

0   during                         
1   bleaching                      50
2   ,                              
3   corals                         50
4   turn                           50
5   white                          50
6   due                            explicit
7   to                             explicit
8   the                            
9   ejection                       7
10  or                             7
11  death                          7
12  ,                              
13  of                             7
14  the                            7
15  zooxanthellae                  7
16  algae                          7
17  .                              


In [149]:
crels     = [('11', '12'), ('11', '13'), ('12', '13'), ('13', '14')]
tag_seq   = ['13', 'explicit', '11', 'explicit', '12', 'explicit', '13', 'explicit', '14']
test_oracle(tag_seq, crels, Oracle, verbose=True)

DEPS
	('11', '12')
	('11', '13')
	('12', '13')
	('13', '14')

-------------------------------------------
13
-------------------------------------------
SHIFT   : Push 13    || STACK : root|13
-------------------------------------------
explicit
-------------------------------------------
SKIP    : item explicit || STACK : root|13
-------------------------------------------
11
-------------------------------------------
R ARC   : 13<-11     || STACK : root|13|11
-------------------------------------------
explicit
-------------------------------------------
SKIP    : item explicit || STACK : root|13|11
-------------------------------------------
12
-------------------------------------------
L ARC   : 11->12     || STACK : root|13
R ARC   : 13<-12     || STACK : root|13|12
-------------------------------------------
explicit
-------------------------------------------
SKIP    : item explicit || STACK : root|13|12
-------------------------------------------
13
--------------------------

AssertionError: Arc already processed ('12', '13')

In [119]:
crels = [('1', '3'), ('1', '50')]
tag_seq = ['1', 'explicit', '50', '1', 'explicit', '3']
test_oracle(tag_seq, crels, Oracle, verbose=True)

DEPS
	('1', '3')
	('1', '50')

-------------------------------------
1
-------------------------------------
SHIFT   : Push 1     || STACK : root|1
-------------------------------------
explicit
-------------------------------------
SKIP    : item explicit || STACK : root|1
-------------------------------------
50
-------------------------------------
R ARC   : 1<-50      || STACK : root|1|50
-------------------------------------
1
-------------------------------------


AssertionError: Arc already processed ('1', '50')

## <span style="color:red">NEED to determine if all errors are non-projective<span>

In [20]:
from pprint import pprint



In [21]:
test_oracle2(['5', '50'], [('5', '50')], Oracle2, verbose=True)

DEPS
	('5', '50')

-----------------------------
5
-----------------------------
SHIFT   : Push 5     || STACK : root|5
-----------------------------
50
-----------------------------
L ARC   : 5->50      || STACK : root
L ARC   : 5->50      || STACK : root

*****************************
Stack
	root
DEPS Actual
	('5', '50')
DEPS Pred
	('5', '50')
Actions
	SHIFT   : Push 5
	L ARC   : 5->50

Ordered Match?    True
Un Ordered Match? True


True

In [23]:
errors = 0
exs = []
for supported_causal, tag_seq in relations[:]:
    supported_causal = sorted(supported_causal)
    crels = [normalize_cr(crel) for crel in supported_causal]

    try:
        success = test_oracle2(tag_seq, crels, Oracle2, verbose=False)
    except Exception as e:
        exs.append(e)
        success = False
        
    if not success:
        errors += 1
        print("Error for relations:")
        pprint(crels)
        pprint(tag_seq)
        #print()
        #success = test_oracle(tag_seq, crels, Oracle, verbose=True)
        #break
errors

Error for relations:
[('1', '50'), ('50', '50')]
['1', '50']
Error for relations:
[('1', '50'), ('1', '7'), ('3', '50'), ('3', '7')]
['1', '3', '50', '1', '3', '7']
Error for relations:
[('6', '50'), ('6', '7')]
['6', '50', '6', '7']
Error for relations:
[('11', '12'), ('11', '13'), ('12', '13'), ('13', '14')]
['13', '11', '12', '13', '14']
Error for relations:
[('1', '3'), ('1', '50')]
['1', '50', '1', '3']
Error for relations:
[('1', '3'), ('1', '50'), ('7', '50')]
['50', '1', '3', '1', '7', '50']
Error for relations:
[('13', '50'), ('50', '50')]
['13', '50']
Error for relations:
[('50', '50'), ('5b', '50')]
['5b', '50']
Error for relations:
[('14', '50'), ('6', '14'), ('7', '50')]
['50', '6', '14', '50', '7']
Error for relations:
[('1', '2'), ('1', '3')]
['1', '2', '1', '3']
Error for relations:
[('3', '4'), ('3', '50'), ('4', '5')]
['3', '50', '3', '4', '5']
Error for relations:
[('11', '11'), ('3', '4')]
['4', '3', '11']
Error for relations:
[('13', '14'), ('13', '50'), ('14', '50

26