In [858]:
from AMRGraph import AMR
from AMRData import CustomizedAMR
from utilities import pretty_print, generate_action_sequence, generate_amr
import ActionSequenceGenerator
import NamedEntityReplacer
import re
import itertools
from operator import itemgetter

ImportError: cannot import name generate_amr

In [826]:
# First worked example: an AMR annotation and the sentence it describes.
# The ~e.N suffixes carry the index of the sentence token aligned to that node or relation.
amr_string = """(c5 / crown-01~e.6
      :ARG1 (c / city :wiki "Hong_Kong" 
            :name (n / name :op1 "Hong"~e.0 :op2 "Kong"~e.1)) 
      :ARG2~e.7 (l2 / location :wiki - 
            :name (n2 / name :op1 "Hollywood"~e.8 :op2 "of"~e.9 :op3 "the"~e.10 :op4 "East"~e.11)) 
      :time (a2 / always~e.3))"""
sentence = """Hong Kong has always worn the crown of Hollywood of the East ."""
# NOTE(review): `expected` is not referenced by any later cell visible here, and it
# differs from the sentence the final cell actually produces
# ('city has always worn the crown of location .') -- confirm the intended value.
expected = """City has always worn the crown of name"""

In [827]:
# Parse the annotation into the AMR structure that the following cells inspect and modify.
amr = AMR.parse_string(amr_string)

In [828]:
# Build the customized AMR for the sentence; the mappings printed below follow this call.
# NOTE(review): custom_amr is not referenced by any later cell visible here -- confirm it is still needed.
custom_amr = generate_amr(amr_string, sentence)


Mappings between node variables and their corresponding concepts.

{'c': 'city', 'n': 'name', 'a2': 'always', 'l2': 'location', 'n2': 'name', 'c5': 'crown-01'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{u'Hong': [(u'0', 'n')], u'of': [(u'9', 'n2')], u'Kong': [(u'1', 'n')], 'a2': ['3'], u'the': [(u'10', 'n2')], u'East': [(u'11', 'n2')], u'Hollywood': [(u'8', 'n2')], 'c5': ['6']}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'ARG2': [[('7', 'c5')]]}

Mappings from a node to each child, along with the relation between them.

Key: Hong
Leaf

Key: Hong_Kong
Leaf

Key: a2
Leaf

Key: Hollywood
Leaf

Key: n
op1 -> Hong
op2 -> Kong

Key: the
Leaf

Key: c5
ARG1 -> c
ARG2 -> l2
time -> a2

Key: c
wiki -> Hong_Kong
name -> n

Key: East
Leaf

Key: of
Leaf


Find all the nodes which have a :name relation

In [829]:
 # Pair every node that has a "name" relation with the variable of its name node.
 name_nodes = []
 for node_var in amr:
     children = amr[node_var]
     # Leaves have no children at all, so skip them before probing for "name".
     if children and "name" in children:
         name_nodes.append((node_var, children["name"][0]))

In [830]:
name_nodes

[('c', 'n'), ('l2', 'n2')]

Find all the literals which span over one named entity

In [831]:
# Relation labels of the form op1, op2, ... link a name node to its literals.
# The pattern never changes, so compile it once instead of once per named entity.
op_regexp = re.compile("^op([0-9])+$")

# One (root variable, name variable, literals) triplet per named entity.
literals_triplets = []
for root_var, name_var in name_nodes:
    op_rel_list = amr[name_var]
    # The literals come out in the iteration order of the relation map,
    # which is not necessarily the op1, op2, ... order of the annotation.
    literals = [op_rel_list[op_rel][0]
                for op_rel in op_rel_list
                if op_regexp.match(op_rel)]
    literals_triplets.append((root_var, name_var, literals))

So far we have the following info:
The **"root" variable of the named entity** (location, person, city, etc.), the **name variable** of the named entity and the **list of literals**.

In [832]:
literals_triplets

[('c', 'n', [u'Hong', u'Kong']),
 ('l2', 'n2', [u'East', u'Hollywood', u'of', u'the'])]

What we need to remove, add, replace:

From **node_to_concepts**: all the variables corresponding to name

From **node_to_tokens**: all the literals. We must replace them with the "root" variable of the named entity, pointing to the "super token" in which the composing literals are collapsed.

From the **amr dict**: all the entries whose key is a name variable or a string literal. We must replace the entry for the "root" variable of the named entity with an empty list, as we "pruned" its whole subtree. We must also **remove wiki entries**.

Must **update the alignment info** of tokens which are to the right of the collapsed literals.

Must **hold info about the spanned literals** for each "super node" we created.

Must **create the new sentence** explicitly.

First extract info about the spanned tokens

In [833]:
# Extend every triplet with the first and last sentence token covered by its literals:
# (root variable, name variable, literals, span start, span end).
named_entities = []
for root_var, name_var, literals_list in literals_triplets:
    # Literals have no variable of their own, so node_to_tokens stores their
    # alignments as (token, parent) pairs. The same literal string may belong to
    # several names, so only keep the pairs whose parent is this name variable
    # instead of blindly taking the first alignment.
    token_positions = [int(entry[0])
                       for literal in literals_list
                       for entry in amr.node_to_tokens.get(literal, [])
                       if isinstance(entry, tuple) and entry[1] == name_var]
    if not token_positions:
        # No literal of this entity is aligned to the sentence: there is no span
        # to collapse, so leave the entity untouched rather than failing.
        continue
    named_entities.append((root_var, name_var, literals_list,
                           min(token_positions), max(token_positions)))
        

In [834]:
named_entities

[('c', 'n', [u'Hong', u'Kong'], 0, 1),
 ('l2', 'n2', [u'East', u'Hollywood', u'of', u'the'], 8, 11)]

The named entities list contains all the information we need.
Next step: remove name variables from node_to_concepts.

In [835]:
# Second field of each named-entity record is the variable of its name node.
name_variables = [entity[1] for entity in named_entities]

In [836]:
name_variables

['n', 'n2']

In [837]:
# Rebuild node_to_concepts without the name variables.
kept_concepts = {}
for node_var, concept in amr.node_to_concepts.iteritems():
    if node_var not in name_variables:
        kept_concepts[node_var] = concept
amr.node_to_concepts = kept_concepts

In [838]:
amr.node_to_concepts

{'a2': 'always', 'c': 'city', 'c5': 'crown-01', 'l2': 'location'}

Remove literals from node_to_tokens

In [839]:
# Flatten the per-entity literal lists into one list, keeping entity order.
literals = [literal for entity in named_entities for literal in entity[2]]

In [840]:
literals

[u'Hong', u'Kong', u'East', u'Hollywood', u'of', u'the']

In [841]:
# Rebuild node_to_tokens without the entries keyed by a collapsed literal.
kept_alignments = {}
for node_key, aligned in amr.node_to_tokens.iteritems():
    if node_key not in literals:
        kept_alignments[node_key] = aligned
amr.node_to_tokens = kept_alignments

In [842]:
amr.node_to_tokens

{'a2': ['3'], 'c5': ['6']}

Remove name vars and literals from amr dict

In [843]:
# Drop the literal entries first, then the name-variable entries, from the AMR dict.
# Keys that are not present are simply skipped.
for stale_key in literals + name_variables:
    if stale_key in amr.keys():
        amr.pop(stale_key)

In [844]:
amr

AMR(util.ListMap,
    {'-': ListMap(list, {}),
     u'Hong_Kong': ListMap(list, {}),
     'a2': ListMap(list, {}),
     'c': ListMap(list, {'name': [('n',)], 'wiki': [(u'Hong_Kong',)]}),
     'c5': ListMap(list,
             {'ARG1': [('c',)], 'ARG2': [('l2',)], 'time': [('a2',)]}),
     'l2': ListMap(list, {'name': [('n2',)], 'wiki': [('-',)]})})

Update name roots and remove wiki nodes

In [845]:
# First field of each named-entity record is the entity's "root" variable.
name_roots = [entity[0] for entity in named_entities]

In [846]:
name_roots

['c', 'l2']

In [847]:
# Remove the wiki node hanging off each named-entity root, then prune the
# root's whole subtree by replacing its children with an empty list.
for root_var in name_roots:
    root_children = amr[root_var]
    if "wiki" in root_children.keys():
        wiki_node = root_children["wiki"][0]
        if wiki_node in amr.keys():
            amr.pop(wiki_node)
    amr[root_var] = []

In [848]:
amr

AMR(util.ListMap,
    {'a2': ListMap(list, {}),
     'c': [],
     'c5': ListMap(list,
             {'ARG1': [('c',)], 'ARG2': [('l2',)], 'time': [('a2',)]}),
     'l2': []})

Add node_to_tokens for the named entities "roots", with token as the "min" token in the literals group

In [849]:
# Order the entities by the first sentence token they cover (field 3), left to right.
named_entities = sorted(named_entities, key=lambda entity: entity[3])

In [850]:
named_entities

[('c', 'n', [u'Hong', u'Kong'], 0, 1),
 ('l2', 'n2', [u'East', u'Hollywood', u'of', u'the'], 8, 11)]

In [851]:
# Collapse every named-entity span of the sentence into a single token (the
# concept of the entity's root variable) and re-index the token alignments.
# named_entities must be sorted by span start, so that total_displacement
# always equals the number of tokens already removed to the left of the
# entity being processed.
tokens = sentence.split(" ")
total_displacement = 0
for named_entity in named_entities:
    root_var = named_entity[0]
    span_width = named_entity[4] - named_entity[3]
    # Fields 3 and 4 index the ORIGINAL sentence, whereas `tokens` and
    # amr.node_to_tokens were already shrunk by the previous iterations.
    # Translate the span into current coordinates before comparing or slicing.
    span_min = named_entity[3] - total_displacement
    span_max = named_entity[4] - total_displacement
    # NOTE(review): int(t) assumes every remaining alignment is a plain token
    # index; nodes without a variable appear to be stored as (token, parent)
    # pairs and would fail here -- confirm whether such nodes can reach this cell.
    for n in amr.node_to_tokens:
        # Indices left of the span end are kept, the rest move left by the
        # number of tokens the span loses. Everything is stored as int so the
        # mapping does not end up mixing strings and integers.
        amr.node_to_tokens[n] = [int(t) if int(t) < span_max
                                 else int(t) - span_width
                                 for t in amr.node_to_tokens[n]]
    # The root variable is aligned to the single token that replaces the span.
    amr.node_to_tokens[root_var] = [span_min]
    tokens = (tokens[:span_min] +
              [amr.node_to_concepts[root_var]] +
              tokens[span_max + 1:])
    total_displacement += span_width
sentence = ' '.join(tokens)

In [852]:
amr.node_to_tokens

{'a2': [2], 'c': [0], 'c5': [5], 'l2': [7]}

In [853]:
tokens

['city', 'has', 'always', 'worn', 'the', 'crown', 'of', 'location', '.']

In [854]:
sentence

'city has always worn the crown of location .'

# Another example

In [855]:
# Second worked example; this overwrites amr_string and sentence from the first one.
amr_string = """(b / become-01~e.6 
      :ARG1 (a / area~e.4 
            :mod (t / this~e.3)) 
      :ARG2 (z / zone~e.9 
            :ARG1-of (p / prohibit-01~e.8) 
            :part-of~e.10 (c / city :wiki "Hong_Kong" 
                  :name (n / name :op1 "Hong"~e.11 :op2 "Kong"~e.12))) 
      :time (s / since~e.0 
            :op1 (t2 / then~e.1)))"""
sentence = """Since then , this area has become a prohibited zone in Hong Kong ."""

In [856]:
# Re-parse: from here on `amr` refers to the second example's graph.
amr = AMR.parse_string(amr_string)

In [857]:
# Unpack the three values returned by the module-level helper.
# NOTE(review): presumably this wraps the step-by-step procedure above -- confirm.
# The call needs `import NamedEntityReplacer` to have succeeded in the imports cell.
(new_amr, new_sentence, named_entities) = NamedEntityReplacer.replace_named_entities(amr, sentence)

NameError: name 'NamedEntityReplacer' is not defined