### This demo presents the pipeline flow for parsing a sentence into Abstract Meaning Representation.

#### We begin by looking at a sentence and its human-annotated Abstract Meaning Representation graph:

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from AMRGraph import AMR
from AMRData import CustomizedAMR
from utilities import pretty_print, generate_action_sequence, generate_custom_amr
import preprocessing.ActionSequenceGenerator as asc
from preprocessing.DependencyExtractor import extract_dependencies
from preprocessing import TokensReplacer
from keras_lstm_flow import test
from postprocessing import ActionSequenceReconstruction as asr
from smatch import smatch_amr
from smatch import smatch_util
from deep_dynet import support

Using TensorFlow backend.


/Users/silvianac/personalprojects/AMR_lic/plots_keras


In [28]:
model1 = "proxy_epochs=40_maxlen=20_embeddingsdim=300"
max_len1=20
embeddings_dim1=300

model2 = "all_epochs=15_maxlen=30_embeddingsdim=300"
max_len2=30
embeddings_dim2=300

<img src="./img/demo_simple_example.png">

In [3]:
sentence = "It looks like we will also bring in whales ."
amr_str = """(l / look-02~e.1
      :ARG1~e.2 (b / bring-01~e.6
            :ARG0 (w / we~e.3)
            :ARG1~e.7 (w2 / whale~e.8)
            :mod (a / also~e.5)))"""
amr = AMR.parse_string(amr_str)

In [4]:
custom_amr = generate_custom_amr(amr)


Mappings between node variables and their corresponding concepts.

{'a': 'also', 'b': 'bring-01', 'w2': 'whale', 'w': 'we', 'l': 'look-02'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{'a': ['5'], 'b': ['6'], 'w2': ['8'], 'w': ['3'], 'l': ['1']}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'ARG1': [[('7', 'b')], [('2', 'l')]]}

Mappings from a node to each child, along with the relation between them.

Key: a
Leaf

Key: b
ARG0 -> w
ARG1 -> w2
mod -> a

Key: w2
Leaf

Key: w
Leaf

Key: l
ARG1 -> b


All the nodes in the amr should appear here.

['a', 'b', 'w2', 'w', 'l']

Creating custom AMR.


Custom AMR token to concepts dict

{8: ('w2', 'whale'), 1: ('l', 'look-02'), 3: ('w', 'we'), 5: ('a', 'also'), 6: ('b', 'bring-01')}

Custom AMR relations 

#### Having the sentence and the CustomAMR structure, we can now generate the <span style="color:red">*oracle action sequence*</span>
It looks like we will also bring in whales .
```
(l / look-02~e.1
      :ARG1~e.2 (b / bring-01~e.6
            :ARG0 (w / we~e.3)
            :ARG1~e.7 (w2 / whale~e.8)
            :mod (a / also~e.5)))
```

In [5]:
actions = asc.generate_action_sequence(custom_amr, sentence)

In [6]:
actions

['DN',
 'SH_look-02_l',
 'DN',
 'SH_we_w',
 'DN',
 'SH_also_a',
 'SH_bring-01_b',
 'RL_mod',
 'RL_ARG0',
 'DN',
 'SH_whale_w2',
 'RR_ARG1',
 'RR_ARG1',
 'DN']

#### We next extract the <span style="color:red">dependencies</span> between the tokens in the sentence.

In [7]:
deps = extract_dependencies(sentence)

It(0) looks(1) like(2) we(3) will(4) also(5) bring(6) in(7) whales(8) .

In [8]:
deps

{0: (1, 'nsubj'),
 2: (6, 'mark'),
 3: (6, 'nsubj'),
 4: (6, 'aux'),
 5: (6, 'advmod'),
 6: (1, 'advcl'),
 8: (6, 'prep_in')}

#### We now have all the data necessary for making the prediction using the <span style="color:red">single LSTM model</span>

In [9]:
test(model_name=model1, tokenizer_path="./tokenizers/full_tokenizer.dump",
     data=[(sentence, actions, amr_str, deps)], max_len=max_len1, embedding_dim=embeddings_dim1)

Model path is:
./models/proxy_epochs=40_maxlen=20_embeddingsdim=300
Word index len: 
7107
Test data shape: 
(1, 10)
(1, 14)
(1,)
1
Found 400000 word vectors.
Embedding match for volume-quantity
Embedding match for distance-quantity
Embedding match for energy-quantity
Embedding match for power-quantity
Embedding match for mass-quantity
Embedding match for monetary-quantity
Embedding match for temporal-quantity
Embedding match for date-entity
Not found: ['around~', 'blowjobs', "don'cha", 'it...', 'dumbfuck', 'statement/report', '25+', 'booyah', 'railway-line', 'though...', 'decisiohttp://www.historyofwar.org/articles/wars_downfall3.htmln', "'?", '\'"', "'.", 'href="http://themamas.org/about-the-madison-area-music-awards/mission-vision-statements/">', 'miscoding', 'colenel', 'obamabots', '............', '!!!!!!', 'k00', 'http://news.cnet.com/8301-1035_3-57572850-94/doj-lets-important-deadline-pass-in-t-mobile-metropcs-deal/', 'remaning', 'theblaze.com', 'nannystate', 'right..', 'http://ww

[[3, 0, 0, 0, 1, 3, 0, 0, 1, 3, 2, 3, 3, 2]]

#### We now look at a more complex example, which also contains a <span style="color:red">Named-Entity</span>.  Named entities are identified and replaced at preprocessing.

<img src="./img/demo_ne_example.png">

In [10]:
sentence = "upgrade fire control systems of Indian tanks ."

amr_str= """(u / upgrade-02~e.0 
      :ARG1 (s / system~e.3 
            :ARG0-of (c / control-01~e.2 
                  :ARG1 (f / fire-01~e.1)) 
            :poss~e.4 (t / tank~e.6 
                  :mod (c2 / country :wiki "India" 
                        :name (n / name :op1 "India"~e.5)))))"""
amr = AMR.parse_string(amr_str)

#### We replace the named-entities

In [11]:
concepts_metadata = {}
(new_amr, new_sentence, named_entities) = TokensReplacer.replace_named_entities(amr, sentence)
for name_entity in named_entities:
    concepts_metadata[name_entity[0]] = name_entity[5]

In [12]:
new_sentence

'upgrade fire control systems of country tanks .'

In [13]:
amr

AMR(util.ListMap,
    {u'India': ListMap(list, {}),
     'c': ListMap(list, {'ARG1': [('f',)]}),
     'c2': ListMap(list, {'name': [('n',)], 'wiki': [(u'India',)]}),
     'f': ListMap(list, {}),
     'n': ListMap(list, {'op1': [(u'India',)]}),
     's': ListMap(list, {'ARG0-of': [('c',)], 'poss': [('t',)]}),
     't': ListMap(list, {'mod': [('c2',)]}),
     'u': ListMap(list, {'ARG1': [('s',)]})})

In [14]:
named_entities

[('c2', 'n', [u'India'], 5, 5, <amr_util.Node.Node instance at 0x1360dd098>)]

In [15]:
new_amr

AMR(util.ListMap,
    {'c': ListMap(list, {'ARG1': [('f',)]}),
     'c2': {},
     'f': ListMap(list, {}),
     's': ListMap(list, {'ARG0-of': [('c',)], 'poss': [('t',)]}),
     't': ListMap(list, {'mod': [('c2',)]}),
     'u': ListMap(list, {'ARG1': [('s',)]})})

In [16]:
print concepts_metadata['c2'].amr_print()

( d1 / country 
	:name  ( d1_1 / name 
		:op1 ""India""
	)
	:wiki ""India""
)


#### We now generate the action sequence for the preprocessed AMR graph

In [17]:
custom_amr = generate_custom_amr(new_amr)


Mappings between node variables and their corresponding concepts.

{'c': 'control-01', 'f': 'fire-01', 's': 'system', 'u': 'upgrade-02', 't': 'tank', 'c2': 'country'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{'c': ['2'], 'f': ['1'], 's': ['3'], 'u': ['0'], 't': ['6'], 'c2': [5]}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'poss': [[('4', 's')]]}

Mappings from a node to each child, along with the relation between them.

Key: c
ARG1 -> f

Key: f
Leaf

Key: s
ARG0-of -> c
poss -> t

Key: u
ARG1 -> s

Key: t
mod -> c2

Key: c2
Leaf


All the nodes in the amr should appear here.

['c', 'f', 's', 'u', 't', 'c2']

Creating custom AMR.


Custom AMR token to concepts dict

{0: ('u', 'upgrade-02'), 1: ('f', 'fire-01'), 2: ('c', 'control-01'), 3: ('s

In [18]:
actions = asc.generate_action_sequence(custom_amr, new_sentence)

In [19]:
actions

['SH_upgrade-02_u',
 'SH_fire-01_f',
 'SH_control-01_c',
 'RL_ARG1',
 'SH_system_s',
 'RL_ARG0-of',
 'DN',
 'SH_country_c2',
 'SH_tank_t',
 'RL_mod',
 'RR_poss',
 'RR_ARG1',
 'DN']

In [20]:
deps = extract_dependencies(new_sentence)

In [21]:
deps

{1: (3, 'nn'), 2: (3, 'nn'), 3: (0, 'dobj'), 5: (6, 'nn'), 6: (3, 'prep_of')}

In [22]:
predictions = test(model_name=model1, tokenizer_path="./tokenizers/full_tokenizer.dump",
     data=[(new_sentence, actions, amr_str, deps)], max_len=max_len1, embedding_dim=embeddings_dim1)

Model path is:
./models/proxy_epochs=40_maxlen=20_embeddingsdim=300
Word index len: 
7107
Test data shape: 
(1, 8)
(1, 13)
(1,)
1
Found 400000 word vectors.
Embedding match for volume-quantity
Embedding match for distance-quantity
Embedding match for energy-quantity
Embedding match for power-quantity
Embedding match for mass-quantity
Embedding match for monetary-quantity
Embedding match for temporal-quantity
Embedding match for date-entity
Not found: ['around~', 'blowjobs', "don'cha", 'it...', 'dumbfuck', 'statement/report', '25+', 'booyah', 'railway-line', 'though...', 'decisiohttp://www.historyofwar.org/articles/wars_downfall3.htmln', "'?", '\'"', "'.", 'href="http://themamas.org/about-the-madison-area-music-awards/mission-vision-statements/">', 'miscoding', 'colenel', 'obamabots', '............', '!!!!!!', 'k00', 'http://news.cnet.com/8301-1035_3-57572850-94/doj-lets-important-deadline-pass-in-t-mobile-metropcs-deal/', 'remaning', 'theblaze.com', 'nannystate', 'right..', 'http://www

#### Our score is penalized by the fact that the whole Named-Entity subtree is pruned. We now show how the subtree is recovered based on the named entity metadata. From the named entities meta-data we extract the list of literals ("India") and the beginning index.

In [23]:
named_entities = [(n[3], n[2]) for n in named_entities]

In [24]:
named_entities

[(5, [u'India'])]

In [25]:
predictions[0]

[0, 0, 0, 1, 0, 1, 3, 0, 0, 1, 2, 3, 2]

In [26]:
vocab_acts = support.Vocab.from_list(['SH', 'RL', 'RR', 'DN', 'SW'])
action_objects = support.oracle_actions_to_action_index(actions, vocab_acts)
action_indices = [a.index for a in action_objects]
action_labels = [a.label for a in action_objects]

act = asr.ActionConceptTransfer()
act.load_from_action_and_label(action_indices, action_labels)
pred_label = act.populate_new_actions(predictions[0])
print 'Predictions with old labels: '
print pred_label
predicted_amr_str = asr.reconstruct_all_ne(pred_label, named_entities, [])


smatch_results = smatch_util.SmatchAccumulator()
original_amr = smatch_amr.AMR.parse_AMR_line(amr_str)
predicted_amr = smatch_amr.AMR.parse_AMR_line(predicted_amr_str)
smatch_f_score = smatch_results.compute_and_add(predicted_amr, original_amr)

print 'Original Amr'
print amr_str
print 'Predicted Amr'
print predicted_amr_str
print 'Smatch f-score %f' % smatch_f_score

Predictions with old labels: 
['SH_upgrade-02', 'SH_fire-01', 'SH_control-01', 'RL_ARG1', 'SH_system', 'RL_ARG0-of', 'DN', 'SH_country', 'SH_tank', 'RL_mod', 'RR_poss', 'DN', 'RR_ARG1']
Original Amr
(u / upgrade-02~e.0 
      :ARG1 (s / system~e.3 
            :ARG0-of (c / control-01~e.2 
                  :ARG1 (f / fire-01~e.1)) 
            :poss~e.4 (t / tank~e.6 
                  :mod (c2 / country :wiki "India" 
                        :name (n / name :op1 "India"~e.5)))))
Predicted Amr
( d1 / upgrade-02 
	:ARG1  ( d1_1 / system 
		:ARG0-of  ( d1_1_1 / control-01 
			:ARG1  ( d1_1_1_1 / fire-01 )
		)
		:poss  ( d1_1_2 / tank 
			:mod  ( d1_1_2_1 / country 
				:wiki "India"
				:name  ( d1_1_2_1_1 / name 
					:op1 "India"
				)
			)
		)
	)
)
Smatch f-score 1.000000


#### A sentence with Swap. Swap is hard to predict because it appears very rarely in the dataset.

In [29]:
amr_str = """(s2 / seem-01~e.1
       :ARG1~e.3 (h / have-03~e.8
             :ARG0 (w / we~e.7)
             :ARG1 (s / scheme~e.10
                   :mod (p / plan-01~e.15
                   :ARG1 (r / renovate-01~e.14)
                         :ARG1-of (m / major-02~e.13))
                   :purpose (f / future~e.5))))"""
sentence = """It seems that for the future , we have a scheme , a major renovation plan ."""
amr = AMR.parse_string(amr_str)
custom_amr = generate_custom_amr(amr)
actions = generate_action_sequence(custom_amr, sentence)
print actions
deps = extract_dependencies(sentence)
predictions = test(model_name=model2, tokenizer_path="./tokenizers/full_tokenizer.dump",
     data=[(sentence, actions, amr_str, deps)], max_len=max_len2, embedding_dim=embeddings_dim2)


Mappings between node variables and their corresponding concepts.

{'f': 'future', 'h': 'have-03', 'm': 'major-02', 'p': 'plan-01', 's': 'scheme', 'r': 'renovate-01', 'w': 'we', 's2': 'seem-01'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{'f': ['5'], 'h': ['8'], 'm': ['13'], 'p': ['15'], 's': ['10'], 'r': ['14'], 'w': ['7'], 's2': ['1']}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'ARG1': [[('3', 's2')]]}

Mappings from a node to each child, along with the relation between them.

Key: f
Leaf

Key: h
ARG0 -> w
ARG1 -> s

Key: m
Leaf

Key: p
ARG1 -> r
ARG1-of -> m

Key: s
purpose -> f
mod -> p

Key: r
Leaf

Key: w
Leaf

Key: s2
ARG1 -> h


All the nodes in the amr should appear here.

['f', 'h', 'm', 'p', 's', 'r', 'w', 's2']

Creating custom A

#### We will now test a sentence with a <span style="color:red">date-entity</span> in it. We preprocess the date-entity.

In [30]:
amr_str = """(d / difficult~e.5 
      :domain~e.4 (r / reach-01~e.7 
            :ARG1 (c / consensus~e.0 
                  :topic~e.1 (c2 / country :wiki "India" 
                        :name (n / name :op1 "India"~e.2))) 
            :time~e.8 (m / meet-03~e.11 
                  :ARG0 (o / organization :wiki "Nuclear_Suppliers_Group" 
                        :name (n2 / name :op1 "NSG"~e.10)) 
                  :time~e.12 (d2 / date-entity :year 2007~e.14 :month~e.13 11~e.13))))"""
sentence = """Consensus on India will be difficult to reach when the NSG meets in November 2007 ."""
amr = AMR.parse_string(amr_str)
(new_amr, new_sentence, named_entities) = TokensReplacer.replace_named_entities(amr, sentence)

In [31]:
new_sentence

'Consensus on country will be difficult to reach when the organization meets in November 2007 .'

In [32]:
named_entities

[('c2', 'n', [u'India'], 2, 2, <amr_util.Node.Node instance at 0x13967e5a8>),
 ('o', 'n2', [u'NSG'], 10, 10, <amr_util.Node.Node instance at 0x13967e998>)]

In [33]:
(new_amr, new_sentence, date_entities) = TokensReplacer.replace_date_entities(new_amr, new_sentence)

In [34]:
new_sentence

'Consensus on country will be difficult to reach when the organization meets in date-entity .'

#### For <span style="color:red">date-entities</span> we store information concerning the concept, quantity pair

In [35]:
date_entities

[('d2',
  ['11', '2007'],
  ['month', 'year'],
  13,
  14,
  <amr_util.Node.Node instance at 0x1389f6518>)]

In [36]:
custom_amr = generate_custom_amr(new_amr)
actions = generate_action_sequence(custom_amr, new_sentence)
print actions
deps = extract_dependencies(sentence)


Mappings between node variables and their corresponding concepts.

{'c': 'consensus', 'd': 'difficult', 'm': 'meet-03', 'o': 'organization', 'r': 'reach-01', 'c2': 'country', 'd2': 'date-entity'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{'c': ['0'], 'd': ['5'], 'm': ['11'], 'o': ['10'], 'r': ['7'], 'c2': ['2'], 'd2': [13]}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'topic': [[('1', 'c')]], 'domain': [[('4', 'd')]], 'time': [[('12', 'm')], [('8', 'r')]], 'month': [[('13', 'd2')]]}

Mappings from a node to each child, along with the relation between them.

Key: c
topic -> c2

Key: d
domain -> r

Key: m
ARG0 -> o
time -> d2

Key: o
Leaf

Key: r
ARG1 -> c
time -> m

Key: c2
Leaf

Key: d2
Leaf


All the nodes in the amr should appear here.

['c

In [37]:
predictions = test(model_name=model2, tokenizer_path="./tokenizers/full_tokenizer.dump",
     data=[(new_sentence, actions, amr_str, deps)], max_len=max_len2, embedding_dim=embeddings_dim2)

Model path is:
./models/all_epochs=15_maxlen=30_embeddingsdim=300
Word index len: 
7107
Test data shape: 
(1, 15)
(1, 22)
(1,)
1
Found 400000 word vectors.
Embedding match for volume-quantity
Embedding match for distance-quantity
Embedding match for energy-quantity
Embedding match for power-quantity
Embedding match for mass-quantity
Embedding match for monetary-quantity
Embedding match for temporal-quantity
Embedding match for date-entity
Not found: ['around~', 'blowjobs', "don'cha", 'it...', 'dumbfuck', 'statement/report', '25+', 'booyah', 'railway-line', 'though...', 'decisiohttp://www.historyofwar.org/articles/wars_downfall3.htmln', "'?", '\'"', "'.", 'href="http://themamas.org/about-the-madison-area-music-awards/mission-vision-statements/">', 'miscoding', 'colenel', 'obamabots', '............', '!!!!!!', 'k00', 'http://news.cnet.com/8301-1035_3-57572850-94/doj-lets-important-deadline-pass-in-t-mobile-metropcs-deal/', 'remaning', 'theblaze.com', 'nannystate', 'right..', 'http://www.

In [38]:
named_entities = [(n[3], n[2]) for n in named_entities]
date_entities = [(d[3], d[2], d[1]) for d in date_entities]

In [39]:
date_entities

[(13, ['month', 'year'], ['11', '2007'])]

In [40]:
vocab_acts = support.Vocab.from_list(['SH', 'RL', 'RR', 'DN', 'SW'])
action_objects = support.oracle_actions_to_action_index(actions, vocab_acts)
action_indices = [a.index for a in action_objects]
action_labels = [a.label for a in action_objects]

act = asr.ActionConceptTransfer()
act.load_from_action_and_label(action_indices, action_labels)
pred_label = act.populate_new_actions(predictions[0])
print 'Predictions with old labels: '
print pred_label
predicted_amr_str = asr.reconstruct_all_ne(pred_label, named_entities, date_entities)


smatch_results = smatch_util.SmatchAccumulator()
original_amr = smatch_amr.AMR.parse_AMR_line(amr_str)
predicted_amr = smatch_amr.AMR.parse_AMR_line(predicted_amr_str)
smatch_f_score = smatch_results.compute_and_add(predicted_amr, original_amr)

print 'Original Amr'
print amr_str
print 'Predicted Amr'
print predicted_amr_str
print 'Smatch f-score %f' % smatch_f_score

Predictions with old labels: 
['SH_consensus', 'DN', 'SH_country', 'DN', 'DN', 'SH_difficult', 'RL_topic', 'DN', 'DN', 'SH_reach-01', 'DN', 'SH_organization', 'SH_meet-03', 'RL_ARG0', 'RL_time', 'RR_time', 'DN', 'SH_date-entity', 'RR_ARG1', 'DN', 'RR_domain']
Original Amr
(d / difficult~e.5 
      :domain~e.4 (r / reach-01~e.7 
            :ARG1 (c / consensus~e.0 
                  :topic~e.1 (c2 / country :wiki "India" 
                        :name (n / name :op1 "India"~e.2))) 
            :time~e.8 (m / meet-03~e.11 
                  :ARG0 (o / organization :wiki "Nuclear_Suppliers_Group" 
                        :name (n2 / name :op1 "NSG"~e.10)) 
                  :time~e.12 (d2 / date-entity :year 2007~e.14 :month~e.13 11~e.13))))
Predicted Amr
( d1 / consensus 
	:domain  ( d1_1 / difficult 
		:topic  ( d1_1_1 / country 
			:wiki "India"
			:name  ( d1_1_1_1 / name 
				:op1 "India"
			)
		)
		:time  ( d1_1_2 / meet-03 
			:ARG0  ( d1_1_2_1 / organization 
				:wiki "NSG"
				