In [1]:
# All imports for the notebook live in this single cell, so that a fresh
# "Restart Kernel -> Run All" never depends on an import buried in a later cell.
import os
import sys

# Put the parent of the current working directory on sys.path (only once),
# presumably so the project modules imported below can be resolved from there.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Project modules (original import order is kept in case any of them has import-time effects).
from AMRGraph import AMR
from AMRData import CustomizedAMR
from utilities import pretty_print, generate_action_sequence, generate_custom_amr
import preprocessing.ActionSequenceGenerator as asc
from preprocessing.DependencyExtractor import extract_dependencies
from preprocessing import TokensReplacer
from keras_lstm_flow import test_without_amr, test
from postprocessing import ActionSequenceReconstruction as asr
import TestDataExtractor
import preprocessing.NamedEntitiesReplacer as ner
# Used by the date-entity example at the end of the notebook.
import preprocessing.DateEntitiesReplacer as der

Using TensorFlow backend.


/home/andreea/Desktop/licenta/plots_keras


In [2]:
# Example 1: a tokenized sentence together with its annotated AMR.
# Each "~e.N" suffix aligns a concept or relation with the token at index N of `sentence`
# (e.g. strike-01~e.13 -> "struck"); ":time~e.0,2" aligns one relation with tokens 0 and 2.
sentence = "During a time of prosperity and happiness , such a big earthquake suddenly struck"
amr = """(s / strike-01~e.13 
      :ARG2 (e / earthquake~e.11 
            :mod (b / big~e.10 
                  :mod (s2 / such~e.8))) 
      :time~e.0,2 (t / time~e.2 
            :op1~e.3 (p / prosper-01~e.4) 
            :op2 (h / happiness~e.6)) 
      :manner~e.12 (s3 / sudden~e.12))"""

# We transform the input AMR string into a custom structure (a `CustomizedAMR` instance)

In [3]:
# Parse the annotated AMR string into an AMR object.
amrStruct = AMR.parse_string(amr)
# Convert it to the custom representation. Per the recorded output, this prints the
# concept, alignment and relation mappings and evaluates to a CustomizedAMR instance,
# which is displayed because it is the last expression of the cell.
generate_custom_amr(amrStruct)


Mappings between node variables and their corresponding concepts.

{'b': 'big', 'e': 'earthquake', 's3': 'sudden', 'h': 'happiness', 'p': 'prosper-01', 's': 'strike-01', 't': 'time', 's2': 'such'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{'b': ['10'], 'e': ['11'], 's3': ['12'], 'h': ['6'], 'p': ['4'], 's': ['13'], 't': ['2'], 's2': ['8']}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'manner': [[('12', 's')]], 'op1': [[('3', 't')]], 'time': [[('0', 's')], ('2', 's')]}

Mappings from a node to each child, along with the relation between them.

Key: b
mod -> s2

Key: e
mod -> b

Key: s3
Leaf

Key: h
Leaf

Key: p
Leaf

Key: s
ARG2 -> e
manner -> s3
time -> t

Key: t
op1 -> p
op2 -> h

Key: s2
Leaf


All the nodes in the amr should appear here.



<AMRData.CustomizedAMR instance at 0x7f1dec0e8440>

# Now we take an example which contains 2 named entities

In [4]:
# Example 2: a sentence with two named entities ("Xinghui" and "Sydney").
# In the AMR they appear under :name nodes, and the quoted literals carry the token
# alignments ("Xinghui"~e.0, "Sydney"~e.16).
sentence = "Xinghui accepted this order of ours and deployed all the buses of other travel agencies in Sydney ."
amr = """(a / and~e.6 
      :op1 (a2 / accept-01~e.1 
            :ARG0 (c / company :wiki - 
                  :name (n2 / name :op1 "Xinghui"~e.0)) 
            :ARG1 (t / thing~e.3 
                  :ARG1-of~e.3 (o / order-02~e.3 
                        :ARG0~e.4 (w / we~e.5)) 
                  :mod (t3 / this~e.2))) 
      :op2 (d / deploy-01~e.7 
            :ARG0 c 
            :ARG1 (b / bus~e.10 
                  :mod (a3 / all~e.8) 
                  :poss~e.11 (a4 / agency~e.14 
                        :mod (t2 / travel-01~e.13) 
                        :mod (o2 / other~e.12) 
                        :location~e.15 (c2 / city :wiki "Sydney" 
                              :name (n / name :op1 "Sydney"~e.16))))))"""
# Parse the string and build the custom representation; the result is kept in `customAMR`.
amrStruct = AMR.parse_string(amr)
customAMR = generate_custom_amr(amrStruct)


Mappings between node variables and their corresponding concepts.

{'a': 'and', 'c': 'company', 'b': 'bus', 'd': 'deploy-01', 't2': 'travel-01', 't3': 'this', 'o': 'order-02', 'n': 'name', 'a3': 'all', 'a2': 'accept-01', 't': 'thing', 'w': 'we', 'c2': 'city', 'n2': 'name', 'o2': 'other', 'a4': 'agency'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{'a': ['6'], 'b': ['10'], 'd': ['7'], u'Xinghui': [(u'0', 'n2')], 't2': ['13'], 't3': ['2'], 'o': ['3'], 'w': ['5'], 'a3': ['8'], 'a2': ['1'], 't': ['3'], u'Sydney': [(u'16', 'n')], 'o2': ['12'], 'a4': ['14']}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'ARG0': [[('4', 'o')]], 'ARG1-of': [[('3', 't')]], 'location': [[('15', 'a4')]], 'poss': [[('11', 'b')]]}

Mappings from a node to each child, along w

# We preprocess the sentence using the NER parser, which replaces each named entity with its entity label

In [5]:
# Replace the named entities in `sentence` with their entity labels. Per the recorded output,
# the call prints its intermediate steps and returns a pair:
# (sentence with labels substituted, [(token index, [original tokens]), ...]).
ner.process_sentence(sentence)

[(u'Xinghui', u'PERSON'), (u'accepted', u'O'), (u'this', u'O'), (u'order', u'O'), (u'of', u'O'), (u'ours', u'O'), (u'and', u'O'), (u'deployed', u'O'), (u'all', u'O'), (u'the', u'O'), (u'buses', u'O'), (u'of', u'O'), (u'other', u'O'), (u'travel', u'O'), (u'agencies', u'O'), (u'in', u'O'), (u'Sydney', u'LOCATION'), (u'.', u'O')]
[u'PERSON', u'accepted', u'this', u'order', u'of', u'ours', u'and', u'deployed', u'all', u'the', u'buses', u'of', u'other', u'travel', u'agencies', u'in', u'LOCATION', u'.']
PERSON accepted this order of ours and deployed all the buses of other travel agencies in LOCATION . 
[(0, [u'Xinghui']), (16, [u'Sydney'])]


(u'PERSON accepted this order of ours and deployed all the buses of other travel agencies in LOCATION . ',
 [(0, [u'Xinghui']), (16, [u'Sydney'])])

# We preprocess the same sentence by using NLTK and observe that only one of the entities is found

In [6]:
# Alternative entity extraction; note that it takes a list of sentences.
# In the recorded output only 'Sydney' is returned for this input ("Xinghui" is missed).
ner.process_language([sentence]) 

['Sydney']


# Now we are going to present the entire flow of predicting the AMR structure, without using any information from the existing AMR 

In [7]:
# Settings of the pre-trained model used in the prediction cells below.
epochs1 = 20           # number of epochs encoded in the saved model's name
max_len1 = 30          # passed as `max_len` to test_without_amr
embeddings_dim1 = 200  # passed as `embedding_dim` to test_without_amr

# The saved model's name encodes the same values, so it is derived from them rather than
# typed out a second time; this keeps the name and the numeric settings from drifting apart.
model1 = "all_epochs={}_maxlen={}_embeddingsdim={}".format(epochs1, max_len1, embeddings_dim1)

In [8]:
# Example 3: the sentence used for the end-to-end prediction flow; "India" is its named entity.
sentence = "upgrade fire control systems of India tanks ."

# Annotated AMR for the sentence above ("~e.N" marks the aligned token index).
amr_str= """(u / upgrade-02~e.0 
      :ARG1 (s / system~e.3 
            :ARG0-of (c / control-01~e.2 
                  :ARG1 (f / fire-01~e.1)) 
            :poss~e.4 (t / tank~e.6 
                  :mod (c2 / country :wiki "India" 
                        :name (n / name :op1 "India"~e.5)))))"""
# NOTE: from here on `amr` is the parsed AMR object; earlier cells bound `amr` to the raw string.
amr = AMR.parse_string(amr_str)

In [9]:
# Build the custom representation of the parsed AMR; the call also prints its mappings.
custom_amr = generate_custom_amr(amr)


Mappings between node variables and their corresponding concepts.

{'c': 'control-01', 'f': 'fire-01', 'n': 'name', 's': 'system', 'u': 'upgrade-02', 't': 'tank', 'c2': 'country'}

Mappings between nodes and all the aligned tokens: If the nodes don't havea variable (polarity, literals, quantities, interrogatives), they specify both the aligned tokens and the parent in order to uniquely identify them

{'c': ['2'], 'f': ['1'], u'India': [(u'5', 'n')], 's': ['3'], 'u': ['0'], 't': ['6']}

Mappings between relations and tokens. Uniquely identified by also specifying the parent of that relation.

{'poss': [[('4', 's')]]}

Mappings from a node to each child, along with the relation between them.

Key: c
ARG1 -> f

Key: f
Leaf

Key: India
Leaf

Key: n
op1 -> India

Key: s
ARG0-of -> c
poss -> t

Key: u
ARG1 -> s

Key: t
mod -> c2

Key: c2
wiki -> India
name -> n


All the nodes in the amr should appear here.

['c', 'f', u'India', 'n', 's', 'u', 't', 'c2']

Creating custom AMR.


Custom AMR t

In [10]:
# Extract the dependency information for the original sentence; it is handed to the model
# together with the sentence text in the prediction cells below.
deps = extract_dependencies(sentence)

In [11]:
# Replace named entities and unpack the result: the rewritten sentence and the list of
# (token index, [original tokens]) pairs. In the recorded output "India" at index 5
# becomes LOCATION.
(new_sentence, named_entities) = ner.process_sentence(sentence)

[(u'upgrade', u'O'), (u'fire', u'O'), (u'control', u'O'), (u'systems', u'O'), (u'of', u'O'), (u'India', u'LOCATION'), (u'tanks', u'O'), (u'.', u'O')]
[u'upgrade', u'fire', u'control', u'systems', u'of', u'LOCATION', u'tanks', u'.']
upgrade fire control systems of LOCATION tanks . 
[(5, [u'India'])]


# Now we will use our pre-trained model in order to predict the AMR structure for the original sentence 

In [12]:
from keras_lstm_flow import test_without_amr, test

In [13]:
# Run the pre-trained model on the original sentence. Each data item is a
# (sentence, dependencies, []) triple. The recorded output ends with a list of integers,
# presumably the predicted action sequence -- confirm against keras_lstm_flow.
test_without_amr(
    model_name=model1,
    tokenizer_path="./tokenizers/full_tokenizer.dump",
    data=[(sentence, deps, [])],
    max_len=max_len1,
    embedding_dim=embeddings_dim1,
    with_reattach=False
)

Model path is:
./models/all_epochs=20_maxlen=30_embeddingsdim=200
Word index len: 
7107
Test data shape: 
(1, 7)
1
Found 400000 word vectors.
Embedding match for volume-quantity
Embedding match for distance-quantity
Embedding match for energy-quantity
Embedding match for power-quantity
Embedding match for mass-quantity
Embedding match for monetary-quantity
Embedding match for temporal-quantity
Embedding match for date-entity
First 2 not found: ["don'cha", 'it...']
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 30)            0                                            
____________________________________________________________________________________________________
input_2 (InputLayer)             (None, 30)            0                                            
__________________________

[0, 0, 0, 1, 0, 1, 3, 0, 1, 3, 2]

# We now predict the AMR structure for the sentence after the named entity was replaced

In [18]:
# Sentence with the named entity replaced by its label.
# NOTE(review): this is a hand-written copy of `new_sentence` from the NER step, without the
# trailing " ." that the NER output contains -- confirm the omission is intended, or derive
# the value from `new_sentence` so the two cannot drift apart.
replaced_sentence = "upgrade fire control systems of LOCATION tanks"

In [17]:
# Run the same pre-trained model on the entity-replaced sentence.
# `deps` is reused as extracted from the original sentence (with "India").
test_without_amr(
    model_name=model1,
    tokenizer_path="./tokenizers/full_tokenizer.dump",
    data=[(replaced_sentence, deps, [])],
    max_len=max_len1,
    embedding_dim=embeddings_dim1,
    with_reattach=False
)

Model path is:
./models/all_epochs=20_maxlen=30_embeddingsdim=200
Word index len: 
7107
Test data shape: 
(1, 7)
1
Found 400000 word vectors.
Embedding match for volume-quantity
Embedding match for distance-quantity
Embedding match for energy-quantity
Embedding match for power-quantity
Embedding match for mass-quantity
Embedding match for monetary-quantity
Embedding match for temporal-quantity
Embedding match for date-entity
First 2 not found: ["don'cha", 'it...']
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_13 (InputLayer)            (None, 30)            0                                            
____________________________________________________________________________________________________
input_14 (InputLayer)            (None, 30)            0                                            
__________________________

[0, 0, 0, 1, 0, 1, 3, 0, 0, 1, 2, 2]

# Now we are going to preprocess a sentence that has both a named entity and a date entity

In [27]:
# Example containing both a person name ("Andreea") and a date expression ("July").
date_time_sentence = "Andreea is presenting her thesis in July"

In [28]:
# First pass: named-entity replacement. In the recorded output "Andreea" becomes PERSON,
# while "July" is tagged 'O' and therefore left unchanged.
(d_t_sentence, named_entities_2) = ner.process_sentence(date_time_sentence)

[(u'Andreea', u'PERSON'), (u'is', u'O'), (u'presenting', u'O'), (u'her', u'O'), (u'thesis', u'O'), (u'in', u'O'), (u'July', u'O')]
[u'PERSON', u'is', u'presenting', u'her', u'thesis', u'in', u'July']
PERSON is presenting her thesis in July 
[(0, [u'Andreea'])]


In [29]:
import preprocessing.DateEntitiesReplacer as der

In [30]:
# Second pass: date-entity replacement on the NER output. In the recorded output "July"
# (token index 6) becomes DATE and is returned in `date_entities`.
(final_sentence, date_entities) = der.process_sentence(d_t_sentence)

[(u'PERSON', u'O'), (u'is', u'O'), (u'presenting', u'O'), (u'her', u'O'), (u'thesis', u'O'), (u'in', u'O'), (u'July', u'DATE')]
PERSON is presenting her thesis in DATE 
[(6, [u'July'])]
