-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
MergeNodes.java
159 lines (138 loc) · 4.56 KB
/
MergeNodes.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
package edu.stanford.nlp.semgraph.semgrex.ssurgeon;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
/**
 * Combines two words into one word.
 * <br>
 * This requires one of the two nodes to be the head of the other (there must be
 * at least one edge between them), and the dependent word can't have any
 * edges other than the ones connecting it to the head.
 * <br>
 * Unless overridden by the supplied attributes, the word and lemma will be the
 * combination of the two words, squished together (left word first), the
 * {@code before} text is taken from the leftmost word, and the {@code after}
 * text is taken from the rightmost word.
 *
 * @author John Bauer
 */
public class MergeNodes extends SsurgeonEdit {
  public static final String LABEL = "mergeNodes";

  /** Name (in the semgrex match) of the first node to merge. */
  final String name1;
  /** Name (in the semgrex match) of the second node to merge. */
  final String name2;
  /** Attributes to put on the merged word; kept sorted by key so the edit string is stable. */
  final Map<String, String> attributes;

  /**
   * @param name1 semgrex name of the first node
   * @param name2 semgrex name of the second node
   * @param attributes attribute name to value; copied, so later changes to the argument are not seen
   */
  public MergeNodes(String name1, String name2, Map<String, String> attributes) {
    this.name1 = name1;
    this.name2 = name2;
    this.attributes = new TreeMap<>(attributes);
  }

  /**
   * Emits a parseable instruction string.
   * <br>
   * The format is the label, the two node names, and then one
   * {@code -key value} pair per attribute, all separated by tabs.
   *
   * @return the tab separated edit string
   */
  @Override
  public String toEditString() {
    StringBuilder buf = new StringBuilder();
    buf.append(LABEL).append('\t');
    buf.append(name1).append('\t');
    buf.append(name2);
    // TODO: some attributes might need to be escaped!
    for (Map.Entry<String, String> attribute : attributes.entrySet()) {
      buf.append("\t-");
      buf.append(attribute.getKey());
      buf.append(' ');
      buf.append(attribute.getValue());
    }
    return buf.toString();
  }

  /**
   * If the two named nodes are connected by an edge, and the edges of
   * the graph allow for it, squish the two words into one word.
   * <br>
   * The merged annotations are written onto the head node, the dependent
   * node is removed from the graph, and the remaining nodes are reindexed.
   * <br>
   * NOTE: this method does not itself verify that the two words are
   * adjacent in the sentence; it only looks at the edges between them.
   *
   * @param sg the graph to edit in place
   * @param sm the matcher from which the two named nodes are looked up
   * @return false (leaving the graph untouched) if either node is not found,
   *         if there is no edge between them, or if the dependent has an edge
   *         to or from any node other than the head; true if the merge happened
   */
  @Override
  public boolean evaluate(SemanticGraph sg, SemgrexMatcher sm) {
    IndexedWord node1 = sm.getNode(name1);
    IndexedWord node2 = sm.getNode(name2);
    if (node1 == null || node2 == null) {
      return false;
    }

    List<SemanticGraphEdge> n1_to_n2 = sg.getAllEdges(node1, node2);
    List<SemanticGraphEdge> n2_to_n1 = sg.getAllEdges(node2, node1);
    if (n1_to_n2.isEmpty() && n2_to_n1.isEmpty()) {
      return false;
    }

    // TODO: what about the case where the dep is or has copies?
    // If there are edges in both directions, node1 is treated as the head.
    final boolean node1IsHead = !n1_to_n2.isEmpty();
    final IndexedWord head = node1IsHead ? node1 : node2;
    final IndexedWord dep = node1IsHead ? node2 : node1;

    // If the dep has any edges that aren't between dep & head, abort
    // (nodes are compared by reference here, not with equals())
    // TODO: we could probably make it adjust edges with "dep" as source, instead
    for (SemanticGraphEdge e : sg.outgoingEdgeIterable(dep)) {
      if (e.getTarget() != head) {
        return false;
      }
    }
    for (SemanticGraphEdge e : sg.incomingEdgeIterable(dep)) {
      if (e.getSource() != head) {
        return false;
      }
    }

    // Order the two words by sentence position, independent of which is the head
    final IndexedWord left;
    final IndexedWord right;
    if (node1.index() < node2.index()) {
      left = node1;
      right = node2;
    } else {
      left = node2;
      right = node1;
    }

    CoreLabel newLabel = buildMergedLabel(left, right);

    // Copy every annotation of the merged label onto the head vertex
    for (IndexedWord vertex : sg.vertexSet()) {
      if (vertex.index() == head.index()) {
        for (Class key : newLabel.keySet()) {
          Object value = newLabel.get(key);
          vertex.set(key, value);
        }
      }
    }

    // copy the list so that deletion doesn't hurt the iterator
    // TODO: super fancy would be implementing iterator.remove()
    // on the Set returned by the SemanticGraph
    for (IndexedWord vertex : sg.vertexListSorted()) {
      if (vertex.index() == dep.index()) {
        sg.removeVertex(vertex);
      }
    }

    // reindex everyone: indices at or past the removed dep are mapped to index - 1
    AddDep.moveNodes(sg, sm, x -> (x >= dep.index()), x -> x-1, false);

    return true;
  }

  /**
   * Builds the label for the merged word, starting from the user supplied
   * attributes and filling in anything they left unset from the two words.
   *
   * @param left the word which comes first in the sentence
   * @param right the word which comes second in the sentence
   * @return a new label with word, value, lemma, before, and after filled in
   *         (the lemma stays null if either original lemma is null)
   */
  private CoreLabel buildMergedLabel(IndexedWord left, IndexedWord right) {
    CoreLabel newLabel = AddDep.fromCheapStrings(attributes);

    if (newLabel.word() == null) {
      newLabel.setWord(left.word() + right.word());
    }
    if (newLabel.value() == null) {
      newLabel.setValue(newLabel.word());
    }
    if (newLabel.lemma() == null) {
      String newLemma = left.lemma() != null && right.lemma() != null ? left.lemma() + right.lemma() : null;
      newLabel.setLemma(newLemma);
    }

    // after() and before() return "" if null, so we need to use the CoreAnnotations directly
    if (newLabel.get(CoreAnnotations.AfterAnnotation.class) == null) {
      // whatever followed the rightmost word now follows the merged word
      newLabel.setAfter(right.after());
    }
    if (newLabel.get(CoreAnnotations.BeforeAnnotation.class) == null) {
      // whatever preceded the leftmost word now precedes the merged word
      newLabel.setBefore(left.before());
    }
    return newLabel;
  }
}