-
Notifications
You must be signed in to change notification settings - Fork 2.7k
/
ParsingSystem.java
234 lines (197 loc) · 7.25 KB
/
ParsingSystem.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
package edu.stanford.nlp.parser.nndep;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Defines a transition-based parsing framework for dependency parsing.
*
* @author Danqi Chen
*/
public abstract class ParsingSystem {
/** A logger for this class */
private static final Redwood.RedwoodChannels log = Redwood.channels(ParsingSystem.class);
/**
* Defines language-specific settings for this parsing instance.
*/
private final TreebankLanguagePack tlp;
/**
* Dependency label used between root of sentence and ROOT node.
*/
protected final String rootLabel;
protected final List<String> labels, transitions;
/**
* Determine whether the given transition is legal for this
* configuration.
*
* @param c Parsing configuration
* @param t Transition string
* @return Whether the given transition is legal in this
* configuration
*/
public abstract boolean canApply(Configuration c, String t);
/**
* Apply the given transition to the given configuration, modifying
* the configuration's state in place.
*/
public abstract void apply(Configuration c, String t);
/**
* Provide a static-oracle recommendation for the next parsing step
* to take.
*
* @param c Current parser configuration
* @param dTree Gold tree which parser needs to reach
* @return Transition string
*/
public abstract String getOracle(Configuration c, DependencyTree dTree);
/**
* Determine whether applying the given transition in the given
* configuration tree will leave in us a state in which we can reach
* the gold tree. (Useful for building a dynamic oracle.)
*/
abstract boolean isOracle(Configuration c, String t, DependencyTree dTree);
/**
* Build an initial parser configuration from the given sentence.
*/
public abstract Configuration initialConfiguration(CoreMap sentence);
/**
* Determine if the given configuration corresponds to a parser which
* has completed its parse.
*/
abstract boolean isTerminal(Configuration c);
/**
* Return the number of transitions.
*/
public int numTransitions() {
return transitions.size();
}
// TODO pass labels as Map<String, GrammaticalRelation>; use GrammaticalRelation throughout
/**
* @param tlp TreebankLanguagePack describing the language being
* parsed
* @param labels A list of possible dependency relation labels, with
* the ROOT relation label as the first element
*/
public ParsingSystem(TreebankLanguagePack tlp, List<String> labels, List<String> transitions, boolean verbose) {
this.tlp = tlp;
this.labels = new ArrayList<>(labels);
//NOTE: assume that the first element of labels is rootLabel
rootLabel = labels.get(0);
this.transitions = transitions;
if (verbose) {
log.info(Config.SEPARATOR);
log.info("#Transitions: " + numTransitions());
log.info("#Labels: " + labels.size());
log.info("ROOTLABEL: " + rootLabel);
}
}
public int getTransitionID(String s) {
int numTrans = numTransitions();
for (int k = 0; k < numTrans; ++k) {
if (transitions.get(k).equals(s)) {
return k;
}
}
return -1;
}
private Set<String> getPunctuationTags() {
if (tlp instanceof PennTreebankLanguagePack) {
// Hack for English: match punctuation tags used in Danqi's paper
return new HashSet<>(Arrays.asList("''", ",", ".", ":", "``", "-LRB-", "-RRB-"));
} else {
return CollectionUtils.asSet(tlp.punctuationTags());
}
}
/**
* Evaluate performance on a list of sentences, predicted parses,
* and gold parses.
*
* @return A map from metric name to metric value
*/
public Map<String, Double> evaluate(List<CoreMap> sentences, List<DependencyTree> trees,
List<DependencyTree> goldTrees) {
Map<String, Double> result = new HashMap<>();
// We'll skip words which are punctuation. Retrieve tags indicating
// punctuation in this treebank.
Set<String> punctuationTags = getPunctuationTags();
if (trees.size() != goldTrees.size()) {
log.error("Incorrect number of trees.");
return null;
}
int correctArcs = 0;
int correctArcsNoPunc = 0;
int correctHeads = 0;
int correctHeadsNoPunc = 0;
int correctTrees = 0;
int correctTreesNoPunc = 0;
int correctRoot = 0;
int sumArcs = 0;
int sumArcsNoPunc = 0;
for (int i = 0; i < trees.size(); ++i) {
List<CoreLabel> tokens = sentences.get(i).get(CoreAnnotations.TokensAnnotation.class);
if (trees.get(i).n != goldTrees.get(i).n) {
log.error("Tree " + (i + 1) + ": incorrect number of nodes.");
return null;
}
if (!trees.get(i).isTree()) {
log.error("Tree " + (i + 1) + ": illegal.");
return null;
}
int nCorrectHead = 0;
int nCorrectHeadNoPunc = 0;
int nNoPunc = 0;
for (int j = 1; j <= trees.get(i).n; ++j) {
if (trees.get(i).getHead(j) == goldTrees.get(i).getHead(j)) {
++correctHeads;
++nCorrectHead;
if (trees.get(i).getLabel(j).equals(goldTrees.get(i).getLabel(j)))
++correctArcs;
}
++sumArcs;
String tag = tokens.get(j - 1).tag();
if (!punctuationTags.contains(tag)) {
++sumArcsNoPunc;
++nNoPunc;
if (trees.get(i).getHead(j) == goldTrees.get(i).getHead(j)) {
++correctHeadsNoPunc;
++nCorrectHeadNoPunc;
if (trees.get(i).getLabel(j).equals(goldTrees.get(i).getLabel(j)))
++correctArcsNoPunc;
}
}
}
if (nCorrectHead == trees.get(i).n)
++correctTrees;
if (nCorrectHeadNoPunc == nNoPunc)
++correctTreesNoPunc;
if (trees.get(i).getRoot() == goldTrees.get(i).getRoot())
++correctRoot;
}
result.put("UAS", correctHeads * 100.0 / sumArcs);
result.put("UASnoPunc", correctHeadsNoPunc * 100.0 / sumArcsNoPunc);
result.put("LAS", correctArcs * 100.0 / sumArcs);
result.put("LASnoPunc", correctArcsNoPunc * 100.0 / sumArcsNoPunc);
result.put("UEM", correctTrees * 100.0 / trees.size());
result.put("UEMnoPunc", correctTreesNoPunc * 100.0 / trees.size());
result.put("ROOT", correctRoot * 100.0 / trees.size());
return result;
}
public double getUAS(List<CoreMap> sentences, List<DependencyTree> trees, List<DependencyTree> goldTrees) {
Map<String, Double> result = evaluate(sentences, trees, goldTrees);
return result == null || !result.containsKey("UAS") ? -1.0 : result.get("UAS");
}
public double getUASnoPunc(List<CoreMap> sentences, List<DependencyTree> trees, List<DependencyTree> goldTrees) {
Map<String, Double> result = evaluate(sentences, trees, goldTrees);
return result == null || !result.containsKey("UASnoPunc") ? -1.0 : result.get("UASnoPunc");
}
}