From e4749f2f6bec928b4bd50909516c96d694178cf3 Mon Sep 17 00:00:00 2001 From: Sebastian Schuster Date: Wed, 18 Feb 2015 15:46:44 -0800 Subject: [PATCH] Merge branch 'master' into mt-preordering-feat --- .../lexparser/LexicalizedParserITest.java | 2 +- src/edu/stanford/nlp/dcoref/Mention.java | 2 +- .../sievepasses/DeterministicCorefSieve.java | 6 +- .../stanford/nlp/ie/NERFeatureFactory.java | 4 +- .../RelationFeatureFactory.java | 3 - src/edu/stanford/nlp/io/IOUtils.java | 11 +- src/edu/stanford/nlp/neural/SimpleTensor.java | 12 +- .../nlp/parser/common/ParserUtils.java | 11 +- .../shiftreduce/CompoundUnaryTransition.java | 5 +- .../shiftreduce/CreateTransitionSequence.java | 18 +- .../shiftreduce/FinalizeTransition.java | 26 +- .../nlp/parser/shiftreduce/Oracle.java | 7 +- .../parser/shiftreduce/ShiftReduceParser.java | 121 ++-- .../shiftreduce/demo/ShiftReduceDemo.java | 4 +- src/edu/stanford/nlp/pipeline/Annotation.java | 4 +- .../nlp/pipeline/ParserAnnotator.java | 11 +- .../nlp/tagger/util/CountClosedTags.java | 532 +++++++++--------- .../stanford/nlp/trees/DependencyScoring.java | 2 +- .../trees/EnglishGrammaticalRelations.java | 231 +++----- .../trees/EnglishGrammaticalStructure.java | 79 ++- .../nlp/trees/GrammaticalRelation.java | 56 +- .../nlp/trees/GrammaticalStructure.java | 220 ++------ .../nlp/trees/SemanticHeadFinder.java | 2 +- src/edu/stanford/nlp/trees/TreeGraph.java | 3 - src/edu/stanford/nlp/trees/TreeGraphNode.java | 151 ----- .../ChineseGrammaticalRelations.java | 214 ++----- .../nlp/trees/tregex/tsurgeon/ExciseNode.java | 9 +- .../nlp/parser/shiftreduce/OracleTest.java | 5 +- .../shiftreduce/ShiftReduceParserTest.java | 7 +- .../stanford/nlp/pipeline/AnnotationTest.java | 35 ++ .../EnglishGrammaticalStructureTest.java | 12 +- .../trees/tregex/tsurgeon/TsurgeonTest.java | 14 + 32 files changed, 732 insertions(+), 1087 deletions(-) create mode 100644 test/src/edu/stanford/nlp/pipeline/AnnotationTest.java diff --git 
a/itest/src/edu/stanford/nlp/parser/lexparser/LexicalizedParserITest.java b/itest/src/edu/stanford/nlp/parser/lexparser/LexicalizedParserITest.java index b5e601d51a..7c6d9b148b 100644 --- a/itest/src/edu/stanford/nlp/parser/lexparser/LexicalizedParserITest.java +++ b/itest/src/edu/stanford/nlp/parser/lexparser/LexicalizedParserITest.java @@ -165,7 +165,7 @@ public void testParseString() { "My/PRP$ dog/NN likes/VBZ to/TO eat/VB yoghurt/NN ./.", "(ROOT (S (NP (PRP$ My) (NN dog)) (VP (VBZ likes) (S (VP (TO to) (VP (VB eat) (NP (NN yoghurt)))))) (. .)))", "poss(dog-2, My-1) nsubj(likes-3, dog-2) root(ROOT-0, likes-3) aux(eat-5, to-4) xcomp(likes-3, eat-5) dobj(eat-5, yoghurt-6)", - "poss(dog-2, My-1) nsubj(likes-3, dog-2) xsubj(eat-5, dog-2) root(ROOT-0, likes-3) aux(eat-5, to-4) xcomp(likes-3, eat-5) dobj(eat-5, yoghurt-6)"); + "poss(dog-2, My-1) nsubj(likes-3, dog-2) nsubj(eat-5, dog-2) root(ROOT-0, likes-3) aux(eat-5, to-4) xcomp(likes-3, eat-5) dobj(eat-5, yoghurt-6)"); } /** diff --git a/src/edu/stanford/nlp/dcoref/Mention.java b/src/edu/stanford/nlp/dcoref/Mention.java index f8a964307c..70a90bbc43 100644 --- a/src/edu/stanford/nlp/dcoref/Mention.java +++ b/src/edu/stanford/nlp/dcoref/Mention.java @@ -1367,7 +1367,7 @@ public String getRelation(){ if(relation.toString().startsWith("prep") || relation == EnglishGrammaticalRelations.PREPOSITIONAL_OBJECT || relation == EnglishGrammaticalRelations.TEMPORAL_MODIFIER || relation == EnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER || relation == EnglishGrammaticalRelations.ADVERBIAL_MODIFIER || relation == EnglishGrammaticalRelations.PREPOSITIONAL_COMPLEMENT) return "adjunct"; // subject relations - if(relation == EnglishGrammaticalRelations.NOMINAL_SUBJECT || relation == EnglishGrammaticalRelations.CLAUSAL_SUBJECT || relation == EnglishGrammaticalRelations.CONTROLLING_SUBJECT) return "subject"; + if(relation == EnglishGrammaticalRelations.NOMINAL_SUBJECT || relation == EnglishGrammaticalRelations.CLAUSAL_SUBJECT) return 
"subject"; if(relation == EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT || relation == EnglishGrammaticalRelations.CLAUSAL_PASSIVE_SUBJECT) return "subject"; // verbal argument relations diff --git a/src/edu/stanford/nlp/dcoref/sievepasses/DeterministicCorefSieve.java b/src/edu/stanford/nlp/dcoref/sievepasses/DeterministicCorefSieve.java index 6e9819a44e..f5e9745da7 100644 --- a/src/edu/stanford/nlp/dcoref/sievepasses/DeterministicCorefSieve.java +++ b/src/edu/stanford/nlp/dcoref/sievepasses/DeterministicCorefSieve.java @@ -477,8 +477,8 @@ private static List sortMentionsForPronoun(List l, Mention m1, if (sameSentence) { Tree tree = m1.contextParseTree; Tree current = m1.mentionSubTree; - while (true) { - current = current.ancestor(1, tree); + current = current.parent(tree); + while (current != null) { if (current.label().value().startsWith("S")) { for (Mention m : l) { if (!sorted.contains(m) && current.dominates(m.mentionSubTree)) { @@ -486,7 +486,7 @@ private static List sortMentionsForPronoun(List l, Mention m1, } } } - if (current.label().value().equals("ROOT") || current.ancestor(1, tree)==null) break; + current = current.parent(tree); } if (SieveCoreferenceSystem.logger.isLoggable(Level.FINEST)) { if (l.size()!=sorted.size()) { diff --git a/src/edu/stanford/nlp/ie/NERFeatureFactory.java b/src/edu/stanford/nlp/ie/NERFeatureFactory.java index 664e59de75..e4ca0f3dcd 100644 --- a/src/edu/stanford/nlp/ie/NERFeatureFactory.java +++ b/src/edu/stanford/nlp/ie/NERFeatureFactory.java @@ -1709,7 +1709,7 @@ protected Collection featuresCpC(PaddedList cInfo, int loc) { featuresCpC.add(pWord + "-PSEQpW"); // added later after goodCoNLL } - if (true) { // TODO [cdm Jul 2014]: should really be if (flags.useDistSim) but fixing current itest.... 
+ if (flags.useDistSim) { featuresCpC.add(pDS + "-PSEQpDS"); featuresCpC.add(cDS + "-PSEQcDS"); featuresCpC.add(pDS+ '-' +cDS + "-PSEQpcDS"); @@ -1747,7 +1747,7 @@ protected Collection featuresCpC(PaddedList cInfo, int loc) { if (flags.useTypeySequences) { featuresCpC.add(cShape + "-TPS2"); featuresCpC.add(n.get(CoreAnnotations.ShapeAnnotation.class) + "-TNS1"); - // featuresCpC.add(pShape) + "-" + cShape) + "-TPS"); // duplicates -TYPES, so now omitted; you may need to slighly increase sigma to duplicate previous results, however. + // featuresCpC.add(pShape) + "-" + cShape) + "-TPS"); // duplicates -TYPES, so now omitted; you may need to slightly increase sigma to duplicate previous results, however. } if (flags.useTaggySequences) { diff --git a/src/edu/stanford/nlp/ie/machinereading/RelationFeatureFactory.java b/src/edu/stanford/nlp/ie/machinereading/RelationFeatureFactory.java index e930fed66d..d6382f4d1e 100644 --- a/src/edu/stanford/nlp/ie/machinereading/RelationFeatureFactory.java +++ b/src/edu/stanford/nlp/ie/machinereading/RelationFeatureFactory.java @@ -929,9 +929,6 @@ private static GrammaticalRelation generalizeRelation(GrammaticalRelation gr) { return generalGR; } } - if (gr.equals(EnglishGrammaticalRelations.CONTROLLING_SUBJECT)) { - return EnglishGrammaticalRelations.SUBJECT; - } return gr; } diff --git a/src/edu/stanford/nlp/io/IOUtils.java b/src/edu/stanford/nlp/io/IOUtils.java index 3dbfb34827..0a711bc42c 100644 --- a/src/edu/stanford/nlp/io/IOUtils.java +++ b/src/edu/stanford/nlp/io/IOUtils.java @@ -453,12 +453,13 @@ public static InputStream getInputStreamFromURLOrClasspathOrFileSystem(String te } if (textFileOrUrl.endsWith(".gz")) { - // gunzip it if necessary. Since a GZIPInputStream has a buffer in it, don't need a second level of buffering + // gunzip it if necessary in = new GZIPInputStream(in, GZIP_FILE_BUFFER_SIZE); - } else { - // buffer this stream - in = new BufferedInputStream(in); - } + } + + // buffer this stream. 
even gzip streams benefit from buffering, + // such as for the shift reduce parser + in = new BufferedInputStream(in); return in; } diff --git a/src/edu/stanford/nlp/neural/SimpleTensor.java b/src/edu/stanford/nlp/neural/SimpleTensor.java index d784528048..ef3ebb7d0a 100644 --- a/src/edu/stanford/nlp/neural/SimpleTensor.java +++ b/src/edu/stanford/nlp/neural/SimpleTensor.java @@ -18,7 +18,7 @@ * @author Richard Socher */ public class SimpleTensor implements Serializable { - private SimpleMatrix[] slices; + private final SimpleMatrix[] slices; final int numRows; final int numCols; @@ -286,5 +286,15 @@ public void remove() { } } + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + for (int slice = 0; slice < numSlices; ++slice) { + result.append("Slice " + slice + "\n"); + result.append(slices[slice]); + } + return result.toString(); + } + private static final long serialVersionUID = 1; } diff --git a/src/edu/stanford/nlp/parser/common/ParserUtils.java b/src/edu/stanford/nlp/parser/common/ParserUtils.java index 40a96c67b8..a7e016ceed 100644 --- a/src/edu/stanford/nlp/parser/common/ParserUtils.java +++ b/src/edu/stanford/nlp/parser/common/ParserUtils.java @@ -4,6 +4,7 @@ import java.util.Collections; import java.util.List; +import edu.stanford.nlp.ling.HasTag; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.trees.LabeledScoredTreeFactory; import edu.stanford.nlp.trees.Tree; @@ -14,7 +15,7 @@ public class ParserUtils { /** * Construct a fall through tree in case we can't parse this sentence * @param words - * @return a tree with X for all the internal nodes + * @return a tree with X for all the internal nodes. 
preterminals have the right tag if the words are tagged */ public static Tree xTree(List words) { TreeFactory lstf = new LabeledScoredTreeFactory(); @@ -22,7 +23,13 @@ public static Tree xTree(List words) { for (HasWord obj : words) { String s = obj.word(); Tree t = lstf.newLeaf(s); - Tree t2 = lstf.newTreeNode("X", Collections.singletonList(t)); + String tag = "X"; + if (obj instanceof HasTag) { + if (((HasTag) obj).tag() != null) { + tag = ((HasTag) obj).tag(); + } + } + Tree t2 = lstf.newTreeNode(tag, Collections.singletonList(t)); lst2.add(t2); } return lstf.newTreeNode("X", lst2); diff --git a/src/edu/stanford/nlp/parser/shiftreduce/CompoundUnaryTransition.java b/src/edu/stanford/nlp/parser/shiftreduce/CompoundUnaryTransition.java index 4d91421f97..84cb83ae1a 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/CompoundUnaryTransition.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/CompoundUnaryTransition.java @@ -19,7 +19,10 @@ * @author John Bauer */ public class CompoundUnaryTransition implements Transition { - /** labels[0] is the top of the unary chain */ + /** + * labels[0] is the top of the unary chain. + * A unary chain that results in a ROOT will have labels[0] == ROOT, for example. + */ public final String[] labels; /** root transitions are illegal in the middle of the tree, naturally */ diff --git a/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java b/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java index 86ee42b75f..7e84face82 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java @@ -1,6 +1,7 @@ package edu.stanford.nlp.parser.shiftreduce; import java.util.List; +import java.util.Set; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.trees.Tree; @@ -12,37 +13,38 @@ public class CreateTransitionSequence { // we could change this if we wanted to include options. 
private CreateTransitionSequence() {} - public static List createTransitionSequence(Tree tree, boolean compoundUnary) { + public static List createTransitionSequence(Tree tree, boolean compoundUnary, Set rootStates, Set rootOnlyStates) { List transitions = Generics.newArrayList(); - createTransitionSequenceHelper(transitions, tree, compoundUnary, true); - transitions.add(new FinalizeTransition()); + createTransitionSequenceHelper(transitions, tree, compoundUnary, rootOnlyStates); + transitions.add(new FinalizeTransition(rootStates)); transitions.add(new IdleTransition()); return transitions; } - private static void createTransitionSequenceHelper(List transitions, Tree tree, boolean compoundUnary, boolean isRoot) { + private static void createTransitionSequenceHelper(List transitions, Tree tree, boolean compoundUnary, Set rootOnlyStates) { if (tree.isLeaf()) { // do nothing } else if (tree.isPreTerminal()) { transitions.add(new ShiftTransition()); } else if (tree.children().length == 1) { + boolean isRoot = rootOnlyStates.contains(tree.label().value()); if (compoundUnary) { List labels = Generics.newArrayList(); while (tree.children().length == 1 && !tree.isPreTerminal()) { labels.add(tree.label().value()); tree = tree.children()[0]; } - createTransitionSequenceHelper(transitions, tree, compoundUnary, false); + createTransitionSequenceHelper(transitions, tree, compoundUnary, rootOnlyStates); transitions.add(new CompoundUnaryTransition(labels, isRoot)); } else { - createTransitionSequenceHelper(transitions, tree.children()[0], compoundUnary, false); + createTransitionSequenceHelper(transitions, tree.children()[0], compoundUnary, rootOnlyStates); transitions.add(new UnaryTransition(tree.label().value(), isRoot)); } } else if (tree.children().length == 2) { - createTransitionSequenceHelper(transitions, tree.children()[0], compoundUnary, false); - createTransitionSequenceHelper(transitions, tree.children()[1], compoundUnary, false); + 
createTransitionSequenceHelper(transitions, tree.children()[0], compoundUnary, rootOnlyStates); + createTransitionSequenceHelper(transitions, tree.children()[1], compoundUnary, rootOnlyStates); // This is the tricky part... need to decide if the binary // transition is a left or right transition. This is done by diff --git a/src/edu/stanford/nlp/parser/shiftreduce/FinalizeTransition.java b/src/edu/stanford/nlp/parser/shiftreduce/FinalizeTransition.java index 506e5f15e0..7705902435 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/FinalizeTransition.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/FinalizeTransition.java @@ -1,15 +1,38 @@ package edu.stanford.nlp.parser.shiftreduce; import java.util.List; +import java.util.Set; import edu.stanford.nlp.parser.common.ParserConstraint; import edu.stanford.nlp.trees.Tree; +// only needed for readObject +import java.io.IOException; +import java.io.ObjectInputStream; +import java.util.Collections; +import edu.stanford.nlp.util.ErasureUtils; + /** * Transition that finishes the processing of a state */ public class FinalizeTransition implements Transition { + private Set rootStates; + + private void readObject(ObjectInputStream in) + throws IOException, ClassNotFoundException + { + ObjectInputStream.GetField fields = in.readFields(); + rootStates = ErasureUtils.uncheckedCast(fields.get("rootStates", null)); + if (rootStates == null) { + rootStates = Collections.singleton("ROOT"); + } + } + + public FinalizeTransition(Set rootStates) { + this.rootStates = rootStates; + } + public boolean isLegal(State state, List constraints) { - boolean legal = !state.finished && state.tokenPosition >= state.sentence.size() && state.stack.size() == 1; + boolean legal = !state.finished && state.tokenPosition >= state.sentence.size() && state.stack.size() == 1 && rootStates.contains(state.stack.peek().value()); if (!legal || constraints == null) { return legal; } @@ -22,6 +45,7 @@ public boolean isLegal(State state, List constraints) { 
return false; } } + return true; } diff --git a/src/edu/stanford/nlp/parser/shiftreduce/Oracle.java b/src/edu/stanford/nlp/parser/shiftreduce/Oracle.java index bb88639f0b..3e5a7f51ee 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/Oracle.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/Oracle.java @@ -4,6 +4,7 @@ import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.trees.Tree; @@ -35,7 +36,9 @@ class Oracle { boolean compoundUnaries; - Oracle(List binarizedTrees, boolean compoundUnaries) { + Set rootStates; + + Oracle(List binarizedTrees, boolean compoundUnaries, Set rootStates) { this.binarizedTrees = binarizedTrees; parentMaps = Generics.newArrayList(binarizedTrees.size()); @@ -136,7 +139,7 @@ OracleTransition goldTransition(int index, State state) { // TODO: we could interject that all trees must end with ROOT, for example if (state.tokenPosition >= state.sentence.size() && state.stack.size() == 1) { - return new OracleTransition(new FinalizeTransition(), false, false, false); + return new OracleTransition(new FinalizeTransition(rootStates), false, false, false); } if (state.stack.size() == 1) { diff --git a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java index 896e733197..020ff71a1c 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java @@ -106,12 +106,13 @@ public class ShiftReduceParser extends ParserGrammar implements Serializable { FeatureFactory featureFactory; Set knownStates; + Set rootStates; + Set rootOnlyStates; public ShiftReduceParser(ShiftReduceOptions op) { this.transitionIndex = new HashIndex(); this.featureWeights = Generics.newHashMap(); this.op = op; - this.knownStates = Generics.newHashSet(); String[] classes = op.featureFactoryClass.split(";"); if 
(classes.length == 1) { @@ -136,10 +137,8 @@ private ShiftReduceParser(ShiftReduceOptions op, FeatureFactory factory) { this.featureWeights = Generics.newHashMap(); this.op = op; this.featureFactory = factory; - this.knownStates = Generics.newHashSet(); } - /* private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { @@ -147,18 +146,19 @@ private void readObject(ObjectInputStream in) transitionIndex = ErasureUtils.uncheckedCast(fields.get("transitionIndex", null)); op = ErasureUtils.uncheckedCast(fields.get("op", null)); featureFactory = ErasureUtils.uncheckedCast(fields.get("featureFactory", null)); - featureWeights = Generics.newHashMap(); - Map>> oldWeights = ErasureUtils.uncheckedCast(fields.get("featureWeights", null)); - for (String feature : oldWeights.keySet()) { - List> oldFeature = oldWeights.get(feature); - Weight newFeature = new Weight(); - for (int i = 0; i < oldFeature.size(); ++i) { - newFeature.updateWeight(oldFeature.get(i).object(), (float) oldFeature.get(i).score()); - } - featureWeights.put(feature, newFeature); - } + featureWeights = ErasureUtils.uncheckedCast(fields.get("featureWeights", null)); + knownStates = ErasureUtils.uncheckedCast(fields.get("knownStates", null)); + rootStates = ErasureUtils.uncheckedCast(fields.get("rootStates", null)); + if (rootStates == null) { + rootStates = Collections.singleton("ROOT"); + System.err.println("Adding rootStates: " + rootStates); + } + rootOnlyStates = ErasureUtils.uncheckedCast(fields.get("rootOnlyStates", null)); + if (rootOnlyStates == null) { + rootOnlyStates = Collections.singleton("ROOT"); + System.err.println("Adding rootOnlyStates: " + rootOnlyStates); + } } - */ @Override public Options getOp() { @@ -193,7 +193,7 @@ public boolean requiresTags() { return true; } - public ShiftReduceParser deepCopy() { + private ShiftReduceParser deepCopy() { // TODO: should we deep copy the options / factory? 
seems wasteful ShiftReduceParser copy = new ShiftReduceParser(op, featureFactory); copy.copyWeights(this); @@ -209,8 +209,9 @@ public void copyWeights(ShiftReduceParser other) { transitionIndex.add(transition); } - knownStates.clear(); - knownStates.addAll(other.knownStates); + knownStates = Collections.unmodifiableSet(Generics.newHashSet(other.knownStates)); + rootStates = Collections.unmodifiableSet(Generics.newHashSet(other.rootStates)); + rootOnlyStates = Collections.unmodifiableSet(Generics.newHashSet(other.rootOnlyStates)); featureWeights.clear(); for (String feature : other.featureWeights.keySet()) { @@ -400,6 +401,16 @@ public Transition findEmergencyTransition(State state, List co new UnaryTransition(state.stack.peek().value().substring(1), false)); } + if (state.stack.size() == 1 && state.tokenPosition >= state.sentence.size()) { + // either need to finalize or transition to a root state + if (!rootStates.contains(state.stack.peek().value())) { + String root = rootStates.iterator().next(); + return ((op.compoundUnaries) ? 
+ new CompoundUnaryTransition(Collections.singletonList(root), false) : + new UnaryTransition(root, false)); + } + } + if (state.stack.size() == 1) { return null; } @@ -541,16 +552,18 @@ public static List binarizeTreebank(Treebank treebank, Options op) { public List> createTransitionSequences(List binarizedTrees) { List> transitionLists = Generics.newArrayList(); for (Tree tree : binarizedTrees) { - List transitions = CreateTransitionSequence.createTransitionSequence(tree, op.compoundUnaries); + List transitions = CreateTransitionSequence.createTransitionSequence(tree, op.compoundUnaries, rootStates, rootOnlyStates); transitionLists.add(transitions); } return transitionLists; } - public static void findKnownStates(List binarizedTrees, Set knownStates) { + public static Set findKnownStates(List binarizedTrees) { + Set knownStates = Generics.newHashSet(); for (Tree tree : binarizedTrees) { findKnownStates(tree, knownStates); } + return Collections.unmodifiableSet(knownStates); } public static void findKnownStates(Tree tree, Set knownStates) { @@ -782,6 +795,17 @@ public TrainTreeProcessor newInstance() { } } + /** + * Trains a batch of trees and returns the following: a list of + * Update objects, the number of transitions correct, and the number + * of transitions wrong. + *
+ * If the model is trained with multiple threads, it is expected + * that a valid MulticoreWrapper is passed in which does the + * processing. In that case, the processing is done on all of the + * trees without updating any weights, which allows the results for + * multithreaded training to be reproduced. + */ private Triple, Integer, Integer> trainBatch(List indices, List binarizedTrees, List> transitionLists, List updates, Oracle oracle, MulticoreWrapper> wrapper) { int numCorrect = 0; int numWrong = 0; @@ -805,6 +829,40 @@ private Triple, Integer, Integer> trainBatch(List indices, return new Triple, Integer, Integer>(updates, numCorrect, numWrong); } + /** + * Get all of the states which occur at the root, even if they occur + * elsewhere in the tree. Useful for knowing when you can Finalize + * a tree + */ + private static Set findRootStates(List trees) { + Set roots = Generics.newHashSet(); + for (Tree tree : trees) { + roots.add(tree.value()); + } + return Collections.unmodifiableSet(roots); + } + + /** + * Get all of the states which *only* occur at the root. 
Useful for + * knowing which transitions can't be done internal to the tree + */ + private static Set findRootOnlyStates(List trees, Set rootStates) { + Set rootOnlyStates = Generics.newHashSet(rootStates); + for (Tree tree : trees) { + for (Tree child : tree.children()) { + findRootOnlyStatesHelper(child, rootStates, rootOnlyStates); + } + } + return Collections.unmodifiableSet(rootOnlyStates); + } + + private static void findRootOnlyStatesHelper(Tree tree, Set rootStates, Set rootOnlyStates) { + rootOnlyStates.remove(tree.value()); + for (Tree child : tree.children()) { + findRootOnlyStatesHelper(child, rootStates, rootOnlyStates); + } + } + private void trainAndSave(List> trainTreebankPath, Pair devTreebankPath, String serializedPath) { @@ -824,29 +882,22 @@ private void trainAndSave(List> trainTreebankPath, retagTimer.done("Retagging"); } + knownStates = findKnownStates(binarizedTrees); + rootStates = findRootStates(binarizedTrees); + rootOnlyStates = findRootOnlyStates(binarizedTrees, rootStates); + + System.err.println("Known states: " + knownStates); + System.err.println("States which occur at the root: " + rootStates); + System.err.println("States which only occur at the root: " + rootStates); + Timing transitionTimer = new Timing(); List> transitionLists = createTransitionSequences(binarizedTrees); for (List transitions : transitionLists) { - // TODO: there is a potential bug here. So far, the assumption - // is that all unary transitions which occur at the root only - // ever occur at the root. If that assumption doesn't hold for - // some treebank, it may occur that a root transition occurs in - // the middle of the tree but is marked "isRoot", meaning it can - // never actually be used in the middle of the tree. - // - // A solution to this would be to keep a separate index of all - // the transitions which have only ever been seen in the context - // of the root. Eg, nothing comes after those transitions - // except Finalize or Idle. 
(That also picks up the unlikely - // case of a binary transition being a root transition.) transitionIndex.addAll(transitions); } transitionTimer.done("Converting trees into transition lists"); System.err.println("Number of transitions: " + transitionIndex.size()); - findKnownStates(binarizedTrees, knownStates); - System.err.println("Known states: " + knownStates); - Random random = new Random(op.trainOptions.randomSeed); Treebank devTreebank = null; @@ -868,7 +919,7 @@ private void trainAndSave(List> trainTreebankPath, Oracle oracle = null; if (op.trainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ORACLE) { - oracle = new Oracle(binarizedTrees, op.compoundUnaries); + oracle = new Oracle(binarizedTrees, op.compoundUnaries, rootStates); } List updates = Generics.newArrayList(); diff --git a/src/edu/stanford/nlp/parser/shiftreduce/demo/ShiftReduceDemo.java b/src/edu/stanford/nlp/parser/shiftreduce/demo/ShiftReduceDemo.java index 9e0699151b..98d8a8f901 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/demo/ShiftReduceDemo.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/demo/ShiftReduceDemo.java @@ -19,8 +19,8 @@ */ public class ShiftReduceDemo { public static void main(String[] args) { - String modelPath = "/u/nlp/data/srparser/englishSR.ser.gz"; - String taggerPath = "/u/nlp/data/pos-tagger/distrib/english-left3words-distsim.tagger"; + String modelPath = "edu/stanford/nlp/models/srparser/englishSR.ser.gz"; + String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; for (int argIndex = 0; argIndex < args.length; ) { if (args[argIndex].equals("-tagger")) { diff --git a/src/edu/stanford/nlp/pipeline/Annotation.java b/src/edu/stanford/nlp/pipeline/Annotation.java index 5507474b39..8234a0a9bc 100644 --- a/src/edu/stanford/nlp/pipeline/Annotation.java +++ b/src/edu/stanford/nlp/pipeline/Annotation.java @@ -96,8 +96,8 @@ public Annotation(List sentences) { for (CoreMap sentence : sentences) { 
List sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class); tokens.addAll(sentenceTokens); - if (sentence.containsKey(CoreAnnotations.TokensAnnotation.class)) { - text.append(sentence.get(CoreAnnotations.TokensAnnotation.class)); + if (sentence.containsKey(CoreAnnotations.TextAnnotation.class)) { + text.append(sentence.get(CoreAnnotations.TextAnnotation.class)); } else { // If there is no text in the sentence, fake it as best as we can if (text.length() > 0) { diff --git a/src/edu/stanford/nlp/pipeline/ParserAnnotator.java b/src/edu/stanford/nlp/pipeline/ParserAnnotator.java index 3c3d4de54f..29bbb045e5 100644 --- a/src/edu/stanford/nlp/pipeline/ParserAnnotator.java +++ b/src/edu/stanford/nlp/pipeline/ParserAnnotator.java @@ -232,6 +232,7 @@ protected void doOneSentence(Annotation annotation, CoreMap sentence) { @Override public void doOneFailedSentence(Annotation annotation, CoreMap sentence) { final List words = sentence.get(CoreAnnotations.TokensAnnotation.class); + // TODO: xTree should use existing tags if there are any (?) Tree tree = ParserUtils.xTree(words); for (CoreLabel word : words) { if (word.tag() == null) { @@ -265,8 +266,14 @@ private Tree doOneSentence(List constraints, Tree tree = null; try { tree = pq.getBestParse(); - // -10000 denotes unknown words - tree.setScore(pq.getPCFGScore() % -10000.0); + if (tree == null) { + System.err.println("WARNING: Parsing of sentence failed. " + + "Will ignore and continue: " + + Sentence.listToString(words)); + } else { + // -10000 denotes unknown words + tree.setScore(pq.getPCFGScore() % -10000.0); + } } catch (OutOfMemoryError e) { System.err.println("WARNING: Parsing of sentence ran out of memory. 
" + "Will ignore and continue: " + diff --git a/src/edu/stanford/nlp/tagger/util/CountClosedTags.java b/src/edu/stanford/nlp/tagger/util/CountClosedTags.java index 3d1a738f1b..9424335703 100644 --- a/src/edu/stanford/nlp/tagger/util/CountClosedTags.java +++ b/src/edu/stanford/nlp/tagger/util/CountClosedTags.java @@ -1,270 +1,262 @@ -package edu.stanford.nlp.tagger.util; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Set; -import java.util.TreeSet; -import java.util.StringTokenizer; - -import edu.stanford.nlp.ling.TaggedWord; -import edu.stanford.nlp.tagger.io.TaggedFileReader; -import edu.stanford.nlp.tagger.io.TaggedFileRecord; -import edu.stanford.nlp.tagger.maxent.TaggerConfig; -import edu.stanford.nlp.util.Generics; -import edu.stanford.nlp.util.StringUtils; - - -/** - * Implements Chris's heuristic for when a closed tag class can be - * treated as a closed tag. You count how many different words in the - * class you see in the first X% of the training data, then make sure - * you don't see any new words in the rest of the training or test data. - *
- * This handles tagged training/test data in any format handled by the - * tagger (@see edu.stanford.nlp.tagger.maxent.MaxentTagger). Files - * are specified as a comma-separated list via the flag - * -TRAIN_FILE_PROPERTY or -TEST_FILE_PROPERTY. Closed tags are - * specified as a space separated list using the flag - * -CLOSED_TAGS_PROPERTY. - *
- * CountClosedTags then reads each training file to count how many - * lines are in it. First, it reads the first - * -TRAINING_RATIO_PROPERTY fraction of the lines and keeps track of - * which words show up for each closed tag. Next, it reads the rest - * of the training file and keeps track of which words show up in the - * rest of the data that didn't show up in the rest of the training - * data. Finally, it reads all of the test files, once again tracking - * the words that didn't show up in the training data. - *
- * CountClosedTags then outputs the number of unique words that showed - * up in the TRAINING_RATIO_PROPERTY training data and the total - * number of unique words for each tag. If the -PRINT_WORDS_PROPERTY - * flag is set to true, it also prints out the sets of observed words. - *
- * @author John Bauer - */ -public class CountClosedTags { - /** - * Which tags to look for - */ - Set closedTags; - - /** - * Words seen in the first trainingRatio fraction of the trainFiles - */ - Map> trainingWords = Generics.newHashMap(); - /** - * Words seen in either trainFiles or testFiles - */ - Map> allWords = Generics.newHashMap(); - - static final double DEFAULT_TRAINING_RATIO = 2.0 / 3.0; - /** - * How much of each training file to count for trainingWords - */ - final double trainingRatio; - /** - * Whether or not the final output should print the words - */ - final boolean printWords; - - /** - * Tag separator... TODO, make this a constant - */ - static final String tagSeparator = "_"; - - // intended to be a standalone program, not a class - private CountClosedTags(Properties props) { - String tagList = props.getProperty(CLOSED_TAGS_PROPERTY); - if (tagList != null) { - closedTags = new TreeSet(); - String[] pieces = tagList.split("\\s+"); - for (String tag : pieces) { - closedTags.add(tag); - } - } else { - closedTags = null; - } - - if (props.containsKey(TRAINING_RATIO_PROPERTY)) { - trainingRatio = - Double.valueOf(props.getProperty(TRAINING_RATIO_PROPERTY)); - } else { - trainingRatio = DEFAULT_TRAINING_RATIO; - } - - printWords = Boolean.valueOf(props.getProperty(PRINT_WORDS_PROPERTY, - "false")); - } - - /** - * Count how many sentences there are in filename - */ - int countSentences(TaggedFileRecord file) - throws IOException - { - int count = 0; - for (List line : file.reader()) - ++count; - return count; - } - - /** - * Given a line, split it into tagged words and add each word to - * the given tagWordMap - */ - void addTaggedWords(List line, - Map> tagWordMap) { - for (TaggedWord taggedWord : line) { - String word = taggedWord.word(); - String tag = taggedWord.tag(); - if (closedTags == null || closedTags.contains(tag)) { - if (!tagWordMap.containsKey(tag)) { - tagWordMap.put(tag, new TreeSet()); - } - tagWordMap.get(tag).add(word); - } - } 
- } - - /** - * Count trainingRatio of the sentences for both trainingWords and - * allWords, and count the rest for just allWords - */ - void countTrainingTags(TaggedFileRecord file) - throws IOException - { - int sentences = countSentences(file); - int trainSentences = (int) (sentences * trainingRatio); - TaggedFileReader reader = file.reader(); - List line; - for (int i = 0; i < trainSentences && reader.hasNext(); ++i) { - line = reader.next(); - addTaggedWords(line, trainingWords); - addTaggedWords(line, allWords); - } - while (reader.hasNext()) { - line = reader.next(); - addTaggedWords(line, allWords); - } - } - - /** - * Count all the words in the given file for just allWords - */ - void countTestTags(TaggedFileRecord file) - throws IOException - { - for (List line : file.reader()) { - addTaggedWords(line, allWords); - } - } - - /** - * Print out the results found - */ - void report() { - List successfulTags = new ArrayList(); - Set tags = new TreeSet(); - tags.addAll(allWords.keySet()); - tags.addAll(trainingWords.keySet()); - if (closedTags != null) - tags.addAll(closedTags); - for (String tag : tags) { - int numTraining = (trainingWords.containsKey(tag) ? - trainingWords.get(tag).size() : 0); - int numTotal = (allWords.containsKey(tag) ? 
- allWords.get(tag).size() : 0); - if (numTraining == numTotal && numTraining > 0) - successfulTags.add(tag); - System.out.println(tag + " " + numTraining + " " + numTotal); - if (printWords) { - Set trainingSet = trainingWords.get(tag); - if (trainingSet == null) - trainingSet = Collections.emptySet(); - Set allSet = allWords.get(tag); - for (String word : trainingSet) { - System.out.print(" " + word); - } - if (trainingSet.size() < allSet.size()) { - System.out.println(); - System.out.print(" *"); - for (String word : allWords.get(tag)) { - if (!trainingSet.contains(word)) { - System.out.print(" " + word); - } - } - } - System.out.println(); - } - } - System.out.println(successfulTags); - } - - static final public String TEST_FILE_PROPERTY = "testFile"; - static final public String TRAIN_FILE_PROPERTY = "trainFile"; - static final public String CLOSED_TAGS_PROPERTY = "closedTags"; - static final public String TRAINING_RATIO_PROPERTY = "trainingRatio"; - static final public String PRINT_WORDS_PROPERTY = "printWords"; - - static final Set knownArgs = - Generics.newHashSet(Arrays.asList(TEST_FILE_PROPERTY, - TRAIN_FILE_PROPERTY, - CLOSED_TAGS_PROPERTY, - TRAINING_RATIO_PROPERTY, - PRINT_WORDS_PROPERTY, - TaggerConfig.ENCODING_PROPERTY, - TaggerConfig.TAG_SEPARATOR_PROPERTY)); - - static void help(String error) { - if (error != null && !error.equals("")) { - System.err.println(error); - } - System.exit(2); - } - - static void checkArgs(Properties props) { - if (!props.containsKey(TRAIN_FILE_PROPERTY)) { - help("No " + TRAIN_FILE_PROPERTY + " specified"); - } - for (Object arg : props.keySet()) { - if (!knownArgs.contains(arg)) - help("Unknown arg " + arg); - } - } - - static public void main(String[] args) - throws Exception - { - System.setOut(new PrintStream(System.out, true, "UTF-8")); - System.setErr(new PrintStream(System.err, true, "UTF-8")); - - Properties config = StringUtils.argsToProperties(args); - checkArgs(config); - - CountClosedTags cct = new 
CountClosedTags(config); - String trainFiles = config.getProperty(TRAIN_FILE_PROPERTY); - String testFiles = config.getProperty(TEST_FILE_PROPERTY); - List files = - TaggedFileRecord.createRecords(config, trainFiles); - for (TaggedFileRecord file : files) { - cct.countTrainingTags(file); - } - if (testFiles != null) { - files = TaggedFileRecord.createRecords(config, testFiles); - for (TaggedFileRecord file : files) { - cct.countTestTags(file); - } - } - cct.report(); - } -} +package edu.stanford.nlp.tagger.util; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.TreeSet; + +import edu.stanford.nlp.ling.TaggedWord; +import edu.stanford.nlp.tagger.io.TaggedFileReader; +import edu.stanford.nlp.tagger.io.TaggedFileRecord; +import edu.stanford.nlp.tagger.maxent.TaggerConfig; +import edu.stanford.nlp.util.Generics; +import edu.stanford.nlp.util.StringUtils; + + +/** + * Implements Chris's heuristic for when a closed tag class can be + * treated as a closed tag. You count how many different words in the + * class you see in the first X% of the training data, then make sure + * you don't see any new words in the rest of the training or test data. + *
+ * This handles tagged training/test data in any format handled by the + * tagger (@see edu.stanford.nlp.tagger.maxent.MaxentTagger). Files + * are specified as a comma-separated list via the flag + * -TRAIN_FILE_PROPERTY or -TEST_FILE_PROPERTY. Closed tags are + * specified as a space separated list using the flag + * -CLOSED_TAGS_PROPERTY. + *
+ * CountClosedTags then reads each training file to count how many + * lines are in it. First, it reads the first + * -TRAINING_RATIO_PROPERTY fraction of the lines and keeps track of + * which words show up for each closed tag. Next, it reads the rest + * of the training file and keeps track of which words show up in the + * rest of the data that didn't show up in the first part of the training + * data. Finally, it reads all of the test files, once again tracking + * the words that didn't show up in the training data. + *
+ * CountClosedTags then outputs the number of unique words that showed + * up in the first TRAINING_RATIO_PROPERTY fraction of the training data and the total + * number of unique words for each tag. If the -PRINT_WORDS_PROPERTY + * flag is set to true, it also prints out the sets of observed words. + *
+ * @author John Bauer + */ +public class CountClosedTags { + /** + * Which tags to look for + */ + Set closedTags; + + /** + * Words seen in the first trainingRatio fraction of the trainFiles + */ + Map> trainingWords = Generics.newHashMap(); + /** + * Words seen in either trainFiles or testFiles + */ + Map> allWords = Generics.newHashMap(); + + static final double DEFAULT_TRAINING_RATIO = 2.0 / 3.0; + /** + * How much of each training file to count for trainingWords + */ + final double trainingRatio; + /** + * Whether or not the final output should print the words + */ + final boolean printWords; + + /** + * Tag separator... + */ + private static final String tagSeparator = "_"; + + // intended to be a standalone program, not a class + private CountClosedTags(Properties props) { + String tagList = props.getProperty(CLOSED_TAGS_PROPERTY); + if (tagList != null) { + closedTags = new TreeSet(); + String[] pieces = tagList.split("\\s+"); + Collections.addAll(closedTags, pieces); + } else { + closedTags = null; + } + + if (props.containsKey(TRAINING_RATIO_PROPERTY)) { + trainingRatio = + Double.valueOf(props.getProperty(TRAINING_RATIO_PROPERTY)); + } else { + trainingRatio = DEFAULT_TRAINING_RATIO; + } + + printWords = Boolean.valueOf(props.getProperty(PRINT_WORDS_PROPERTY, + "false")); + } + + /** + * Count how many sentences there are in filename + */ + private static int countSentences(TaggedFileRecord file) + throws IOException + { + int count = 0; + for (List line : file.reader()) + ++count; + return count; + } + + /** + * Given a line, split it into tagged words and add each word to + * the given tagWordMap + */ + void addTaggedWords(List line, + Map> tagWordMap) { + for (TaggedWord taggedWord : line) { + String word = taggedWord.word(); + String tag = taggedWord.tag(); + if (closedTags == null || closedTags.contains(tag)) { + if (!tagWordMap.containsKey(tag)) { + tagWordMap.put(tag, new TreeSet()); + } + tagWordMap.get(tag).add(word); + } + } + } + + /** + * 
Count trainingRatio of the sentences for both trainingWords and + * allWords, and count the rest for just allWords + */ + void countTrainingTags(TaggedFileRecord file) + throws IOException + { + int sentences = countSentences(file); + int trainSentences = (int) (sentences * trainingRatio); + TaggedFileReader reader = file.reader(); + List line; + for (int i = 0; i < trainSentences && reader.hasNext(); ++i) { + line = reader.next(); + addTaggedWords(line, trainingWords); + addTaggedWords(line, allWords); + } + while (reader.hasNext()) { + line = reader.next(); + addTaggedWords(line, allWords); + } + } + + /** + * Count all the words in the given file for just allWords + */ + void countTestTags(TaggedFileRecord file) + throws IOException + { + for (List line : file.reader()) { + addTaggedWords(line, allWords); + } + } + + /** + * Print out the results found + */ + void report() { + List successfulTags = new ArrayList(); + Set tags = new TreeSet(); + tags.addAll(allWords.keySet()); + tags.addAll(trainingWords.keySet()); + if (closedTags != null) + tags.addAll(closedTags); + for (String tag : tags) { + int numTraining = (trainingWords.containsKey(tag) ? + trainingWords.get(tag).size() : 0); + int numTotal = (allWords.containsKey(tag) ? 
+ allWords.get(tag).size() : 0); + if (numTraining == numTotal && numTraining > 0) + successfulTags.add(tag); + System.out.println(tag + " " + numTraining + " " + numTotal); + if (printWords) { + Set trainingSet = trainingWords.get(tag); + if (trainingSet == null) + trainingSet = Collections.emptySet(); + Set allSet = allWords.get(tag); + for (String word : trainingSet) { + System.out.print(" " + word); + } + if (trainingSet.size() < allSet.size()) { + System.out.println(); + System.out.print(" *"); + for (String word : allWords.get(tag)) { + if (!trainingSet.contains(word)) { + System.out.print(" " + word); + } + } + } + System.out.println(); + } + } + System.out.println(successfulTags); + } + + public static final String TEST_FILE_PROPERTY = "testFile"; + public static final String TRAIN_FILE_PROPERTY = "trainFile"; + public static final String CLOSED_TAGS_PROPERTY = "closedTags"; + public static final String TRAINING_RATIO_PROPERTY = "trainingRatio"; + public static final String PRINT_WORDS_PROPERTY = "printWords"; + + private static final Set knownArgs = + Generics.newHashSet(Arrays.asList(TEST_FILE_PROPERTY, + TRAIN_FILE_PROPERTY, + CLOSED_TAGS_PROPERTY, + TRAINING_RATIO_PROPERTY, + PRINT_WORDS_PROPERTY, + TaggerConfig.ENCODING_PROPERTY, + TaggerConfig.TAG_SEPARATOR_PROPERTY)); + + private static void help(String error) { + if (error != null && !error.equals("")) { + System.err.println(error); + } + System.exit(2); + } + + private static void checkArgs(Properties props) { + if (!props.containsKey(TRAIN_FILE_PROPERTY)) { + help("No " + TRAIN_FILE_PROPERTY + " specified"); + } + for (String arg : props.stringPropertyNames()) { + if (!knownArgs.contains(arg)) + help("Unknown arg " + arg); + } + } + + public static void main(String[] args) throws Exception { + System.setOut(new PrintStream(System.out, true, "UTF-8")); + System.setErr(new PrintStream(System.err, true, "UTF-8")); + + Properties config = StringUtils.argsToProperties(args); + checkArgs(config); + + 
CountClosedTags cct = new CountClosedTags(config); + String trainFiles = config.getProperty(TRAIN_FILE_PROPERTY); + String testFiles = config.getProperty(TEST_FILE_PROPERTY); + List files = + TaggedFileRecord.createRecords(config, trainFiles); + for (TaggedFileRecord file : files) { + cct.countTrainingTags(file); + } + if (testFiles != null) { + files = TaggedFileRecord.createRecords(config, testFiles); + for (TaggedFileRecord file : files) { + cct.countTestTags(file); + } + } + cct.report(); + } +} diff --git a/src/edu/stanford/nlp/trees/DependencyScoring.java b/src/edu/stanford/nlp/trees/DependencyScoring.java index de6c8a530b..c4e41d6eaf 100644 --- a/src/edu/stanford/nlp/trees/DependencyScoring.java +++ b/src/edu/stanford/nlp/trees/DependencyScoring.java @@ -493,7 +493,7 @@ public GrammaticalRelation get(Object key) { throw new UnsupportedOperationException(); } String strkey = (String)key; - return new GrammaticalRelation(Language.Any, strkey, null, null, DEPENDENT) { + return new GrammaticalRelation(Language.Any, strkey, null, DEPENDENT) { private static final long serialVersionUID = 1L; @Override diff --git a/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java b/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java index 39a7c27cbd..553709fdb9 100644 --- a/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java +++ b/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java @@ -26,10 +26,10 @@ package edu.stanford.nlp.trees; -import edu.stanford.nlp.trees.GrammaticalRelation.GrammaticalRelationAnnotation; import edu.stanford.nlp.trees.GrammaticalRelation.Language; import edu.stanford.nlp.trees.tregex.TregexPatternCompiler; import edu.stanford.nlp.util.Generics; +import edu.stanford.nlp.util.StringUtils; import java.util.*; import java.util.concurrent.ConcurrentHashMap; @@ -116,9 +116,9 @@ private EnglishGrammaticalRelations() {} // TODO: remove everything but "to be". 
Must do this carefully to // make sure we like all the dependency changes that happen static final String copularWordRegex = - "/^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|seem|seems|seemed|seeming|appear|appears|appeared|stay|stays|stayed|remain|remains|remained|resemble|resembles|resembled|resembling|become|becomes|became|becoming)$/"; + "/^(?i:" + StringUtils.join(SemanticHeadFinder.copulaVerbs, "|") + ")$/"; static final String clausalComplementRegex = - "/^(?i:seem|seems|seemed|seeming|resemble|resembles|resembled|resembling|become|becomes|became|becoming)$/"; + "/^(?i:seem|seems|seemed|seeming|resemble|resembles|resembled|resembling|become|becomes|became|becoming|remain|remains|remained|remaining)$/"; private static final String passiveAuxWordRegex = "/^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|seem|seems|seemed|seeming|appear|appears|appeared|become|becomes|became|becoming|get|got|getting|gets|gotten|remains|remained|remain)$/"; private static final String beAuxiliaryRegex = @@ -159,9 +159,8 @@ private EnglishGrammaticalRelations() {} */ public static final GrammaticalRelation PREDICATE = new GrammaticalRelation(Language.English, "pred", "predicate", - PredicateGRAnnotation.class, DEPENDENT, "S|SINV", tregexCompiler, + DEPENDENT, "S|SINV", tregexCompiler, "S|SINV <# VP=target"); - public static class PredicateGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -173,13 +172,12 @@ public static class PredicateGRAnnotation extends GrammaticalRelationAnnotation */ public static final GrammaticalRelation AUX_MODIFIER = new GrammaticalRelation(Language.English, "aux", "auxiliary", - AuxModifierGRAnnotation.class, DEPENDENT, "VP|SQ|SINV|CONJP", tregexCompiler, + DEPENDENT, "VP|SQ|SINV|CONJP", tregexCompiler, "VP < VP < (/^(?:TO|MD|VB.*|AUXG?|POS)$/=target)", "SQ|SINV < (/^(?:VB|MD|AUX)/=target $++ /^(?:VP|ADJP)/)", "CONJP < TO=target < VB", // (CONJP not to mention) // add handling of tricky VP fronting 
cases... "SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBG))"); - public static class AuxModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -192,13 +190,12 @@ public static class AuxModifierGRAnnotation extends GrammaticalRelationAnnotatio */ public static final GrammaticalRelation AUX_PASSIVE_MODIFIER = new GrammaticalRelation(Language.English, "auxpass", "passive auxiliary", - AuxPassiveGRAnnotation.class, AUX_MODIFIER, "VP|SQ|SINV", tregexCompiler, + AUX_MODIFIER, "VP|SQ|SINV", tregexCompiler, "VP < (/^(?:VB|AUX|POS)/=target < " + passiveAuxWordRegex + " ) < (VP|ADJP [ < VBN|VBD | < (VP|ADJP < VBN|VBD) < CC ] )", "SQ|SINV < (/^(?:VB|AUX|POS)/=target < " + beAuxiliaryRegex + " $++ (VP < VBD|VBN))", // add handling of tricky VP fronting cases... "SINV < (VP=target < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ") $-- (VP < VBD|VBN))", "SINV < (VP=target < (VP < (/^(?:VB|AUX|POS)/ < " + beAuxiliaryRegex + ")) $-- (VP < VBD|VBN))"); - public static class AuxPassiveGRAnnotation extends GrammaticalRelationAnnotation { } /** * The "copula" grammatical relation. 
A copula is the relation between @@ -210,14 +207,13 @@ public static class AuxPassiveGRAnnotation extends GrammaticalRelationAnnotation */ public static final GrammaticalRelation COPULA = new GrammaticalRelation(Language.English, "cop", "copula", - CopulaGRAnnotation.class, AUX_MODIFIER, "VP|SQ|SINV|SBARQ", tregexCompiler, + AUX_MODIFIER, "VP|SQ|SINV|SBARQ", tregexCompiler, "VP < (/^(?:VB|AUX)/=target < " + copularWordRegex + " [ $++ (/^(?:ADJP|NP$|WHNP$)/ !< (VBN|VBD !$++ /^N/)) | $++ (S <: (ADJP < JJ)) ] )", "SQ|SINV < (/^(?:VB|AUX)/=target < " + copularWordRegex + " [ $++ (ADJP !< VBN|VBD) | $++ (NP $++ NP) | $++ (S <: (ADJP < JJ)) ] )", // matches (what, is) in "what is that" after the SQ has been flattened out of the tree "SBARQ < (/^(?:VB|AUX)/=target < " + copularWordRegex + ") < (WHNP < WP)", // "Such a great idea this was" "SINV <# (NP $++ (NP $++ (VP=target < (/^(?:VB|AUX)/ < " + copularWordRegex + "))))"); - public static class CopulaGRAnnotation extends GrammaticalRelationAnnotation { } private static final String ETC_PAT = "(FW < /^(?i:etc)$/)"; @@ -246,7 +242,7 @@ public static class CopulaGRAnnotation extends GrammaticalRelationAnnotation { } */ public static final GrammaticalRelation CONJUNCT = new GrammaticalRelation(Language.English, "conj", "conjunct", - ConjunctGRAnnotation.class, DEPENDENT, "VP|(?:WH)?NP(?:-TMP|-ADV)?|ADJP|PP|QP|ADVP|UCP(?:-TMP|-ADV)?|S|NX|SBAR|SBARQ|SINV|SQ|JJP|NML|RRC", tregexCompiler, + DEPENDENT, "VP|(?:WH)?NP(?:-TMP|-ADV)?|ADJP|PP|QP|ADVP|UCP(?:-TMP|-ADV)?|S|NX|SBAR|SBARQ|SINV|SQ|JJP|NML|RRC", tregexCompiler, new String[] { // remember conjunction can be left or right headed.... 
// this is more ugly, but the first 3 patterns are now duplicated and for clausal things, that daughter to the left of the CC/CONJP can't be a PP or RB or ADVP either // non-parenthetical or comma in suitable phrase with conjunction to left @@ -286,7 +282,6 @@ public static class CopulaGRAnnotation extends GrammaticalRelationAnnotation { } // also catches some missing examples of etc as conj "/^(?:VP|S|SBAR|SBARQ|SINV|ADJP|PP|QP|(?:WH)?NP(?:-TMP|-ADV)?|ADVP|UCP(?:-TMP|-ADV)?|NX|NML)$/ [ < (CC $++ (CC|CONJP $+ !/^(?:PRN|``|''|-[LR]RB-|,|:|\\.)$/=target)) | <- " + ETC_PAT_target + " | <- " + FW_ETC_PAT_target + " ]", }); - public static class ConjunctGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -298,11 +293,10 @@ public static class ConjunctGRAnnotation extends GrammaticalRelationAnnotation { */ public static final GrammaticalRelation COORDINATION = new GrammaticalRelation(Language.English, "cc", "coordination", - CoordinationGRAnnotation.class, DEPENDENT, ".*", tregexCompiler, + DEPENDENT, ".*", tregexCompiler, new String[] { "__ [ < (CC=target !< /^(?i:either|neither|both)$/ ) | < (CONJP=target !< (RB < /^(?i:not)$/ $+ (RB|JJ < /^(?i:only|just|merely)$/))) ]" }); - public static class CoordinationGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -317,12 +311,11 @@ public static class CoordinationGRAnnotation extends GrammaticalRelationAnnotati */ public static final GrammaticalRelation PUNCTUATION = new GrammaticalRelation(Language.English, "punct", "punctuation", - PunctuationGRAnnotation.class, DEPENDENT, ".*", tregexCompiler, + DEPENDENT, ".*", tregexCompiler, new String[] { "__ < /^(?:\\.|:|,|''|``|\\*|-LRB-|-RRB-|HYPH)$/=target", "__ < (NFP=target !< " + WESTERN_SMILEY + " !< " + ASIAN_SMILEY + ")", }); - public static class PunctuationGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -335,9 +328,7 @@ public static class PunctuationGRAnnotation extends GrammaticalRelationAnnotatio * "Clinton defeated Dole" → 
arg(defeated, Clinton), arg(defeated, Dole) */ public static final GrammaticalRelation ARGUMENT = - new GrammaticalRelation(Language.English, "arg", "argument", - ArgumentGRAnnotation.class, DEPENDENT); - public static class ArgumentGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, "arg", "argument", DEPENDENT); /** @@ -351,9 +342,7 @@ public static class ArgumentGRAnnotation extends GrammaticalRelationAnnotation { * "What she said is untrue" → subj(is, What she said) */ public static final GrammaticalRelation SUBJECT = - new GrammaticalRelation(Language.English, "subj", "subject", - SubjectGRAnnotation.class, ARGUMENT); - public static class SubjectGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, "subj", "subject", ARGUMENT); /** @@ -365,7 +354,7 @@ public static class SubjectGRAnnotation extends GrammaticalRelationAnnotation { */ public static final GrammaticalRelation NOMINAL_SUBJECT = new GrammaticalRelation(Language.English, "nsubj", "nominal subject", - NominalSubjectGRAnnotation.class, SUBJECT, "S|SQ|SBARQ|SINV|SBAR|PRN", tregexCompiler, + SUBJECT, "S|SQ|SBARQ|SINV|SBAR|PRN", tregexCompiler, new String[] { "S < ((NP|WHNP=target !< EX !<# (/^NN/ < (" + timeWordRegex + "))) $++ VP)", "S < ( NP=target <# (/^NN/ < " + timeWordRegex + ") !$++ NP $++VP)", @@ -404,7 +393,6 @@ public static class SubjectGRAnnotation extends GrammaticalRelationAnnotation { // the PP matches (is, WHNP) in "what is on the test" "SBARQ <1 WHNP=target < (SQ < (/^(?:VB|AUX)/ < " + copularWordRegex + ") [< (NP < EX) | < PP])" }); - public static class NominalSubjectGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -421,11 +409,10 @@ public static class NominalSubjectGRAnnotation extends GrammaticalRelationAnnota */ public static final GrammaticalRelation NOMINAL_PASSIVE_SUBJECT = new GrammaticalRelation(Language.English, "nsubjpass", "nominal passive subject", - 
NominalPassiveSubjectGRAnnotation.class, NOMINAL_SUBJECT, "S|SQ", tregexCompiler, + NOMINAL_SUBJECT, "S|SQ", tregexCompiler, new String[] { "S|SQ < (WHNP|NP=target !< EX) < (VP < (/^(?:VB|AUX)/ < " + passiveAuxWordRegex + ") < (VP < VBN|VBD))", }); - public static class NominalPassiveSubjectGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -438,11 +425,10 @@ public static class NominalPassiveSubjectGRAnnotation extends GrammaticalRelatio */ public static final GrammaticalRelation CLAUSAL_SUBJECT = new GrammaticalRelation(Language.English, "csubj", "clausal subject", - ClausalSubjectGRAnnotation.class, SUBJECT, "S", tregexCompiler, + SUBJECT, "S", tregexCompiler, new String[] { "S < (SBAR|S=target !$+ /^,$/ $++ (VP !$-- NP))" }); - public static class ClausalSubjectGRAnnotation extends GrammaticalRelationAnnotation { } @@ -455,12 +441,11 @@ public static class ClausalSubjectGRAnnotation extends GrammaticalRelationAnnota */ public static final GrammaticalRelation CLAUSAL_PASSIVE_SUBJECT = new GrammaticalRelation(Language.English, "csubjpass", "clausal passive subject", - ClausalPassiveSubjectGRAnnotation.class, CLAUSAL_SUBJECT, "S", tregexCompiler, + CLAUSAL_SUBJECT, "S", tregexCompiler, new String[] { "S < (SBAR|S=target !$+ /^,$/ $++ (VP < (VP < VBN|VBD) < (/^(?:VB|AUXG?)/ < " + passiveAuxWordRegex + ") !$-- NP))", "S < (SBAR|S=target !$+ /^,$/ $++ (VP <+(VP) (VP < VBN|VBD > (VP < (/^(?:VB|AUX)/ < " + passiveAuxWordRegex + "))) !$-- NP))" }); - public static class ClausalPassiveSubjectGRAnnotation extends GrammaticalRelationAnnotation { } @@ -479,9 +464,7 @@ public static class ClausalPassiveSubjectGRAnnotation extends GrammaticalRelatio * comp(like, to swim) */ public static final GrammaticalRelation COMPLEMENT = - new GrammaticalRelation(Language.English, "comp", "complement", - ComplementGRAnnotation.class, ARGUMENT); - public static class ComplementGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, 
"comp", "complement", ARGUMENT); /** @@ -496,9 +479,7 @@ public static class ComplementGRAnnotation extends GrammaticalRelationAnnotation * obj(gave, raise) */ public static final GrammaticalRelation OBJECT = - new GrammaticalRelation(Language.English, "obj", "object", - ObjectGRAnnotation.class, COMPLEMENT); - public static class ObjectGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, "obj", "object", COMPLEMENT); /** @@ -514,11 +495,11 @@ public static class ObjectGRAnnotation extends GrammaticalRelationAnnotation { } */ public static final GrammaticalRelation DIRECT_OBJECT = new GrammaticalRelation(Language.English, "dobj", "direct object", - DirectObjectGRAnnotation.class, OBJECT, "VP|SQ|SBARQ?", tregexCompiler, + OBJECT, "VP|SQ|SBARQ?", tregexCompiler, new String[] { // basic direct object cases: last non-temporal NP of (non-copula) clause. This case is good. // You can't exclude "lot" in this case since people can "sell a lot" though it sometimes wrongly matches what should be an advmod like "He's done a lot" (even for the second instance, the one case admitted on PTB3 WSJ is good). 
- "VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] ) " + + "VP !< (/^(?:VB|AUX)/ [ < " + copularWordRegex + " | < " + clausalComplementRegex + " ]) < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] ) " + // The next qualification eliminates parentheticals that // come after the actual dobj " <# (__ !$++ (NP $++ (/^[:]$/ $++ =target))) ", @@ -574,7 +555,6 @@ public static class ObjectGRAnnotation extends GrammaticalRelationAnnotation { } // we now don't match "VBG > PP $+ NP=target", since it seems better to CM to regard these quasi preposition uses (like "including soya") as prepositions rather than verbs with objects -- that's certainly what the phrase structure at least suggests in the PTB. They're now matched as pobj }); - public static class DirectObjectGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -589,7 +569,7 @@ public static class DirectObjectGRAnnotation extends GrammaticalRelationAnnotati */ public static final GrammaticalRelation INDIRECT_OBJECT = new GrammaticalRelation(Language.English, "iobj", "indirect object", - IndirectObjectGRAnnotation.class, OBJECT, "VP", tregexCompiler, + OBJECT, "VP", tregexCompiler, new String[] { "VP < (NP=target !< /\\$/ !<# (/^NN/ < " + timeWordRegex + ") $+ (NP !<# (/^NN/ < " + timeWordRegex + ")))", // this next one was meant to fix common mistakes of our parser, but is perhaps too dangerous to keep @@ -597,7 +577,6 @@ public static class DirectObjectGRAnnotation extends GrammaticalRelationAnnotati // excluding DT leaves out phrases such as "My dog ate it all"" "VP < (NP=target < (NP !< /\\$/ $++ (NP !<: (PRP < " + selfRegex + ") !<: DT !< (/^NN/ < " + timeWordLotRegex + ")) !$ CC|CONJP !$ /^,$/ !$++ /^:$/))", }); - public static class IndirectObjectGRAnnotation extends GrammaticalRelationAnnotation { } 
/** @@ -622,7 +601,7 @@ public static class IndirectObjectGRAnnotation extends GrammaticalRelationAnnota */ public static final GrammaticalRelation PREPOSITIONAL_OBJECT = new GrammaticalRelation(Language.English, "pobj", "prepositional object", - PrepositionalObjectGRAnnotation.class, OBJECT, "SBARQ|PP(?:-TMP)?|WHPP|PRT|ADVP|WHADVP|XS", tregexCompiler, + OBJECT, "SBARQ|PP(?:-TMP)?|WHPP|PRT|ADVP|WHADVP|XS", tregexCompiler, new String[] { // "not" should not be the cause of a pobj "/^(?:PP(?:-TMP)?|(?:WH)?(?:PP|ADVP))$/ < (SYM|IN|VBG|VBN|TO|FW|RB|RBR $++ (/^(?:WH)?(?:NP|ADJP)(?:-TMP|-ADV)?$/=target !$- @NP) !< /^(?i:not)$/)", @@ -643,7 +622,6 @@ public static class IndirectObjectGRAnnotation extends GrammaticalRelationAnnota "SBARQ < (WHNP=target $++ ((/^(?:VB|AUX)/ < " + copularWordRegex + ") $++ (ADJP=adj < (PP !< NP)) $++ (NP $++ =adj)))", }); - public static class PrepositionalObjectGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -664,13 +642,12 @@ public static class PrepositionalObjectGRAnnotation extends GrammaticalRelationA */ public static final GrammaticalRelation PREPOSITIONAL_COMPLEMENT = new GrammaticalRelation(Language.English, "pcomp", "prepositional complement", - PrepositionalComplementGRAnnotation.class, COMPLEMENT, "(?:WH)?PP(?:-TMP)?", tregexCompiler, + COMPLEMENT, "(?:WH)?PP(?:-TMP)?", tregexCompiler, new String[] { "@PP|WHPP < (IN|VBG|VBN|TO $+ @SBAR|S|PP|ADVP=target)", // no intervening NP; VBN is for "compared with" "@PP|WHPP < (RB $+ @SBAR|S=target)", // RB is for weird tagging like "after/RB adjusting for inflation" "@PP|WHPP !< IN|TO < (SBAR=target <, (IN $+ S))", }); - public static class PrepositionalComplementGRAnnotation extends GrammaticalRelationAnnotation { } // /** @@ -683,7 +660,7 @@ public static class PrepositionalComplementGRAnnotation extends GrammaticalRelat // */ // public static final GrammaticalRelation ATTRIBUTIVE = // new GrammaticalRelation(Language.English, "attr", "attributive", - // 
AttributiveGRAnnotation.class, COMPLEMENT, "VP|SBARQ|SQ", tregexCompiler, + // COMPLEMENT, "VP|SBARQ|SQ", tregexCompiler, // new String[] { // "VP < NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + ") !$ (NP < EX)", // // "What is that?" @@ -693,7 +670,6 @@ public static class PrepositionalComplementGRAnnotation extends GrammaticalRelat // // "Is he the man?" // "SQ <, (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (NP=target $-- (NP !< EX))" // }); - // public static class AttributiveGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -718,7 +694,7 @@ public static class PrepositionalComplementGRAnnotation extends GrammaticalRelat */ public static final GrammaticalRelation CLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.English, "ccomp", "clausal complement", - ClausalComplementGRAnnotation.class, COMPLEMENT, "VP|SINV|S|ADJP|ADVP|NP(?:-.*)?", tregexCompiler, + COMPLEMENT, "VP|SINV|S|ADJP|ADVP|NP(?:-.*)?", tregexCompiler, new String[] { // note if you add more words in the pattern, be sure to add them in the ADV_CLAUSE_MODIFIER too! 
"VP < (S=target < (VP !<, TO|VBG|VBN) !$-- NP)", "VP < (SBAR=target < (S <+(S) VP) <, (IN|DT < /^(?i:that|whether)$/))", @@ -748,7 +724,6 @@ public static class PrepositionalComplementGRAnnotation extends GrammaticalRelat // Note that we eliminate SBAR which also match an vmod pattern "@NP < JJ|NN|NNS < (SBAR=target [ !<(S < (VP < TO )) | !$-- NP|NN|NNP|NNS ] )" }); - public static class ClausalComplementGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -767,7 +742,7 @@ public static class ClausalComplementGRAnnotation extends GrammaticalRelationAnn */ public static final GrammaticalRelation XCLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.English, "xcomp", "xclausal complement", - XClausalComplementGRAnnotation.class, COMPLEMENT, "VP|ADJP|SINV", tregexCompiler, + COMPLEMENT, "VP|ADJP|SINV", tregexCompiler, new String[] { // basic VP complement xcomp; this used to exclude embedding under a VP headed by be, as some are purpose clauses, but it seems like the vast majority aren't so I've removed that restriction // one way to detect purpose clauses is to look for an NP before the S, though @@ -775,7 +750,7 @@ public static class ClausalComplementGRAnnotation extends GrammaticalRelationAnn "ADJP < (S=target <, (VP <, TO))", "VP < (S=target !$- (NN < order) < (NP $+ NP|ADJP))", // to find "help sustain ... - "VP < (/^(?:VB|AUX)/ $+ (VP=target < VB < NP))", + "VP <# (/^(?:VB|AUX)/ $+ (VP=target < VB|VBG))", "VP < (SBAR=target < (S !$- (NN < order) < (VP < TO))) !> (VP < (VB|AUX < be)) ", "VP < (S=target !$- (NN < order) <: NP) > VP", "VP < (/^VB/ $+ (@S=target < (@ADJP < /^JJ/ ! $-- @NP|S))) $-- (/^VB/ < " + copularWordRegex + " )", @@ -786,13 +761,15 @@ public static class ClausalComplementGRAnnotation extends GrammaticalRelationAnn // Detects xcomp(becoming, requirement) in "Hand-holding is becoming an investment banking job requirement" // Also, xcomp(becoming, problem) in "Why is Dave becoming a problem?" 
"(VP $-- (/^(?:VB|AUX)/ < " + copularWordRegex + ") < (/^VB/ < " + clausalComplementRegex + ") < NP=target)", - + "VP < (/^(?:VB|AUX)/ < " + clausalComplementRegex + ") < (NP|WHNP=target [ [ !<# (/^NN/ < " + timeWordRegex + ") !$+ NP ] | $+ NP-TMP | $+ (NP <# (/^NN/ < " + timeWordRegex + ")) ] ) " + + // The next qualification eliminates parentheticals that + // come after the actual dobj + " <# (__ !$++ (NP $++ (/^[:]$/ $++ =target))) ", // The old attr relation, used here to recover xcomp relations instead. "VP=vp < NP=target <(/^(?:VB|AUX)/ < " + copularWordRegex + " >># =vp) !$ (NP < EX)", // "Such a great idea this was" if "was" is the root, eg -makeCopulaHead "SINV <# (VP < (/^(?:VB|AUX)/ < " + copularWordRegex + ") $-- (NP $-- NP=target))", }); - public static class XClausalComplementGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -803,12 +780,11 @@ public static class XClausalComplementGRAnnotation extends GrammaticalRelationAn */ public static final GrammaticalRelation RELATIVE = new GrammaticalRelation(Language.English, "rel", "relative", - RelativeGRAnnotation.class, COMPLEMENT, "SBAR", tregexCompiler, + COMPLEMENT, "SBAR", tregexCompiler, new String[] { // matches non-subject, not clearly direct object in relative clauses "I saw the book that you bought" "SBAR < (WHNP=target !< WRB) < (S < NP < (VP [ < SBAR | <+(VP) (PP <- IN|TO) | < (S < (VP < TO)) ] ))", }); - public static class RelativeGRAnnotation extends GrammaticalRelationAnnotation { } /** * The "referent" grammatical relation. 
A @@ -821,9 +797,7 @@ public static class RelativeGRAnnotation extends GrammaticalRelationAnnotation { * ref(book, which) */ public static final GrammaticalRelation REFERENT = - new GrammaticalRelation(Language.English, "ref", "referent", - ReferentGRAnnotation.class, DEPENDENT); - public static class ReferentGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, "ref", "referent", DEPENDENT); @@ -838,11 +812,10 @@ public static class ReferentGRAnnotation extends GrammaticalRelationAnnotation { */ public static final GrammaticalRelation EXPLETIVE = new GrammaticalRelation(Language.English, "expl", "expletive", - ExpletiveGRAnnotation.class, DEPENDENT, "S|SQ|SINV", tregexCompiler, + DEPENDENT, "S|SQ|SINV", tregexCompiler, new String[] { "S|SQ|SINV < (NP=target <+(NP) EX)" }); - public static class ExpletiveGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -858,7 +831,7 @@ public static class ExpletiveGRAnnotation extends GrammaticalRelationAnnotation */ public static final GrammaticalRelation ADJECTIVAL_COMPLEMENT = new GrammaticalRelation(Language.English, "acomp", "adjectival complement", - AdjectivalComplementGRAnnotation.class, COMPLEMENT, "VP", tregexCompiler, + COMPLEMENT, "VP", tregexCompiler, new String[] { // ADJP=target used to be limited by !$-- NP, but that // stopped the converter from finding the right dependency @@ -866,9 +839,12 @@ public static class ExpletiveGRAnnotation extends GrammaticalRelationAnnotation // The second half of the expression leaves out relations // which should be xcomp because they are actually // passivized verbs - "VP [ < ADJP=target | ( < (/^VB/ $+ (@S=target < (@ADJP < /^JJ/ ! 
$-- @NP|S))) !$-- (/^VB/ < " + copularWordRegex + " )) ]", + // Phrases such as "remained banned" "seem headed down" etc + // are captured by (clausalComplementRegex $++ VP) + // We combine them all into one tregex expression to save a + // few milliseconds of runtime + "VP [ < ADJP=target | ( < (/^VB/ [ ( < " + clausalComplementRegex + " $++ VP=target ) | $+ (@S=target < (@ADJP < /^JJ/ ! $-- @NP|S)) ] ) !$-- (/^VB/ < " + copularWordRegex + " )) ]", }); - public static class AdjectivalComplementGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -884,9 +860,7 @@ public static class AdjectivalComplementGRAnnotation extends GrammaticalRelation * mod(swam, last night) */ public static final GrammaticalRelation MODIFIER = - new GrammaticalRelation(Language.English, "mod", "modifier", - ModifierGRAnnotation.class, DEPENDENT); - public static class ModifierGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, "mod", "modifier", DEPENDENT); /** @@ -902,7 +876,7 @@ public static class ModifierGRAnnotation extends GrammaticalRelationAnnotation { */ public static final GrammaticalRelation ADV_CLAUSE_MODIFIER = new GrammaticalRelation(Language.English, "advcl", "adverbial clause modifier", - AdvClauseModifierGRAnnotation.class, MODIFIER, "VP|S|SQ|SINV|SBARQ|NP", tregexCompiler, + MODIFIER, "VP|S|SQ|SINV|SBARQ|NP", tregexCompiler, new String[] { // first case includes regular in order to purpose clauses // second disjunct matches inverted "had he investigated" cases @@ -947,7 +921,6 @@ public static class ModifierGRAnnotation extends GrammaticalRelationAnnotation { }); - public static class AdvClauseModifierGRAnnotation extends GrammaticalRelationAnnotation { } /* @@ -980,7 +953,7 @@ public static class AdvClauseModifierGRAnnotation extends GrammaticalRelationAnn */ public static final GrammaticalRelation RELATIVE_CLAUSE_MODIFIER = new GrammaticalRelation(Language.English, "rcmod", "relative clause modifier", - 
RelativeClauseModifierGRAnnotation.class, MODIFIER, "(?:WH)?(?:NP|NML|ADVP)(?:-.*)?", tregexCompiler, + MODIFIER, "(?:WH)?(?:NP|NML|ADVP)(?:-.*)?", tregexCompiler, new String[] { // Each of the following expressions includes a section // which makes sure it does not have a left sister @@ -1005,7 +978,6 @@ public static class AdvClauseModifierGRAnnotation extends GrammaticalRelationAnn "@ADVP < (@ADVP < (RB < /where$/)) < @SBAR=target", "NP < (NP $++ (SBAR=target !< (IN < /^(?i:than|that|whether)$/) !< (WHPP|WHNP|WHADVP) < (S < (@NP $++ (VP !< (/^(?:VB|AUX)/ < " + copularWordRegex + " !$+ VP) !<+(VP) (/^(?:VB|AUX)/ < " + copularWordRegex + " $+ (VP < VBN|VBD)) !<+(VP) NP !< SBAR !<+(VP) (PP <- IN|TO)))) !<: (S !< (VP < TO))) !$++ (CC $++ =target))" }); - public static class RelativeClauseModifierGRAnnotation extends GrammaticalRelationAnnotation { } /* @@ -1032,12 +1004,11 @@ public static class RelativeClauseModifierGRAnnotation extends GrammaticalRelati */ public static final GrammaticalRelation MARKER = new GrammaticalRelation(Language.English, "mark", "marker", - MarkerGRAnnotation.class, MODIFIER, "SBAR(?:-TMP)?", tregexCompiler, + MODIFIER, "SBAR(?:-TMP)?", tregexCompiler, new String[] { "SBAR|SBAR-TMP < (IN|DT=target $++ S|FRAG)", "SBAR < (IN|DT=target < that|whether) [ $-- /^(?:VB|AUX)/ | $- NP|NN|NNS | > ADJP|PP | > (@NP|UCP|SBAR < CC|CONJP $-- /^(?:VB|AUX)/) ]", }); - public static class MarkerGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1058,7 +1029,7 @@ public static class MarkerGRAnnotation extends GrammaticalRelationAnnotation { } */ public static final GrammaticalRelation ADJECTIVAL_MODIFIER = new GrammaticalRelation(Language.English, "amod", "adjectival modifier", - AdjectivalModifierGRAnnotation.class, MODIFIER, "NP(?:-TMP|-ADV)?|NX|NML|NAC|WHNP|ADJP", tregexCompiler, + MODIFIER, "NP(?:-TMP|-ADV)?|NX|NML|NAC|WHNP|ADJP", tregexCompiler, new String[] { // QP !< $ is so phrases such as "$ 100 million buyout" get amod(buyout, $) 
"/^(?:NP(?:-TMP|-ADV)?|NX|NML|NAC|WHNP)$/ < (ADJP|WHADJP|JJ|JJR|JJS|JJP|VBN|VBG|VBD|IN=target !< (QP !< /^[$]$/) !$- CC)", @@ -1067,7 +1038,6 @@ public static class MarkerGRAnnotation extends GrammaticalRelationAnnotation { } // Cover the case of "John, 34, works at Stanford" - similar to an expression for appos "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (NP=target <: CD $- /^,$/ $-- /^(?:WH)?NP/ !$ CC|CONJP)", }); - public static class AdjectivalModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1081,7 +1051,7 @@ public static class AdjectivalModifierGRAnnotation extends GrammaticalRelationAn */ public static final GrammaticalRelation NUMERIC_MODIFIER = new GrammaticalRelation(Language.English, "num", "numeric modifier", - NumericModifierGRAnnotation.class, MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NML|NX|ADJP|WHADJP|QP", tregexCompiler, + MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NML|NX|ADJP|WHADJP|QP", tregexCompiler, new String[] { "/^(?:WH)?(?:NP|NX|NML)(?:-TMP|-ADV)?$/ < (CD|QP=target !$- CC)", // $ is so phrases such as "$ 100 million buyout" get amod(buyout, $) @@ -1093,7 +1063,6 @@ public static class AdjectivalModifierGRAnnotation extends GrammaticalRelationAn // as in the phrase "$ 100 million or more". In that case, this next expression is needed. 
"QP < QP=target < /^[$]$/" }); - public static class NumericModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1106,11 +1075,10 @@ public static class NumericModifierGRAnnotation extends GrammaticalRelationAnnot */ public static final GrammaticalRelation NUMBER_MODIFIER = new GrammaticalRelation(Language.English, "number", "compound number modifier", - NumberModifierGRAnnotation.class, MODIFIER, "QP|ADJP", tregexCompiler, + MODIFIER, "QP|ADJP", tregexCompiler, new String[] { "QP|ADJP < (/^(?:CD|$|#)$/=target !$- CC)" }); - public static class NumberModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1123,11 +1091,10 @@ public static class NumberModifierGRAnnotation extends GrammaticalRelationAnnota */ public static final GrammaticalRelation QUANTIFIER_MODIFIER = new GrammaticalRelation(Language.English, "quantmod", "quantifier modifier", - QuantifierModifierGRAnnotation.class, MODIFIER, "QP", tregexCompiler, + MODIFIER, "QP", tregexCompiler, new String[] { "QP < IN|RB|RBR|RBS|PDT|DT|JJ|JJR|JJS|XS=target" }); - public static class QuantifierModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1151,7 +1118,7 @@ public static class QuantifierModifierGRAnnotation extends GrammaticalRelationAn */ public static final GrammaticalRelation NOUN_COMPOUND_MODIFIER = new GrammaticalRelation(Language.English, "nn", "nn modifier", - NounCompoundModifierGRAnnotation.class, MODIFIER, "(?:WH)?(?:NP|NX|NAC|NML|ADVP|ADJP)(?:-TMP|-ADV)?", tregexCompiler, + MODIFIER, "(?:WH)?(?:NP|NX|NAC|NML|ADVP|ADJP)(?:-TMP|-ADV)?", tregexCompiler, new String[] { // added AFX: can't really tell it's natural POS; this seems the best one can do // The check for POS is to eliminate conflicts with poss relations @@ -1161,7 +1128,6 @@ public static class QuantifierModifierGRAnnotation extends GrammaticalRelationAn // matches against "etc etc" "ADJP|ADVP < (FW [ $- (FW=target !< /^(?i:etc)$/) | $- (IN=target < in|In) ] )", }); - public static class 
NounCompoundModifierGRAnnotation extends GrammaticalRelationAnnotation { } /* * There used to be a relation "abbrev" for when abbreviations were defined in brackets after a noun @@ -1184,7 +1150,7 @@ public static class NounCompoundModifierGRAnnotation extends GrammaticalRelation */ public static final GrammaticalRelation APPOSITIONAL_MODIFIER = new GrammaticalRelation(Language.English, "appos", "appositional modifier", - AppositionalModifierGRAnnotation.class, MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?", tregexCompiler, + MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?", tregexCompiler, new String[] { // Note that we disallow a single CD node as the child of // the NP. This eliminates numbers being used as ages, @@ -1204,7 +1170,6 @@ public static class NounCompoundModifierGRAnnotation extends GrammaticalRelation "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (NP=target <: NNP $- (/^(?:WH)?NP/ !< POS)) !< CC|CONJP !< " + FW_ETC_PAT + " !< " + ETC_PAT, }); - public static class AppositionalModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1218,14 +1183,13 @@ public static class AppositionalModifierGRAnnotation extends GrammaticalRelation */ public static final GrammaticalRelation DISCOURSE_ELEMENT = new GrammaticalRelation(Language.English, "discourse", "discourse element", - DiscourseElementGRAnnotation.class, MODIFIER, ".*", tregexCompiler, + MODIFIER, ".*", tregexCompiler, new String[] { // smiley faces (escaped), based on Chris Potts' sentiment tutorial "__ < (NFP=target [ < " + WESTERN_SMILEY + " | < " + ASIAN_SMILEY + " ] )", "__ [ < INTJ=target | < (PRN=target <1 /^(?:,|-LRB-)$/ <2 INTJ [ !<3 __ | <3 /^(?:,|-RRB-)$/ ] ) ]" }); - public static class DiscourseElementGRAnnotation extends GrammaticalRelationAnnotation { } @@ -1246,7 +1210,7 @@ public static class DiscourseElementGRAnnotation extends GrammaticalRelationAnno */ public static final GrammaticalRelation VERBAL_MODIFIER = new GrammaticalRelation(Language.English, "vmod", "verb modifier", - 
VerbalModifierGRAnnotation.class, MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NML|NX|VP|S|SINV|SBARQ", tregexCompiler, + MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NML|NX|VP|S|SINV|SBARQ", tregexCompiler, new String[] { "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV|NML|NX < (VP=target < VBG|VBN|VBD $-- @NP|NML|NX)", // also allow VBD since it quite often occurs in treebank errors and parse errors // to get "MBUSA, headquartered ..." @@ -1267,7 +1231,6 @@ public static class DiscourseElementGRAnnotation extends GrammaticalRelationAnno "/^NP(?:-[A-Z]+)?$/ < (SBAR=target < (S < (VP < TO)) $-- NP|NN|NNP|NNS)", "SBARQ < WHNP < (S=target < (VP <1 TO))", }); - public static class VerbalModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1283,7 +1246,7 @@ public static class VerbalModifierGRAnnotation extends GrammaticalRelationAnnota */ public static final GrammaticalRelation ADVERBIAL_MODIFIER = new GrammaticalRelation(Language.English, "advmod", "adverbial modifier", - AdverbialModifierGRAnnotation.class, MODIFIER, + MODIFIER, "VP|ADJP|WHADJP|ADVP|WHADVP|S|SBAR|SINV|SQ|SBARQ|XS|(?:WH)?(?:PP|NP)(?:-TMP|-ADV)?|RRC|CONJP|JJP", tregexCompiler, new String[] { "/^(?:VP|ADJP|JJP|WHADJP|SQ?|SBARQ?|SINV|XS|RRC|(?:WH)?NP(?:-TMP|-ADV)?)$/ < (RB|RBR|RBS|WRB|ADVP|WHADVP=target !< " + NOT_PAT + " !< " + ETC_PAT + ")", @@ -1300,7 +1263,6 @@ public static class VerbalModifierGRAnnotation extends GrammaticalRelationAnnota "/(?:WH)?PP(?:-TMP|-ADV)?$/ < @NP|WHNP < (RB|RBR|RBS|WRB|ADVP|WHADVP=target !< " + NOT_PAT + " !< " + ETC_PAT + ")", "CONJP < (RB=target !< " + NOT_PAT + " !< " + ETC_PAT + ")", }); - public static class AdverbialModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1315,7 +1277,7 @@ public static class AdverbialModifierGRAnnotation extends GrammaticalRelationAnn */ public static final GrammaticalRelation NEGATION_MODIFIER = new GrammaticalRelation(Language.English, "neg", "negation modifier", - NegationModifierGRAnnotation.class, ADVERBIAL_MODIFIER, + 
ADVERBIAL_MODIFIER, "VP|ADJP|S|SBAR|SINV|SQ|NP(?:-TMP|-ADV)?|FRAG|CONJP|PP|NAC|NML|NX|ADVP|WHADVP", tregexCompiler, new String[] { "/^(?:VP|NP(?:-TMP|-ADV)?|ADJP|SQ|S|FRAG|CONJP|PP)$/< (RB=target < " + NOT_PAT + ")", @@ -1328,7 +1290,6 @@ public static class AdverbialModifierGRAnnotation extends GrammaticalRelationAnn // !< CC|CONJP catches phrases such as "no more or less", which maybe should be preconj "ADVP|WHADVP < (RB|RBR|RBS|WRB|ADVP|WHADVP|JJ=target < /^(?i:no)$/) !< CC|CONJP", }); - public static class NegationModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1373,7 +1334,7 @@ public static class NegationModifierGRAnnotation extends GrammaticalRelationAnno */ public static final GrammaticalRelation NP_ADVERBIAL_MODIFIER = new GrammaticalRelation(Language.English, "npadvmod", "noun phrase adverbial modifier", - NpAdverbialModifierGRAnnotation.class, MODIFIER, "VP|(?:WH)?(?:NP|ADJP|ADVP|PP)(?:-TMP|-ADV)?", tregexCompiler, + MODIFIER, "VP|(?:WH)?(?:NP|ADJP|ADVP|PP)(?:-TMP|-ADV)?", tregexCompiler, new String[] { // measure phrases pattern (don't allow VBG/VBN cases, as often participles) "@ADVP|ADJP|WHADJP|WHADVP|PP|WHPP <# (JJ|JJR|IN|RB|RBR !< notwithstanding $- (@NP=target !< NNP|NNPS))", @@ -1388,7 +1349,6 @@ public static class NegationModifierGRAnnotation extends GrammaticalRelationAnno "@NP <1 (@NP <<# /^%$/) <2 (@NP=target <<# days|month|months) !<3 __", "@VP < /^NP-ADV/=target", }); - public static class NpAdverbialModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1404,7 +1364,7 @@ public static class NpAdverbialModifierGRAnnotation extends GrammaticalRelationA */ public static final GrammaticalRelation TEMPORAL_MODIFIER = new GrammaticalRelation(Language.English, "tmod", "temporal modifier", - TemporalModifierGRAnnotation.class, NP_ADVERBIAL_MODIFIER, "VP|S|ADJP|PP|SBAR|SBARQ|NP|RRC", tregexCompiler, + NP_ADVERBIAL_MODIFIER, "VP|S|ADJP|PP|SBAR|SBARQ|NP|RRC", tregexCompiler, new String[] { // VP <# NP-TMP is for 
phrases which might be parsed as VP over an empty verb such as // "Yesterday I went running, but I couldn't today" @@ -1419,7 +1379,6 @@ public static class NpAdverbialModifierGRAnnotation extends GrammaticalRelationA "SBARQ < (@WHNP=target <# (/^NN/ < " + timeWordRegex + ")) < (SQ < @NP)", "NP < NP-TMP=target" }); - public static class TemporalModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1438,7 +1397,7 @@ public static class TemporalModifierGRAnnotation extends GrammaticalRelationAnno */ public static final GrammaticalRelation MULTI_WORD_EXPRESSION = new GrammaticalRelation(Language.English, "mwe", "multi-word expression", - MultiWordExpressionGRAnnotation.class, MODIFIER, "PP|XS|ADVP|CONJP", tregexCompiler, + MODIFIER, "PP|XS|ADVP|CONJP", tregexCompiler, new String[] { "PP|XS < (IN|TO < as|of|at|to|in) < (JJ|IN|JJR|JJS|NN=target < such|because|Because|least|instead|due|Due|addition|to)", "ADVP < (RB|IN < well) < (IN|RB|JJS=target < as)", @@ -1449,7 +1408,6 @@ public static class TemporalModifierGRAnnotation extends GrammaticalRelationAnno // todo: note inconsistent head finding for "rather than"! "XS < JJR|JJS=target" // more than, fewer than, well over -- maybe change some of these? }); - public static class MultiWordExpressionGRAnnotation extends GrammaticalRelationAnnotation { } /* mihai: this block needs to be uncommented to get the KBP 2010 system to work (due to the cached sentences using old code) * (Note: in 2011, the measure phrase relation was collapsed into the scope of npadvmod, rather than being separated out.) 
@@ -1463,12 +1421,11 @@ public static class MultiWordExpressionGRAnnotation extends GrammaticalRelationA * public static final GrammaticalRelation MEASURE_PHRASE = new GrammaticalRelation(Language.English, "measure", "measure-phrase", - MeasurePhraseGRAnnotation.class, MODIFIER, "ADJP|ADVP", tregexCompiler, + MODIFIER, "ADJP|ADVP", tregexCompiler, new String[] { "ADJP <- JJ <, (NP=target !< NNP)", "ADVP|ADJP <# (JJ|IN $- NP=target)" }); - public static class MeasurePhraseGRAnnotation extends GrammaticalRelationAnnotation { } */ // mihai: end block /** @@ -1482,7 +1439,7 @@ public static class MeasurePhraseGRAnnotation extends GrammaticalRelationAnnotat */ public static final GrammaticalRelation DETERMINER = new GrammaticalRelation(Language.English, "det", "determiner", - DeterminerGRAnnotation.class, MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NAC|NML|NX|X|ADVP|ADJP", tregexCompiler, + MODIFIER, "(?:WH)?NP(?:-TMP|-ADV)?|NAC|NML|NX|X|ADVP|ADJP", tregexCompiler, new String[] { // For this relation, we do not want to trigger if there are // possessive nodes of some kind between the target DT and @@ -1503,7 +1460,6 @@ public static class MeasurePhraseGRAnnotation extends GrammaticalRelationAnnotat "@WHNP|ADVP|ADJP < (/^(?:NP|NN|CD|RBS|JJ)/ $-- (DT|WDT|WP=target !< /^(?i:no)$/ [ ==WDT|WP | !$++ CC|CONJP ]))", "@NP < (/^(?:NP|NN|CD|RBS)/ $-- WDT|WP=target)" }); - public static class DeterminerGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1514,13 +1470,12 @@ public static class DeterminerGRAnnotation extends GrammaticalRelationAnnotation */ public static final GrammaticalRelation PREDETERMINER = new GrammaticalRelation(Language.English, "predet", "predeterminer", - PredeterminerGRAnnotation.class, MODIFIER, "(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?", tregexCompiler, + MODIFIER, "(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?", tregexCompiler, new String[] { "/^(?:(?:WH)?NP(?:-TMP|-ADV)?|NX|NAC|NML)$/ < (PDT|DT=target $+ /^(?:DT|WP\\$|PRP\\$)$/ $++ /^(?:NN|NX|NML)/ !$++ 
CC)", "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (PDT|DT=target $+ DT $++ (/^JJ/ !$+ /^NN/)) !$++ CC", "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < PDT=target <- DT" }); - public static class PredeterminerGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1531,7 +1486,7 @@ public static class PredeterminerGRAnnotation extends GrammaticalRelationAnnotat */ public static final GrammaticalRelation PRECONJUNCT = new GrammaticalRelation(Language.English, "preconj", "preconjunct", - PreconjunctGRAnnotation.class, MODIFIER, + MODIFIER, "S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR|NP(?:-TMP|-ADV)?", tregexCompiler, new String[] { // "/^NP(?:-TMP|-ADV)?|NX|NML$/ < (PDT|CC=target < /^(?i:either|neither|both)$/ [ $++ /^N[NXM]/ | $++ (/^JJ/ !$+ /^NN/) ] ) $++ CC", // cdm Jun 2010: This pattern was matching nothing, since it was looking for the second CC as sister of the top NP not of the preconjunct. But if you move the close parenthesis right, you'd just get a more restricted form of the second pattern.... 
@@ -1543,7 +1498,6 @@ public static class PredeterminerGRAnnotation extends GrammaticalRelationAnnotat "/^S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR$/ < (PDT|DT|CC=target < /^(?i:either|neither|both)$/ $++ CC)", "/^S|VP|ADJP|PP|ADVP|UCP(?:-TMP|-ADV)?|NX|NML|SBAR$/ < (CONJP=target < (RB < /^(?i:not)$/) < (RB|JJ < /^(?i:only|merely|just)$/) $++ CC|CONJP)" }); - public static class PreconjunctGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1557,7 +1511,7 @@ public static class PreconjunctGRAnnotation extends GrammaticalRelationAnnotatio */ public static final GrammaticalRelation POSSESSION_MODIFIER = new GrammaticalRelation(Language.English, "poss", "possession modifier", - PossessionModifierGRAnnotation.class, MODIFIER, "(?:WH)?(NP|ADJP|INTJ|PRN|NAC|NX|NML)(?:-.*)?", tregexCompiler, + MODIFIER, "(?:WH)?(NP|ADJP|INTJ|PRN|NAC|NX|NML)(?:-.*)?", tregexCompiler, new String[] { // possessive pronouns like "my", "whose"; [cdm 2010: Simplified; extra checks seemed unneeded (INTJ for "oh my god", though maybe it should really have internal NP....) 
"/^(?:WH)?(?:NP|INTJ|ADJP|PRN|NAC|NX|NML)(?:-.*)?$/ < /^(?:WP\\$|PRP\\$)$/=target", @@ -1568,7 +1522,6 @@ public static class PreconjunctGRAnnotation extends GrammaticalRelationAnnotatio // note that ' matches both ' and 's "/^(?:WH)?(?:NP|NML|NX)(?:-.*)?$/ < (/^NN|NP/=target $++ (POS=pos < /\'/ $++ /^NN/) !$++ (/^NN|NP/ $++ =pos))" }); - public static class PossessionModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1581,12 +1534,11 @@ public static class PossessionModifierGRAnnotation extends GrammaticalRelationAn */ public static final GrammaticalRelation POSSESSIVE_MODIFIER = new GrammaticalRelation(Language.English, "possessive", "possessive modifier", - PossessiveModifierGRAnnotation.class, MODIFIER, "(?:WH)?(?:NP|NML)(?:-TMP|-ADV)?", tregexCompiler, + MODIFIER, "(?:WH)?(?:NP|NML)(?:-TMP|-ADV)?", tregexCompiler, new String[] { "/^(?:WH)?(?:NP|NML)(?:-TMP|-ADV)?$/ < POS=target", "/^(?:WH)?(?:NP|NML)(?:-TMP|-ADV)?$/ < (VBZ=target < /^'s$/)" }); - public static class PossessiveModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1606,7 +1558,7 @@ public static class PossessiveModifierGRAnnotation extends GrammaticalRelationAn */ public static final GrammaticalRelation PREPOSITIONAL_MODIFIER = new GrammaticalRelation(Language.English, "prep", "prepositional modifier", - PrepositionalModifierGRAnnotation.class, MODIFIER, ".*", tregexCompiler, + MODIFIER, ".*", tregexCompiler, new String[] { // note that we disallow nodes which are next to a CC or // CONJP, which can happen to a PP when we are analyzing @@ -1620,7 +1572,6 @@ public static class PossessiveModifierGRAnnotation extends GrammaticalRelationAn "SBAR|SBARQ < /^(?:WH)?PP/=target < S|SQ", "@NP < (@UCP|PRN=target <# @PP)", }); - public static class PrepositionalModifierGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1633,12 +1584,11 @@ public static class PrepositionalModifierGRAnnotation extends GrammaticalRelatio */ public static final 
GrammaticalRelation PHRASAL_VERB_PARTICLE = new GrammaticalRelation(Language.English, "prt", "phrasal verb particle", - PhrasalVerbParticleGRAnnotation.class, MODIFIER, "VP|ADJP", tregexCompiler, + MODIFIER, "VP|ADJP", tregexCompiler, new String[] { "VP < PRT=target", "ADJP < /^VB/ < RP=target" }); - public static class PhrasalVerbParticleGRAnnotation extends GrammaticalRelationAnnotation { } /** @@ -1652,7 +1602,7 @@ public static class PhrasalVerbParticleGRAnnotation extends GrammaticalRelationA */ public static final GrammaticalRelation PARATAXIS = new GrammaticalRelation(Language.English, "parataxis", "parataxis", - ParataxisGRAnnotation.class, DEPENDENT, "S|VP", tregexCompiler, + DEPENDENT, "S|VP", tregexCompiler, new String[]{ "VP < (PRN=target < S|SINV|SBAR)", // parenthetical "VP $ (PRN=target [ < S|SINV|SBAR | < VP < @NP ] )", // parenthetical @@ -1668,7 +1618,6 @@ public static class PhrasalVerbParticleGRAnnotation extends GrammaticalRelationA "@S < (@S|SBARQ $++ @S|SBARQ=target !$++ @CC|CONJP)", "@S|VP < (/^:$/ $-- /^V/ $+ @NP=target) !< @CONJP|CC", // sometimes CC cases are right node raising, etc. }); - public static class ParataxisGRAnnotation extends GrammaticalRelationAnnotation { } /** * The "goes with" grammatical relation. This corresponds to use of the GW (goes with) part-of-speech tag @@ -1680,11 +1629,10 @@ public static class ParataxisGRAnnotation extends GrammaticalRelationAnnotation */ public static final GrammaticalRelation GOES_WITH = new GrammaticalRelation(Language.English, "goeswith", "goes with", - GoesWithGRAnnotation.class, MODIFIER, ".*", tregexCompiler, + MODIFIER, ".*", tregexCompiler, new String[] { "__ < GW=target", }); - public static class GoesWithGRAnnotation extends GrammaticalRelationAnnotation { } @@ -1693,24 +1641,7 @@ public static class GoesWithGRAnnotation extends GrammaticalRelationAnnotation { * introduced as a supertype for the controlling subject relation. 
*/ public static final GrammaticalRelation SEMANTIC_DEPENDENT = - new GrammaticalRelation(Language.English, "sdep", "semantic dependent", - SemanticDependentGRAnnotation.class, DEPENDENT); - public static class SemanticDependentGRAnnotation extends GrammaticalRelationAnnotation { } - - - /** - * The "controlling subject" grammatical relation.

- * A controlling subject is the relation between the head of an xcomp and the external subject - * of that clause. - *

- * Example:
- * "Tom likes to eat fish" → - * xsubj(eat, Tom) - */ - public static final GrammaticalRelation CONTROLLING_SUBJECT = - new GrammaticalRelation(Language.English, "xsubj", "controlling subject", - ControllingSubjectGRAnnotation.class, SEMANTIC_DEPENDENT); - public static class ControllingSubjectGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, "sdep", "semantic dependent", DEPENDENT); /** @@ -1722,9 +1653,7 @@ public static class ControllingSubjectGRAnnotation extends GrammaticalRelationAn * agent(killed, police) */ public static final GrammaticalRelation AGENT = - new GrammaticalRelation(Language.English, "agent", "agent", - AgentGRAnnotation.class, DEPENDENT); - public static class AgentGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.English, "agent", "agent", DEPENDENT); // TODO would be nice to have this set up automatically... @@ -1790,7 +1719,6 @@ public static class AgentGRAnnotation extends GrammaticalRelationAnnotation { } PREPOSITIONAL_MODIFIER, PHRASAL_VERB_PARTICLE, SEMANTIC_DEPENDENT, - CONTROLLING_SUBJECT, AGENT, NUMBER_MODIFIER, QUANTIFIER_MODIFIER, @@ -1861,9 +1789,6 @@ public static Collection getConjs() { * The "conj" grammatical relation. Used to collapse conjunct relations. * They will be turned into conj_word, where "word" is a conjunction. * - * NOTE: Because these relations lack associated GrammaticalRelationAnnotations, - * they cannot be arcs of a TreeGraphNode. 
- * * @param conjunctionString The conjunction to make a GrammaticalRelation out of * @return A grammatical relation for this conjunction */ @@ -1873,7 +1798,7 @@ public static GrammaticalRelation getConj(String conjunctionString) { synchronized(conjs) { result = conjs.get(conjunctionString); if (result == null) { - result = new GrammaticalRelation(Language.English, "conj", "conj_collapsed", null, CONJUNCT, conjunctionString); + result = new GrammaticalRelation(Language.English, "conj", "conj_collapsed", CONJUNCT, conjunctionString); conjs.put(conjunctionString, result); threadSafeAddRelation(result); } @@ -1900,9 +1825,6 @@ public static Collection getPrepsC() { * The "prep" grammatical relation. Used to collapse prepositions.

* They will be turned into prep_word, where "word" is a preposition * - * NOTE: Because these relations lack associated GrammaticalRelationAnnotations, - * they cannot be arcs of a TreeGraphNode. - * * @param prepositionString The preposition to make a GrammaticalRelation out of * @return A grammatical relation for this preposition */ @@ -1912,7 +1834,7 @@ public static GrammaticalRelation getPrep(String prepositionString) { synchronized(preps) { result = preps.get(prepositionString); if (result == null) { - result = new GrammaticalRelation(Language.English, "prep", "prep_collapsed", null, PREPOSITIONAL_MODIFIER, prepositionString); + result = new GrammaticalRelation(Language.English, "prep", "prep_collapsed", PREPOSITIONAL_MODIFIER, prepositionString); preps.put(prepositionString, result); threadSafeAddRelation(result); } @@ -1927,9 +1849,6 @@ public static GrammaticalRelation getPrep(String prepositionString) { * complements.

* They will be turned into prep_word, where "word" is a preposition * - * NOTE: Because these relations lack associated GrammaticalRelationAnnotations, - * they cannot be arcs of a TreeGraphNode. - * * @param prepositionString The preposition to make a GrammaticalRelation out of * @return A grammatical relation for this preposition */ @@ -1939,7 +1858,7 @@ public static GrammaticalRelation getPrepC(String prepositionString) { synchronized(prepsC) { result = prepsC.get(prepositionString); if (result == null) { - result = new GrammaticalRelation(Language.English, "prepc", "prepc_collapsed", null, DEPENDENT, prepositionString); + result = new GrammaticalRelation(Language.English, "prepc", "prepc_collapsed", DEPENDENT, prepositionString); prepsC.put(prepositionString, result); threadSafeAddRelation(result); } @@ -1992,12 +1911,6 @@ public static GrammaticalRelation valueOf(String s) { public static GrammaticalRelation valueOf(Object o) { if (o instanceof GrammaticalRelation) { return (GrammaticalRelation) o; - } else if (o instanceof Class) { - try { - return getRelation((Class) o); - } catch (Exception e) { - return null; - } } else if (o instanceof String) { return valueOf((String) o); } else { diff --git a/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java b/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java index c59363fb56..1d12dc0aaa 100644 --- a/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java +++ b/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java @@ -117,36 +117,14 @@ public boolean accept(TypedDependency d) { private static final Filter extraTreeDepFilter = new ExtraTreeDepFilter(); - /** - * Tries to return a node representing the SUBJECT (whether - * nominal or clausal) of the given node t. Probably, node - * t should represent a clause or verb phrase. 
- * - * @param t A node in this GrammaticalStructure - * @return A node which is the subject of node t, or else - * null - */ - public static TreeGraphNode getSubject(TreeGraphNode t) { - TreeGraphNode subj = t.getNodeInRelation(NOMINAL_SUBJECT); - if (subj != null) { - return subj; - } - subj = t.getNodeInRelation(CLAUSAL_SUBJECT); - if (subj != null) { - return subj; - } else { - return t.getNodeInRelation(NOMINAL_PASSIVE_SUBJECT); - } - } - @Override protected void correctDependencies(Collection list) { if (DEBUG) { printListSorted("At correctDependencies:", list); } - correctSubjPassAndPoss(list); + correctSubjPass(list); if (DEBUG) { - printListSorted("After correctSubjPassAndPoss:", list); + printListSorted("After correctSubjPass:", list); } removeExactDuplicates(list); if (DEBUG) { @@ -178,9 +156,9 @@ protected void getExtras(List list) { printListSorted("After adding ref:", list); } - addXSubj(list); + addExtraNSubj(list); if (DEBUG) { - printListSorted("After adding xsubj:", list); + printListSorted("After adding extra nsubj:", list); } addStrandedPobj(list); @@ -457,9 +435,14 @@ protected void collapseDependencies(List list, boolean CCproces } if (includeExtras) { - addXSubj(list); + addExtraNSubj(list); if (DEBUG) { - printListSorted("After adding xsubj:", list); + printListSorted("After adding extra nsubj:", list); + } + + correctSubjPass(list); + if (DEBUG) { + printListSorted("After correctSubjPass:", list); } } @@ -834,24 +817,24 @@ private static void addRef(Collection list) { } /** - * Add xsubj dependencies when collapsing basic dependencies. + * Add extra nsubj dependencies when collapsing basic dependencies. *
* In the general case, we look for an aux modifier under an xcomp * modifier, and assuming there aren't already associated nsubj * dependencies as daughters of the original xcomp dependency, we - * add xsubj dependencies for each nsubj daughter of the aux. + * add nsubj dependencies for each nsubj daughter of the aux. *
* There is also a special case for "to" words, in which case we add * a dependency if and only if there is no nsubj associated with the * xcomp and there is no other aux dependency. This accounts for * sentences such as "he decided not to" with no following verb. */ - private static void addXSubj(Collection list) { + private static void addExtraNSubj(Collection list) { List newDeps = new ArrayList(); for (TypedDependency xcomp : list) { if (xcomp.reln() != XCLAUSAL_COMPLEMENT) { - // we only add xsubj dependencies to some xcomp dependencies + // we only add extra nsubj dependencies to some xcomp dependencies continue; } @@ -861,6 +844,7 @@ private static void addXSubj(Collection list) { boolean hasSubjectDaughter = false; boolean hasAux = false; List subjects = new ArrayList(); + List objects = new ArrayList(); for (TypedDependency dep : list) { // already have a subject dependency if ((dep.reln() == NOMINAL_SUBJECT || dep.reln() == NOMINAL_PASSIVE_SUBJECT) && dep.gov() == modifier) { @@ -872,13 +856,16 @@ private static void addXSubj(Collection list) { hasAux = true; } - // TODO: create an xsubjpass to go with the NOMINAL_PASSIVE_SUBJECT if ((dep.reln() == NOMINAL_SUBJECT || dep.reln() == NOMINAL_PASSIVE_SUBJECT) && dep.gov() == head) { subjects.add(dep.dep()); } + + if (dep.reln() == DIRECT_OBJECT && dep.gov() == head) { + objects.add(dep.dep()); + } } - // if we already have an nsubj dependency, no need to add an xsubj + // if we already have an nsubj dependency, no need to add an extra nsubj if (hasSubjectDaughter) { continue; } @@ -888,9 +875,22 @@ private static void addXSubj(Collection list) { continue; } - for (TreeGraphNode subject : subjects) { - TypedDependency newDep = new TypedDependency(CONTROLLING_SUBJECT, modifier, subject); - newDeps.add(newDep); + // In general, we find that the objects of the verb are better + // for extra nsubj than the original nsubj of the verb. For example, + // "Many investors wrote asking the SEC to require ..." 
+ // There is no nsubj of asking, but the dobj, SEC, is the extra nsubj of require. + // Similarly, "The law tells them when to do so" + // Instead of nsubj(do, law) we want nsubj(do, them) + if (objects.size() > 0) { + for (TreeGraphNode object : objects) { + TypedDependency newDep = new TypedDependency(NOMINAL_SUBJECT, modifier, object); + newDeps.add(newDep); + } + } else { + for (TreeGraphNode subject : subjects) { + TypedDependency newDep = new TypedDependency(NOMINAL_SUBJECT, modifier, subject); + newDeps.add(newDep); + } } } @@ -904,12 +904,11 @@ private static void addXSubj(Collection list) { /** * This method corrects subjects of verbs for which we identified an auxpass, - * but didn't identify the subject as passive. It also corrects the possessive - * relations for PRP$ and WP$ which weren't retrieved. + * but didn't identify the subject as passive. * * @param list List of typedDependencies to work on */ - private static void correctSubjPassAndPoss(Collection list) { + private static void correctSubjPass(Collection list) { // put in a list verbs having an auxpass List list_auxpass = new ArrayList(); for (TypedDependency td : list) { diff --git a/src/edu/stanford/nlp/trees/GrammaticalRelation.java b/src/edu/stanford/nlp/trees/GrammaticalRelation.java index 852ce3f021..79464baf4e 100644 --- a/src/edu/stanford/nlp/trees/GrammaticalRelation.java +++ b/src/edu/stanford/nlp/trees/GrammaticalRelation.java @@ -26,7 +26,6 @@ package edu.stanford.nlp.trees; -import edu.stanford.nlp.ling.CoreAnnotation; import edu.stanford.nlp.trees.international.pennchinese.ChineseGrammaticalRelations; import edu.stanford.nlp.trees.tregex.TregexMatcher; import edu.stanford.nlp.trees.tregex.TregexPattern; @@ -111,7 +110,7 @@ * * @author Bill MacCartney * @author Galen Andrew (refactoring English-specific stuff) - * @author Ilya Sherman (refactoring annotation-relation pairing) + * @author Ilya Sherman (refactoring annotation-relation pairing, which is now gone) */ public class 
GrammaticalRelation implements Comparable, Serializable { @@ -119,16 +118,6 @@ public class GrammaticalRelation implements Comparable, Ser private static final boolean DEBUG = System.getProperty("GrammaticalRelation", null) != null; - public abstract static class GrammaticalRelationAnnotation implements CoreAnnotation> { - @Override - @SuppressWarnings({"unchecked", "RedundantCast"}) - public Class> getType() { return (Class) Set.class; } - } - - private static final Map, GrammaticalRelation> - annotationsToRelations = Generics.newHashMap(); - private static final Map> - relationsToAnnotations = Generics.newHashMap(); private static final EnumMap> stringsToRelations = new EnumMap>(Language.class); @@ -138,9 +127,8 @@ public abstract static class GrammaticalRelationAnnotation implements CoreAnnota * Example: "the red car" → gov(red, car) */ public static final GrammaticalRelation GOVERNOR = - new GrammaticalRelation(Language.Any, "gov", "governor", GovernorGRAnnotation.class, null); + new GrammaticalRelation(Language.Any, "gov", "governor", null); - public static class GovernorGRAnnotation extends GrammaticalRelationAnnotation { } /** * The "dependent" grammatical relation, which is the inverse of "governor".

@@ -148,35 +136,22 @@ public static class GovernorGRAnnotation extends GrammaticalRelationAnnotation { * Example: "the red car" → dep(car, red) */ public static final GrammaticalRelation DEPENDENT = - new GrammaticalRelation(Language.Any, "dep", "dependent", DependentGRAnnotation.class, null); + new GrammaticalRelation(Language.Any, "dep", "dependent", null); - public static class DependentGRAnnotation extends GrammaticalRelationAnnotation{ } /** * The "root" grammatical relation between a faked "ROOT" node, and the root of the sentence. */ public static final GrammaticalRelation ROOT = - new GrammaticalRelation(Language.Any, "root", "root", RootGRAnnotation.class, null); + new GrammaticalRelation(Language.Any, "root", "root", null); - public static class RootGRAnnotation extends GrammaticalRelationAnnotation{ } /** * Dummy relation, used while collapsing relations, in English & Chinese GrammaticalStructure */ public static final GrammaticalRelation KILL = - new GrammaticalRelation(Language.Any, "KILL", "dummy relation kill", KillGRAnnotation.class, null); - - public static class KillGRAnnotation extends GrammaticalRelationAnnotation { } - - public static Class - getAnnotationClass(GrammaticalRelation relation) { - return relationsToAnnotations.get(relation); - } + new GrammaticalRelation(Language.Any, "KILL", "dummy relation kill", null); - public static GrammaticalRelation - getRelation(Class annotation) { - return annotationsToRelations.get(annotation); - } /** * Returns the GrammaticalRelation having the given string @@ -232,7 +207,7 @@ public static GrammaticalRelation valueOf(Language language, String s) { name = s; specific = null; } - reln = new GrammaticalRelation(language, name, null, null, null, specific); + reln = new GrammaticalRelation(language, name, null, null, specific); } return reln; @@ -282,7 +257,6 @@ public enum Language { Any, English, Chinese } private GrammaticalRelation(Language language, String shortName, String longName, - Class 
annotation, GrammaticalRelation parent, String sourcePattern, TregexPatternCompiler tregexCompiler, @@ -298,15 +272,6 @@ private GrammaticalRelation(Language language, parent.addChild(this); } - if (annotation != null) { - if (GrammaticalRelation.annotationsToRelations.put(annotation, this) != null) { - throw new IllegalArgumentException("Adding '" + shortName + "' failed: Annotation cannot be associated with more than one relation!"); - } - if (GrammaticalRelation.relationsToAnnotations.put(this, annotation) != null) { - throw new IllegalArgumentException("Adding '" + shortName + "' failed: There should only ever be one instance of each relation!"); - } - } - if (sourcePattern != null) { try { this.sourcePattern = Pattern.compile(sourcePattern); @@ -348,29 +313,26 @@ private GrammaticalRelation(Language language, public GrammaticalRelation(Language language, String shortName, String longName, - Class annotation, GrammaticalRelation parent, String sourcePattern, TregexPatternCompiler tregexCompiler, String... 
targetPatterns) { - this(language, shortName, longName, annotation, parent, sourcePattern, tregexCompiler, targetPatterns, null); + this(language, shortName, longName, parent, sourcePattern, tregexCompiler, targetPatterns, null); } public GrammaticalRelation(Language language, String shortName, String longName, - Class annotation, GrammaticalRelation parent) { - this(language, shortName, longName, annotation, parent, null, null, StringUtils.EMPTY_STRING_ARRAY, null); + this(language, shortName, longName, parent, null, null, StringUtils.EMPTY_STRING_ARRAY, null); } public GrammaticalRelation(Language language, String shortName, String longName, - Class annotation, GrammaticalRelation parent, String specificString) { - this(language, shortName, longName, annotation, parent, null, null, StringUtils.EMPTY_STRING_ARRAY, specificString); + this(language, shortName, longName, parent, null, null, StringUtils.EMPTY_STRING_ARRAY, specificString); } private void addChild(GrammaticalRelation child) { diff --git a/src/edu/stanford/nlp/trees/GrammaticalStructure.java b/src/edu/stanford/nlp/trees/GrammaticalStructure.java index 55ca574cb9..d9272d2289 100644 --- a/src/edu/stanford/nlp/trees/GrammaticalStructure.java +++ b/src/edu/stanford/nlp/trees/GrammaticalStructure.java @@ -16,7 +16,6 @@ import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams; import edu.stanford.nlp.process.PTBTokenizer; import edu.stanford.nlp.process.WhitespaceTokenizer; -import edu.stanford.nlp.trees.GrammaticalRelation.GrammaticalRelationAnnotation; import edu.stanford.nlp.util.ErasureUtils; import edu.stanford.nlp.util.Filter; import edu.stanford.nlp.util.Filters; @@ -101,14 +100,14 @@ public GrammaticalStructure(Tree t, Collection relations, NoPunctTypedDependencyFilter puncTypedDepFilter = new NoPunctTypedDependencyFilter(puncFilter); DirectedMultiGraph basicGraph = new DirectedMultiGraph(); - //DirectedMultiGraph ncGraph = new DirectedMultiGraph(); + DirectedMultiGraph completeGraph = new 
DirectedMultiGraph(); // analyze the root (and its descendants, recursively) if (relationsLock != null) { relationsLock.lock(); } try { - analyzeNode(root, root, relations, hf, puncFilter, basicGraph); + analyzeNode(root, root, relations, hf, puncFilter, basicGraph, completeGraph); } finally { if (relationsLock != null) { @@ -118,12 +117,10 @@ public GrammaticalStructure(Tree t, Collection relations, attachStrandedNodes(root, root, false, puncFilter, basicGraph); - addGovernorArcLabels(basicGraph); - // add typed dependencies typedDependencies = getDeps(puncTypedDepFilter, basicGraph); allTypedDependencies = Generics.newArrayList(typedDependencies); - getExtraDeps(allTypedDependencies, puncTypedDepFilter); + getExtraDeps(allTypedDependencies, puncTypedDepFilter, completeGraph); } @@ -131,14 +128,6 @@ private static void throwDepFormatException(String dep) { throw new RuntimeException(String.format("Dependencies should be for the format 'type(arg-idx, arg-idx)'. Could not parse '%s'", dep)); } - private static void addGovernorArcLabels(DirectedMultiGraph basicGraph) { - for (TreeGraphNode gov : basicGraph.getAllVertices()) { - for (TreeGraphNode dep : basicGraph.getChildren(gov)) { - dep.headWordNode().addArc(GrammaticalRelation.getAnnotationClass(GOVERNOR), gov.headWordNode()); - } - } - } - /** * Create a grammatical structure from its string representation. * @@ -211,7 +200,7 @@ public static GrammaticalStructure fromStringReps(List tokens, List relations, HeadFinder hf, Filter puncFilter, DirectedMultiGraph basicGraph) { + private static void analyzeNode(TreeGraphNode t, TreeGraphNode root, Collection relations, HeadFinder hf, Filter puncFilter, DirectedMultiGraph basicGraph, DirectedMultiGraph completeGraph) { if (t.isPhrasal()) { // don't do leaves or preterminals! 
TreeGraphNode tHigh = t.highestNodeWithSameHead(); for (GrammaticalRelation egr : relations) { @@ -280,7 +269,7 @@ private static void analyzeNode(TreeGraphNode t, TreeGraphNode root, Collection< if (!puncFilter.accept(uHigh.headWordNode().label().value())) { continue; } - tHigh.addArc(GrammaticalRelation.getAnnotationClass(egr), uHigh); + completeGraph.add(tHigh, uHigh, egr); // If there are two patterns that add dependencies, X --> Z and Y --> Z, and X dominates Y, then the dependency Y --> Z is not added to the basic graph to prevent unwanted duplication. // Similarly, if there is already a path from X --> Y, and an expression would trigger Y --> X somehow, we ignore that Set parents = basicGraph.getParents(uHigh); @@ -294,15 +283,15 @@ private static void analyzeNode(TreeGraphNode t, TreeGraphNode root, Collection< } // now recurse into children for (TreeGraphNode kid : t.children()) { - analyzeNode(kid, root, relations, hf, puncFilter, basicGraph); + analyzeNode(kid, root, relations, hf, puncFilter, basicGraph, completeGraph); } } } - private void getExtraDeps(List deps, Filter puncTypedDepFilter) { + private void getExtraDeps(List deps, Filter puncTypedDepFilter, DirectedMultiGraph completeGraph) { getExtras(deps); // adds stuff to basicDep based on the tregex patterns over the tree - getTreeDeps(root(), deps, puncTypedDepFilter, extraTreeDepFilter()); + getTreeDeps(deps, completeGraph, puncTypedDepFilter, extraTreeDepFilter()); Collections.sort(deps); } @@ -318,7 +307,7 @@ private List getDeps(Filter puncTypedDepFilter for (TreeGraphNode gov : basicGraph.getAllVertices()) { for (TreeGraphNode dep : basicGraph.getChildren(gov)) { - GrammaticalRelation reln = getGrammaticalRelation(gov, dep, basicGraph.getEdges(gov, dep)); + GrammaticalRelation reln = getGrammaticalRelationCommonAncestor(gov, dep, basicGraph.getEdges(gov, dep)); // System.err.println(" Gov: " + gov + " Dep: " + dep + " Reln: " + reln); basicDep.add(new TypedDependency(reln, gov.headWordNode(), 
dep.headWordNode())); } @@ -391,38 +380,25 @@ protected void getExtras(List basicDep) { * additional dependencies which aren't * in the List but which satisfy the filter puncTypedDepFilter. * - * @param t The tree to examine (not changed) - * @param basicDep The list of dependencies which may be augmented + * @param deps The list of dependencies which may be augmented + * @param completeGraph a graph of all the tree dependencies found earlier * @param puncTypedDepFilter The filter that may skip punctuation dependencies * @param extraTreeDepFilter Additional dependencies are added only if they pass this filter */ - private static void getTreeDeps(TreeGraphNode t, List basicDep, + private static void getTreeDeps(List deps, + DirectedMultiGraph completeGraph, Filter puncTypedDepFilter, Filter extraTreeDepFilter) { - if (t.isPhrasal()) { // don't do leaves or POS tags (chris changed this from numChildren > 0 in 2010) - Map, Set> depMap = getAllDependents(t); - for (Class depName : depMap.keySet()) { - for (TreeGraphNode depNode : depMap.get(depName)) { - TreeGraphNode gov = t.headWordNode(); - TreeGraphNode dep = depNode.headWordNode(); - if (gov != dep) { - List rels = getListGrammaticalRelation(t, depNode); - if (!rels.isEmpty()) { - for (GrammaticalRelation rel : rels) { - TypedDependency newDep = new TypedDependency(rel, gov, dep); - if (!basicDep.contains(newDep) && puncTypedDepFilter.accept(newDep) && extraTreeDepFilter.accept(newDep)) { - newDep.setExtra(); - basicDep.add(newDep); - } - } - } + for (TreeGraphNode gov : completeGraph.getAllVertices()) { + for (TreeGraphNode dep : completeGraph.getChildren(gov)) { + for (GrammaticalRelation rel : removeGrammaticalRelationAncestors(completeGraph.getEdges(gov, dep))) { + TypedDependency newDep = new TypedDependency(rel, gov.headWordNode(), dep.headWordNode()); + if (!deps.contains(newDep) && puncTypedDepFilter.accept(newDep) && extraTreeDepFilter.accept(newDep)) { + newDep.setExtra(); + deps.add(newDep); } } } - // 
now recurse into children - for (Tree kid : t.children()) { - getTreeDeps((TreeGraphNode) kid, basicDep, puncTypedDepFilter, extraTreeDepFilter); - } } } @@ -489,31 +465,23 @@ public GrammaticalRelation getGrammaticalRelation(int govIndex, int depIndex) { * Get GrammaticalRelation between gov and dep, and null if gov is not the * governor of dep */ - public static GrammaticalRelation getGrammaticalRelation(TreeGraphNode gov, TreeGraphNode dep) { - TreeGraphNode govH = gov.highestNodeWithSameHead(); - TreeGraphNode depH = dep.highestNodeWithSameHead(); - // System.err.println(" gov node " + gov); - // System.err.println(" govH " + govH); - // System.err.println(" dep node " + dep); - // System.err.println(" depH " + depH); - + public GrammaticalRelation getGrammaticalRelation(TreeGraphNode gov, TreeGraphNode dep) { List labels = Generics.newArrayList(); - for (Class arcLabel : govH.arcLabelsToNode(depH)) { - if (arcLabel == null) { - continue; - } - try { - GrammaticalRelation reln = GrammaticalRelation.getRelation(arcLabel); - labels.add(reln); - } catch (Exception e) { - continue; + for (TypedDependency dependency : typedDependencies(true)) { + if (dependency.gov() == gov && dependency.dep() == dep) { + labels.add(dependency.reln()); } } - return getGrammaticalRelation(govH, depH, labels); + return getGrammaticalRelationCommonAncestor(gov, dep, labels); } - public static GrammaticalRelation getGrammaticalRelation(TreeGraphNode govH, TreeGraphNode depH, List labels) { + /** + * Returns the GrammaticalRelation which is the highest common + * ancestor of the list of relations passed in. The TreeGraphNodes + * are passed in only for debugging reasons. 
+ */ + private static GrammaticalRelation getGrammaticalRelationCommonAncestor(TreeGraphNode govH, TreeGraphNode depH, List labels) { GrammaticalRelation reln = GrammaticalRelation.DEPENDENT; List sortedLabels; @@ -546,56 +514,31 @@ public static GrammaticalRelation getGrammaticalRelation(TreeGraphNode govH, Tre return reln; } - - /** - * Get a list of GrammaticalRelation between gov and dep. Useful for getting extra dependencies, in which - * two nodes can be linked by multiple arcs. - */ - public static List getListGrammaticalRelation(TreeGraphNode gov, TreeGraphNode dep) { - List list = new ArrayList(); - TreeGraphNode govH = gov.highestNodeWithSameHead(); - TreeGraphNode depH = dep.highestNodeWithSameHead(); - - /*System.out.println("Extra gov node " + gov); - System.out.println("govH " + govH); - System.out.println("dep node " + dep); - System.out.println("depH " + depH);*/ - - Set> arcLabels = govH.arcLabelsToNode(depH); - //System.out.println("arcLabels: " + arcLabels); - if (dep != depH) { - Set> arcLabels2 = govH.arcLabelsToNode(dep); - //System.out.println("arcLabels2: " + arcLabels2); - arcLabels.addAll(arcLabels2); - } - //System.out.println("arcLabels: " + arcLabels); - - for (Class arcLabel : arcLabels) { - if (arcLabel != null) { - GrammaticalRelation reln = GrammaticalRelation.getRelation(arcLabel); - boolean descendantFound = false; - for (int index = 0; index < list.size(); ++index) { - GrammaticalRelation gr = list.get(index); - //if the element in the list is an ancestor of the current - //relation, remove it (we will replace it later) - if (gr.isAncestor(reln)) { - list.remove(index); - --index; - } else if (reln.isAncestor(gr)) { - //if the relation is not an ancestor of an element in the - //list, we add the relation - descendantFound = true; - } - } - if (!descendantFound) { - list.add(reln); + private static List removeGrammaticalRelationAncestors(List original) { + List filtered = Generics.newArrayList(); + for (GrammaticalRelation reln : 
original) { + boolean descendantFound = false; + for (int index = 0; index < filtered.size(); ++index) { + GrammaticalRelation gr = filtered.get(index); + //if the element in the list is an ancestor of the current + //relation, remove it (we will replace it later) + if (gr.isAncestor(reln)) { + filtered.remove(index); + --index; + } else if (reln.isAncestor(gr)) { + //if the relation is not an ancestor of an element in the + //list, we add the relation + descendantFound = true; } } + if (!descendantFound) { + filtered.add(reln); + } } - //System.out.println("in list " + list); - return list; + return filtered; } + /** * Returns the typed dependencies of this grammatical structure. These * are the basic word-level typed dependencies, where each word is dependent @@ -762,65 +705,6 @@ protected void correctDependencies(Collection list) { } - /** - * Returns the dependency path as a list of String, from node to root, it is assumed that - * that root is an ancestor of node - * - * @return A list of dependency labels - */ - public List getDependencyPath(int nodeIndex, int rootIndex) { - TreeGraphNode node = getNodeByIndex(nodeIndex); - TreeGraphNode rootTree = getNodeByIndex(rootIndex); - return getDependencyPath(node, rootTree); - } - - /** - * Returns the dependency path as a list of String, from node to root, it is assumed that - * that root is an ancestor of node - * - * @param node Note to return path from - * @param root The root of the tree, an ancestor of node - * @return A list of dependency labels - */ - // used only by unused method above. 
- private static List getDependencyPath(TreeGraphNode node, TreeGraphNode root) { - List path = new ArrayList(); - while (!node.equals(root)) { - TreeGraphNode gov = node.getGovernor(); - // System.out.println("Governor for \"" + node.value() + "\": \"" + gov.value() + "\""); - List relations = getListGrammaticalRelation(gov, node); - StringBuilder sb = new StringBuilder(); - for (GrammaticalRelation relation : relations) { - //if (!arcLabel.equals(GOVERNOR)) - sb.append((sb.length() == 0 ? "" : "+")).append(relation.toString()); - } - path.add(sb.toString()); - node = gov; - } - return path; - } - - /** - * Returns all the dependencies of a certain node. - * - * @param node The node to return dependents for - * @return map of dependencies - */ - private static // separating this out helps some compilers - Map, Set> getAllDependents(TreeGraphNode node) { - Map, Set> newMap = Generics.newHashMap(); - - for (Class o : node.label.keySet()) { - if (GrammaticalRelationAnnotation.class.isAssignableFrom(o)) { - // ignore any non-GrammaticalRelationAnnotation element - Class typedKey = ErasureUtils.uncheckedCast(o); - newMap.put(typedKey, node.label.get(typedKey)); - } - } - return newMap; - } - - /** * Checks if all the typeDependencies are connected * @param list a list of typedDependencies diff --git a/src/edu/stanford/nlp/trees/SemanticHeadFinder.java b/src/edu/stanford/nlp/trees/SemanticHeadFinder.java index 3875e2025d..fb141c4cc3 100644 --- a/src/edu/stanford/nlp/trees/SemanticHeadFinder.java +++ b/src/edu/stanford/nlp/trees/SemanticHeadFinder.java @@ -61,7 +61,7 @@ public class SemanticHeadFinder extends ModCollinsHeadFinder { /* Tricky auxiliaries: "na" is from "gonna", "ve" from "Weve", etc. 
"of" as non-standard for "have" */ private static final String[] auxiliaries = {"will", "wo", "shall", "sha", "may", "might", "should", "would", "can", "could", "ca", "must", "has", "have", "had", "having", "get", "gets", "getting", "got", "gotten", "do", "does", "did", "to", "'ve", "ve", "v", "'d", "d", "'ll", "ll", "na", "of", "hav", "hvae", "as" }; private static final String[] beGetVerbs = {"be", "being", "been", "am", "are", "r", "is", "ai", "was", "were", "'m", "m", "'re", "'s", "s", "art", "ar", "get", "getting", "gets", "got"}; - private static final String[] copulaVerbs = {"be", "being", "been", "am", "are", "r", "is", "ai", "was", "were", "'m", "m", "'re", "'s", "s", "wase", "seem", "seems", "seemed", "appear", "appears", "appeared", "stay", "stays", "stayed", "remain", "remains", "remained", "resemble", "resembles", "resembled", "become", "becomes", "became"}; + static final String[] copulaVerbs = {"be", "being", "been", "am", "are", "r", "is", "ai", "was", "were", "'m", "m", "ar", "art", "'re", "'s", "s", "wase"}; // include Charniak tags so can do BLLIP right private static final String[] verbTags = {"TO", "MD", "VB", "VBD", "VBP", "VBZ", "VBG", "VBN", "AUX", "AUXG"}; diff --git a/src/edu/stanford/nlp/trees/TreeGraph.java b/src/edu/stanford/nlp/trees/TreeGraph.java index e6e7cdb558..e9e76ee211 100644 --- a/src/edu/stanford/nlp/trees/TreeGraph.java +++ b/src/edu/stanford/nlp/trees/TreeGraph.java @@ -132,9 +132,6 @@ public static void main(String[] args) { // node1.addArc("1TO4", node4); // node4.addArc("4to1", node1); // System.out.println("----------------------------"); -// System.out.println("arcs from 1 to 4: " + node1.arcLabelsToNode(node4)); -// System.out.println("arcs from 4 to 1: " + node4.arcLabelsToNode(node1)); -// System.out.println("arcs from 0 to 4: " + tg.root.arcLabelsToNode(node4)); // for (int i = 0; i <= 9; i++) { // System.out.println("parent of " + i + ": " + tg.getNodeByIndex(i).parent()); // System.out.println("highest node with 
same head as " + i + ": " + tg.getNodeByIndex(i).highestNodeWithSameHead()); diff --git a/src/edu/stanford/nlp/trees/TreeGraphNode.java b/src/edu/stanford/nlp/trees/TreeGraphNode.java index 92e7b29751..6cf51f2651 100644 --- a/src/edu/stanford/nlp/trees/TreeGraphNode.java +++ b/src/edu/stanford/nlp/trees/TreeGraphNode.java @@ -10,7 +10,6 @@ import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.Label; import edu.stanford.nlp.ling.LabelFactory; -import edu.stanford.nlp.trees.GrammaticalRelation.GrammaticalRelationAnnotation; import edu.stanford.nlp.util.ErasureUtils; import edu.stanford.nlp.util.Filter; import edu.stanford.nlp.util.Generics; @@ -340,156 +339,6 @@ protected void setTreeGraph(TreeGraph tg) { } } - /** - * Add a labeled arc from this node to the argument node. - * - * @param arcLabel the Class<? extends GrammaticalRelationAnnotation> with which the new arc - * is to be labeled. - * @param node the TreeGraphNode to which the new - * arc should point. - * @return true iff the arc did not already exist. - */ - @SuppressWarnings("unchecked") - public boolean addArc(Class arcLabel, TreeGraphNode node) { - if (node == null) { - return false; - } - if (!treeGraph().equals(node.treeGraph())) { - System.err.println("Warning: you are trying to add an arc from node " + this + " to node " + node + ", but they do not belong to the same TreeGraph!"); - } - Set collection = label.get(arcLabel); - if (collection == null) { - collection = Generics.newHashSet(); - label.set(arcLabel, collection); - } - return collection.add(node); - } - - /** - * Returns the Set of TreeGraphNodes to - * which there exist arcs bearing the specified label from this - * node, or null if no such nodes exist. - * - * @param arcLabel the Object which labels the - * arc(s) to be followed. - * @return a Set containing only and all the - * TreeGraphNodes to which there exist arcs bearing - * the specified label from this node. 
- */ - public Set followArcToSet(Class arcLabel) { - return label().get(arcLabel); - } - - /** - * Returns a single TreeGraphNode to which there - * exists an arc bearing the specified label from this node, or - * null if no such node exists. If more than one - * such node exists, this method will return an arbitrary node - * from among them; if this is a possibility, you might want to - * use {@link TreeGraphNode#followArcToSet - * followArcToSet} instead. - * - * @param arcLabel a Object containing the label of - * the arc(s) to be followed - * @return a TreeGraphNode to which there exists an - * arc bearing the specified label from this node - */ - public TreeGraphNode followArcToNode(Class arcLabel) { - Set valueSet = followArcToSet(arcLabel); - if (valueSet == null) { - return null; - } - return valueSet.iterator().next(); - } - - /** - * Finds all arcs between this node and destNode, - * and returns the Set of Objects which - * label those arcs. If no such arcs exist, returns an empty - * Set. - * - * @param destNode the destination node - * @return the Set of Objects which - * label arcs between this node and destNode - */ - public Set> arcLabelsToNode(TreeGraphNode destNode) { - Set> arcLabels = Generics.newHashSet(); - CoreLabel cl = label(); - for (Class key : cl.keySet()) { - if (key == null || !GrammaticalRelationAnnotation.class.isAssignableFrom(key)) { - continue; - } - Class typedKey = ErasureUtils.uncheckedCast(key); - Set val = cl.get(typedKey); - if (val != null && val.contains(destNode)) { - arcLabels.add(typedKey); - } - } - return arcLabels; - } - - /** - * Returns the label of a single arc between this node and destNode, - * or null if no such arc exists. If more than one - * such arc exists, this method will return an arbitrary arc label - * from among them; if this is a possibility, you might want to - * use {@link TreeGraphNode#arcLabelsToNode - * arcLabelsToNode} instead. 
- * - * @param destNode the destination node - * @return the Object which - * labels one arc between this node and destNode - */ - public Class arcLabelToNode(TreeGraphNode destNode) { - Set> arcLabels = arcLabelsToNode(destNode); - if (arcLabels == null) { - return null; - } - if (arcLabels.size() == 0) { - return null; - } - return arcLabels.iterator().next(); - } - - /** - * Tries to return a leaf (terminal) node which is the {@link - * GrammaticalRelation#GOVERNOR - * GOVERNOR} of the given node t. - * Probably, t should be a leaf node as well. - * - * @param t a leaf node in this GrammaticalStructure - * @return a node which is the governor for node - * t, or else null - */ - public TreeGraphNode getGovernor() { - return getNodeInRelation(GOVERNOR); - } - - public TreeGraphNode getNodeInRelation(GrammaticalRelation r) { - return followArcToNode(GrammaticalRelation.getAnnotationClass(r)); - } - - /** - * Tries to return a Set of leaf (terminal) nodes - * which are the {@link GrammaticalRelation#DEPENDENT - * DEPENDENT}s of the given node t. - * Probably, this should be a leaf node as well. 
- * - * @return a Set of nodes which are dependents of - * node this, possibly an empty set - */ - public Set getDependents() { - Set deps = Generics.newHashSet(); - for (Tree subtree : treeGraph().root()) { - TreeGraphNode node = (TreeGraphNode) subtree; - TreeGraphNode gov = node.getGovernor(); - if (gov != null && gov == this) { - deps.add(node); - } - } - return deps; - } - /** * Uses the specified {@link HeadFinder HeadFinder} * to determine the heads for this node and all its descendants, diff --git a/src/edu/stanford/nlp/trees/international/pennchinese/ChineseGrammaticalRelations.java b/src/edu/stanford/nlp/trees/international/pennchinese/ChineseGrammaticalRelations.java index 964af28e80..941125533c 100644 --- a/src/edu/stanford/nlp/trees/international/pennchinese/ChineseGrammaticalRelations.java +++ b/src/edu/stanford/nlp/trees/international/pennchinese/ChineseGrammaticalRelations.java @@ -41,7 +41,6 @@ import static edu.stanford.nlp.trees.GrammaticalRelation.DEPENDENT; import static edu.stanford.nlp.trees.GrammaticalRelation.GOVERNOR; -import edu.stanford.nlp.trees.GrammaticalRelation.GrammaticalRelationAnnotation; /** * ChineseGrammaticalRelations is a @@ -96,18 +95,13 @@ public static GrammaticalRelation valueOf(String s) { * Arguments are required by their heads. */ public static final GrammaticalRelation ARGUMENT = - new GrammaticalRelation(Language.Chinese, "arg", "argument", ArgumentGRAnnotation.class, - DEPENDENT); - public static class ArgumentGRAnnotation - extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.Chinese, "arg", "argument", DEPENDENT); /** * The "subject" (subj) grammatical relation (abstract). 
*/ public static final GrammaticalRelation SUBJECT = - new GrammaticalRelation(Language.Chinese, "subj", "subject", - SubjectGRAnnotation.class, ARGUMENT); - public static class SubjectGRAnnotation extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.Chinese, "subj", "subject", ARGUMENT); /** * The "nominal subject" (nsubj) grammatical relation. A nominal subject is @@ -132,13 +126,12 @@ public static class SubjectGRAnnotation extends GrammaticalRelationAnnotation { */ public static final GrammaticalRelation NOMINAL_SUBJECT = new GrammaticalRelation(Language.Chinese, "nsubj", "nominal subject", - NominalSubjectGRAnnotation.class, SUBJECT, "IP|VP", tregexCompiler, + SUBJECT, "IP|VP", tregexCompiler, new String[] { "IP <( ( NP|QP=target!< NT ) $++ ( /^VP|VCD|IP/ !< VE ! VP < ( VV $+ ( NP|DP|QP|CLP=target . NP|DP ) )" }); - public static class IndirectObjectGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "range" grammatical relation (Chinese only). The indirect @@ -285,13 +264,11 @@ public static class IndirectObjectGRAnnotation public static final GrammaticalRelation RANGE = new GrammaticalRelation(Language.Chinese, "range", "range", - RangeGRAnnotation.class, INDIRECT_OBJECT, "VP", tregexCompiler, + INDIRECT_OBJECT, "VP", tregexCompiler, new String[]{ "VP < ( NP|DP|QP $+ NP|DP|QP=target)", "VP < ( VV $+ QP=target )" }); - public static class RangeGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "clausal complement" (ccomp) grammatical relation. 
@@ -317,15 +294,13 @@ public static class RangeGRAnnotation public static final GrammaticalRelation CLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.Chinese, "ccomp", "clausal complement", - ClausalComplementGRAnnotation.class, COMPLEMENT, "VP|ADJP|IP", tregexCompiler, + COMPLEMENT, "VP|ADJP|IP", tregexCompiler, new String[]{ " VP < VV|VC|VRD|VCD !< NP|QP|LCP < IP|VP|VRD|VCD=target > IP|VP " }); // " VP|IP < ( VV|VC|VRD|VCD !$+ NP|QP|LCP ) > (IP < IP|VP|VRD|VCD=target) " // "VP < (S=target < (VP !<, TO|VBG) !$-- NP)", - public static class ClausalComplementGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "xclausal complement" (xcomp) grammatical relation. @@ -335,7 +310,7 @@ public static class ClausalComplementGRAnnotation /*public static final GrammaticalRelation XCLAUSAL_COMPLEMENT = new GrammaticalRelation(Language.Chinese, "xcomp", "xclausal complement", - XClausalComplementGRAnnotation.class, COMPLEMENT, "VP|ADJP", tregexCompiler, + COMPLEMENT, "VP|ADJP", tregexCompiler, new String[]{ // TODO: these rules seem to always collide with ccomp. // Is this really desirable behavior? @@ -344,8 +319,6 @@ public static class ClausalComplementGRAnnotation //"VP < (IP=target < (NP $+ NP|ADJP))", //"VP < (/^VC/ $+ (VP=target < VC < NP))" }); - public static class XClausalComplementGRAnnotation - extends GrammaticalRelationAnnotation { }*/ //////////////////////////////////////////////////////////// // MODIFIER relations @@ -355,10 +328,7 @@ public static class XClausalComplementGRAnnotation * The "modifier" (mod) grammatical relation (abstract). */ public static final GrammaticalRelation MODIFIER = - new GrammaticalRelation(Language.Chinese, "mod", "modifier", - ModifierGRAnnotation.class, DEPENDENT); - public static class ModifierGRAnnotation - extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.Chinese, "mod", "modifier", DEPENDENT); /** * The "number modifier" (nummod) grammatical relation. 
@@ -377,56 +347,46 @@ public static class ModifierGRAnnotation */ public static final GrammaticalRelation NUMERIC_MODIFIER = new GrammaticalRelation(Language.Chinese, "nummod", "numeric modifier", - NumericModifierGRAnnotation.class, MODIFIER, + MODIFIER, "QP|NP", tregexCompiler, new String[]{ "QP < CD=target", "NP < ( QP=target !<< CLP )" }); - public static class NumericModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "ordinal modifier" (ordmod) grammatical relation. */ public static final GrammaticalRelation ORDINAL_MODIFIER = new GrammaticalRelation(Language.Chinese, "ordmod", "ordinal numeric modifier", - OrdinalModifierGRAnnotation.class, NUMERIC_MODIFIER, + NUMERIC_MODIFIER, "NP|QP", tregexCompiler, new String[]{ "NP < QP=target < ( OD !$+ CLP )", "QP < (OD=target $+ CLP)" }); - public static class OrdinalModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "appositional modifier" (appos) grammatical relation (abstract). */ public static final GrammaticalRelation APPOSITIONAL_MODIFIER = - new GrammaticalRelation(Language.Chinese, "appos", "appositional modifier", AppositionalModifierGRAnnotation.class, MODIFIER); - public static class AppositionalModifierGRAnnotation - extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.Chinese, "appos", "appositional modifier", MODIFIER); /** * The "parenthetical modifier" (prnmod) grammatical relation (Chinese-specific). */ public static final GrammaticalRelation PARENTHETICAL_MODIFIER = new GrammaticalRelation(Language.Chinese, "prnmod", "parenthetical modifier", - ParentheticalGRAnnotation.class, MODIFIER, "NP", tregexCompiler, + MODIFIER, "NP", tregexCompiler, new String[]{ "NP < PRN=target " }); - public static class ParentheticalGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "noun modifier" grammatical relation (abstract). 
*/ public static final GrammaticalRelation NOUN_MODIFIER = - new GrammaticalRelation(Language.Chinese, "nmod", "noun modifier", NounModifierGRAnnotation.class, MODIFIER); - public static class NounModifierGRAnnotation - extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.Chinese, "nmod", "noun modifier", MODIFIER); /** * The "associative modifier" (assmod) grammatical relation (Chinese-specific). @@ -435,12 +395,10 @@ public static class NounModifierGRAnnotation public static final GrammaticalRelation ASSOCIATIVE_MODIFIER = new GrammaticalRelation(Language.Chinese, "assmod", "associative modifier (examples: 上海市/Shanghai[modifier] 的 规定/law[head])", - AssociativeModifierGRAnnotation.class, NOUN_MODIFIER, "NP|QP", tregexCompiler, + NOUN_MODIFIER, "NP|QP", tregexCompiler, new String[]{ "NP|QP < ( DNP =target $++ NP|QP ) " }); - public static class AssociativeModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "temporal modifier" grammatical relation. @@ -461,12 +419,10 @@ public static class AssociativeModifierGRAnnotation public static final GrammaticalRelation TEMPORAL_MODIFIER = new GrammaticalRelation(Language.Chinese, "tmod", "temporal modifier", - TemporalGRAnnotation.class, NOUN_MODIFIER, "VP|IP", tregexCompiler, + NOUN_MODIFIER, "VP|IP", tregexCompiler, new String[]{ "VP|IP < (NP=target < NT !.. /^VC$/ $++ VP)" }); - public static class TemporalGRAnnotation - extends GrammaticalRelationAnnotation { } /* This rule actually matches nothing. There's another tmod rule. This is removed for now. @@ -476,13 +432,11 @@ public static class TemporalGRAnnotation public static final GrammaticalRelation TEMPORAL_MODIFIER = new GrammaticalRelation(Language.Chinese, "tmod", "temporal modifier", - TemporalModifierGRAnnotation.class, MODIFIER, "VP|IP|ADJP", tregexCompiler, + MODIFIER, "VP|IP|ADJP", tregexCompiler, new String[]{ " VC|VE ! 
>> VP|ADJP < NP=target < NT", "VC|VE !>>IP <( NP=target < NT $++ VP !< VC|VE )" }); - public static class TemporalModifierGRAnnotation - extends GrammaticalRelationAnnotation { } */ /** @@ -513,7 +467,6 @@ public static class TemporalModifierGRAnnotation */ public static final GrammaticalRelation RELATIVE_CLAUSE_MODIFIER = new GrammaticalRelation(Language.Chinese, "relcl", "relative clause modifier", - RelativeClauseModifierGRAnnotation.class, MODIFIER, "NP", tregexCompiler, new String[] { // TODO: we should figure out various @@ -522,8 +475,6 @@ public static class TemporalModifierGRAnnotation "NP $++ (CP=target ) > NP ", "NP < ( CP=target $++ NP )" }); - public static class RelativeClauseModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "non-finite clause" grammatical relation. @@ -532,12 +483,10 @@ public static class RelativeClauseModifierGRAnnotation public static final GrammaticalRelation NONFINITE_CLAUSE_MODIFIER = new GrammaticalRelation(Language.Chinese, "nfincl", "non-finite clause modifier (examples: stores[head] based[modifier] in Boston", - NonfiniteClauseModifierGRAnnotation.class, MODIFIER, "NP", tregexCompiler, + MODIFIER, "NP", tregexCompiler, new String[]{ "NP < IP=target " }); - public static class NonfiniteClauseModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "adjective modifier" (amod) grammatical relation. @@ -556,12 +505,10 @@ public static class NonfiniteClauseModifierGRAnnotation public static final GrammaticalRelation ADJECTIVAL_MODIFIER = new GrammaticalRelation(Language.Chinese, "amod", "adjectival modifier", - AdjectivalModifierGRAnnotation.class, MODIFIER, "NP|CLP|QP", tregexCompiler, + MODIFIER, "NP|CLP|QP", tregexCompiler, new String[]{ "NP|CLP|QP < (ADJP=target $++ NP|CLP|QP ) " }); - public static class AdjectivalModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "determiner modifier" (det) grammatical relation. 
@@ -578,13 +525,11 @@ public static class AdjectivalModifierGRAnnotation */ public static final GrammaticalRelation DETERMINER = new GrammaticalRelation(Language.Chinese, "det", "determiner", - DeterminerGRAnnotation.class, MODIFIER, "^NP|DP", tregexCompiler, + MODIFIER, "^NP|DP", tregexCompiler, new String[]{ "/^NP/ < (DP=target $++ NP )" //"DP < DT < QP=target" }); - public static class DeterminerGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "negative modifier" (neg) grammatical relation. @@ -605,13 +550,11 @@ public static class DeterminerGRAnnotation public static final GrammaticalRelation NEGATION_MODIFIER = new GrammaticalRelation(Language.Chinese, "neg", "negation modifier", - NegationModifierGRAnnotation.class, MODIFIER, "VP|ADJP|IP", tregexCompiler, + MODIFIER, "VP|ADJP|IP", tregexCompiler, new String[] { "VP|ADJP|IP < (AD|VV=target < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/)", "VP|ADJP|IP < (ADVP|VV=target < (AD < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/))" }); - public static class NegationModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "adverbial modifier" (advmod) grammatical relation. @@ -630,7 +573,7 @@ public static class NegationModifierGRAnnotation public static final GrammaticalRelation ADVERBIAL_MODIFIER = new GrammaticalRelation(Language.Chinese, "advmod", "adverbial modifier", - AdverbialModifierGRAnnotation.class, MODIFIER, + MODIFIER, "VP|ADJP|IP|CP|PP|NP|QP", tregexCompiler, new String[]{ "VP|ADJP|IP|CP|PP|NP < (ADVP=target !< (AD < /^(\\u4e0d|\\u6CA1|\\u6CA1\\u6709)$/))", @@ -638,8 +581,6 @@ public static class NegationModifierGRAnnotation "QP < (ADVP=target $+ QP)", "QP < ( QP $+ ADVP=target)" }); - public static class AdverbialModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "dvp modifier" grammatical relation. 
@@ -658,12 +599,10 @@ public static class AdverbialModifierGRAnnotation */ public static final GrammaticalRelation DVPM_MODIFIER = new GrammaticalRelation(Language.Chinese, "dvpmod", "dvp modifier", - DvpModifierGRAnnotation.class, ADVERBIAL_MODIFIER, "VP", tregexCompiler, + ADVERBIAL_MODIFIER, "VP", tregexCompiler, new String[]{ " VP < ( DVP=target $+ VP) " }); - public static class DvpModifierGRAnnotation - extends GrammaticalRelationAnnotation { } //////////////////////////////////////////////////////////// // Special clausal dependents @@ -674,13 +613,11 @@ public static class DvpModifierGRAnnotation */ public static final GrammaticalRelation AUX_MODIFIER = new GrammaticalRelation(Language.Chinese, "aux", "auxiliary (example: should[modifier] leave[head])", - AuxModifierGRAnnotation.class, DEPENDENT, "VP", tregexCompiler, + DEPENDENT, "VP", tregexCompiler, new String[]{ // TODO //" VP < VC=target" }); - public static class AuxModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "modal" grammatical relation. @@ -693,12 +630,10 @@ public static class AuxModifierGRAnnotation */ public static final GrammaticalRelation MODAL_VERB = new GrammaticalRelation(Language.Chinese, "mmod", "modal verb", - ModalGRAnnotation.class, AUX_MODIFIER, "VP", tregexCompiler, + AUX_MODIFIER, "VP", tregexCompiler, new String[]{ "VP < ( VV=target !< /^没有$/ $+ VP|VRD )" }); - public static class ModalGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "aspect marker" grammatical relation. 
@@ -709,24 +644,20 @@ public static class ModalGRAnnotation */ public static final GrammaticalRelation ASPECT_MARKER = new GrammaticalRelation(Language.Chinese, "asp", "aspect", - AspectMarkerGRAnnotation.class, AUX_MODIFIER, "VP", tregexCompiler, + AUX_MODIFIER, "VP", tregexCompiler, new String[]{ "VP < ( /^V*/ $+ AS=target)" }); - public static class AspectMarkerGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "auxiliary passive" (auxpass) grammatical relation. */ public static final GrammaticalRelation AUX_PASSIVE_MODIFIER = new GrammaticalRelation(Language.Chinese, "auxpass", "auxiliary passive", - AuxPassiveGRAnnotation.class, MODIFIER, "VP", tregexCompiler, + MODIFIER, "VP", tregexCompiler, new String[]{ "VP < SB|LB=target" }); - public static class AuxPassiveGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "copula" grammatical relation. @@ -744,12 +675,10 @@ public static class AuxPassiveGRAnnotation */ public static final GrammaticalRelation COPULA = new GrammaticalRelation(Language.Chinese, "cop", "copula", - CopulaGRAnnotation.class, DEPENDENT, "VP", tregexCompiler, + DEPENDENT, "VP", tregexCompiler, new String[]{ " VP < VC=target" }); - public static class CopulaGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "marker" (mark) grammatical relation. A marker is the word @@ -805,15 +734,13 @@ public static class CopulaGRAnnotation public static final GrammaticalRelation MARK = new GrammaticalRelation(Language.Chinese, "mark", "marker (examples: that[modifier] expanded[head]; 开发/expand[head] 浦东/Pudong 的[modifier])", - MarkGRAnnotation.class, DEPENDENT, "^PP|^LCP|^CP|^DVP", tregexCompiler, + DEPENDENT, "^PP|^LCP|^CP|^DVP", tregexCompiler, new String[]{ "/^PP/ < (P=target $+ VP)", "/^LCP/ < (P=target $+ VP)", "/^CP/ < (__ $++ DEC=target)", "DVP < (__ $+ DEV=target)" }); - public static class MarkGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "punctuation" grammatical relation. 
This is used for any piece of @@ -822,9 +749,8 @@ public static class MarkGRAnnotation */ public static final GrammaticalRelation PUNCTUATION = new GrammaticalRelation(Language.Chinese, "punct", "punctuation", - PunctuationGRAnnotation.class, DEPENDENT, ".*", tregexCompiler, + DEPENDENT, ".*", tregexCompiler, "__ < PU=target"); - public static class PunctuationGRAnnotation extends GrammaticalRelationAnnotation { } //////////////////////////////////////////////////////////// // Other (compounding, coordination) @@ -834,10 +760,7 @@ public static class PunctuationGRAnnotation extends GrammaticalRelationAnnotatio * The "compound" grammatical relation (abstract). */ public static final GrammaticalRelation COMPOUND = - new GrammaticalRelation(Language.Chinese, "compound", "compound (examples: phone book, three thousand)", - CompoundGRAnnotation.class, ARGUMENT); - public static class CompoundGRAnnotation - extends GrammaticalRelationAnnotation { } + new GrammaticalRelation(Language.Chinese, "compound", "compound (examples: phone book, three thousand)", ARGUMENT); /** * The "noun compound" (nn) grammatical relation. @@ -855,14 +778,12 @@ public static class CompoundGRAnnotation public static final GrammaticalRelation NOUN_COMPOUND = new GrammaticalRelation(Language.Chinese, "nn", "noun compound", - NounCompoundModifierGRAnnotation.class, COMPOUND, "^NP", tregexCompiler, + COMPOUND, "^NP", tregexCompiler, new String[]{ "NP < (NN|NR|NT=target $+ NN|NR|NT)", "NP < (NN|NR|NT $+ FW=target)", "NP < (NP=target !$+ PU|CC $++ NP|PRN)" }); - public static class NounCompoundModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "coordinated verb compound" grammatical relation. 
@@ -871,12 +792,10 @@ public static class NounCompoundModifierGRAnnotation */ public static final GrammaticalRelation VERB_COMPOUND = new GrammaticalRelation(Language.Chinese, "comod", "coordinated verb compound", - VerbCompoundGRAnnotation.class, COMPOUND, "VCD", tregexCompiler, + COMPOUND, "VCD", tregexCompiler, new String[]{ "VCD < ( VV|VA $+ VV|VA=target)" }); - public static class VerbCompoundGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "conjunct" (conj) grammatical relation. @@ -905,7 +824,7 @@ public static class VerbCompoundGRAnnotation public static final GrammaticalRelation CONJUNCT = new GrammaticalRelation(Language.Chinese, "conj", "conjunct", - PreconjunctGRAnnotation.class, DEPENDENT, "FRAG|INC|IP|VP|NP|ADJP|PP|ADVP|UCP", tregexCompiler, + DEPENDENT, "FRAG|INC|IP|VP|NP|ADJP|PP|ADVP|UCP", tregexCompiler, new String[]{ "NP|ADJP|PP|ADVP|UCP < (!PU=target $+ CC)", // Split the first rule to the second rule to avoid the duplication: @@ -947,8 +866,6 @@ public static class VerbCompoundGRAnnotation "FRAG|INC|IP|VP < (VP < VV|VC|VRD|VCD|VE|VA < NP|QP|LCP $ IP|VP|VRD|VCD|VE|VC|VA=target) ", "IP|VP < ( IP|VP < NP|QP|LCP $ IP|VP=target )", }); - public static class PreconjunctGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "coordination" grammatical relation. @@ -974,13 +891,11 @@ public static class PreconjunctGRAnnotation */ public static final GrammaticalRelation COORDINATION = new GrammaticalRelation(Language.Chinese, - "cc", "coordination", CoordinationGRAnnotation.class, DEPENDENT, + "cc", "coordination", DEPENDENT, "VP|NP|ADJP|PP|ADVP|UCP|IP|QP", tregexCompiler, new String[]{ "VP|NP|ADJP|PP|ADVP|UCP|IP|QP < (CC=target)" }); - public static class CoordinationGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "case" grammatical relation. 
@@ -1024,7 +939,7 @@ public static class CoordinationGRAnnotation public static final GrammaticalRelation CASE = new GrammaticalRelation(Language.Chinese, "case", "case marking (examples: Chair[head] 's[modifier], 根据/according[modifier] ... 规定/rule[head]; 近年/this year[head] 来[modifier])", - CaseGRAnnotation.class, DEPENDENT, "^PP|^LCP|^DNP", tregexCompiler, + DEPENDENT, "^PP|^LCP|^DNP", tregexCompiler, new String[]{ //"/^NP|^DP|QP/ > (/^PP/ < P=target)", //"/^NP|^DP|QP/ > (/^LCP/ < LC=target)", @@ -1033,8 +948,6 @@ public static class CoordinationGRAnnotation "/^LCP/ < LC=target", "/^DNP/ < DEG=target", }); - public static class CaseGRAnnotation - extends GrammaticalRelationAnnotation { } //////////////////////////////////////////////////////////// // Other stuff: pliang: not sure exactly where they should go. @@ -1054,12 +967,10 @@ public static class CaseGRAnnotation public static final GrammaticalRelation PREPOSITIONAL_LOCALIZER_MODIFIER = new GrammaticalRelation(Language.Chinese, "plmod", "prepositional localizer modifier", - PrepositionalLocalizerModifierGRAnnotation.class, MODIFIER, "PP", tregexCompiler, + MODIFIER, "PP", tregexCompiler, new String[]{ "PP < ( P $++ LCP=target )" }); - public static class PrepositionalLocalizerModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "adjectival complement" grammatical relation. 
@@ -1070,12 +981,10 @@ public static class PrepositionalLocalizerModifierGRAnnotation public static final GrammaticalRelation ADJECTIVAL_COMPLEMENT = new GrammaticalRelation(Language.Chinese, "acomp", "adjectival complement", - AdjectivalComplementGRAnnotation.class, COMPLEMENT, "VP", tregexCompiler, + COMPLEMENT, "VP", tregexCompiler, new String[]{ "VP < (ADJP=target !$-- NP)" }); - public static class AdjectivalComplementGRAnnotation - extends GrammaticalRelationAnnotation { } */ /** @@ -1093,12 +1002,10 @@ public static class AdjectivalComplementGRAnnotation public static final GrammaticalRelation LOCALIZER_COMPLEMENT = new GrammaticalRelation(Language.Chinese, "loc", "localizer complement", - LocalizerComplementGRAnnotation.class, COMPLEMENT, "VP|IP", tregexCompiler, + COMPLEMENT, "VP|IP", tregexCompiler, new String[]{ "VP|IP < LCP=target " }); - public static class LocalizerComplementGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "resultative complement" grammatical relation. @@ -1106,23 +1013,20 @@ public static class LocalizerComplementGRAnnotation public static final GrammaticalRelation RESULTATIVE_COMPLEMENT = new GrammaticalRelation(Language.Chinese, "rcomp", "result verb", - ResultativeComplementGRAnnotation.class, COMPLEMENT, "VRD", tregexCompiler, + COMPLEMENT, "VRD", tregexCompiler, new String[]{ "VRD < ( /V*/ $+ /V*/=target )" }); - public static class ResultativeComplementGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "ba" grammatical relation. */ public static final GrammaticalRelation BA = new GrammaticalRelation(Language.Chinese, "ba", "ba", - BaGRAnnotation.class, DEPENDENT, "VP|IP", tregexCompiler, + DEPENDENT, "VP|IP", tregexCompiler, new String[]{ "VP|IP < BA=target " }); - public static class BaGRAnnotation extends GrammaticalRelationAnnotation { } /** * The "classifier modifier" grammatical relation. 
@@ -1141,13 +1045,11 @@ public static class BaGRAnnotation extends GrammaticalRelationAnnotation { } public static final GrammaticalRelation CLASSIFIER_MODIFIER = new GrammaticalRelation(Language.Chinese, "clf", "classifier modifier", - ClassifierModifierGRAnnotation.class, MODIFIER, "^NP|DP|QP", tregexCompiler, + MODIFIER, "^NP|DP|QP", tregexCompiler, new String[]{ "NP|QP < ( QP =target << M $++ NN|NP|QP)", "DP < ( DT $+ CLP=target )" }); - public static class ClassifierModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "possession modifier" grammatical relation. @@ -1161,12 +1063,10 @@ public static class ClassifierModifierGRAnnotation public static final GrammaticalRelation POSSESSION_MODIFIER = new GrammaticalRelation(Language.Chinese, "poss", "possession modifier", - PossessionModifierGRAnnotation.class, MODIFIER, "NP", tregexCompiler, + MODIFIER, "NP", tregexCompiler, new String[]{ "NP < ( PN=target $+ DEC $+ NP )" }); - public static class PossessionModifierGRAnnotation - extends GrammaticalRelationAnnotation { } */ /** @@ -1177,13 +1077,10 @@ public static class PossessionModifierGRAnnotation /* public static final GrammaticalRelation POSSESSIVE_MODIFIER = new GrammaticalRelation(Language.Chinese, "possm", "possessive marker", - PossessiveModifierGRAnnotation.class, MODIFIER, "NP", tregexCompiler, new String[]{ "NP < ( PN $+ DEC=target ) " }); - public static class PossessiveModifierGRAnnotation - extends GrammaticalRelationAnnotation { } */ /** @@ -1207,15 +1104,12 @@ public static class PossessiveModifierGRAnnotation public static final GrammaticalRelation PREPOSITIONAL_MODIFIER = new GrammaticalRelation(Language.Chinese, "prep", "prepositional modifier", - PrepositionalModifierGRAnnotation.class, MARK, "^NP|VP|IP", tregexCompiler, new String[]{ "/^NP/ < /^PP/=target", "VP < /^PP/=target", "IP < /^PP/=target " }); - public static class PrepositionalModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The 
"participial modifier" (prtmod) grammatical relation. @@ -1223,12 +1117,10 @@ public static class PrepositionalModifierGRAnnotation public static final GrammaticalRelation PART_VERB = new GrammaticalRelation(Language.Chinese, "prtmod", "particle verb", - ParticipialModifierGRAnnotation.class, MODIFIER, "VP|IP", tregexCompiler, + MODIFIER, "VP|IP", tregexCompiler, new String[]{ "VP|IP < ( MSP=target )" }); - public static class ParticipialModifierGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "etc" grammatical relation. @@ -1239,12 +1131,10 @@ public static class ParticipialModifierGRAnnotation */ public static final GrammaticalRelation ETC = new GrammaticalRelation(Language.Chinese, "etc", "ETC", - EtcGRAnnotation.class, MODIFIER, "^NP", tregexCompiler, + MODIFIER, "^NP", tregexCompiler, new String[]{ "/^NP/ < (NN|NR . ETC=target)" }); - public static class EtcGRAnnotation - extends GrammaticalRelationAnnotation { } /** * The "xsubj" grammatical relation. @@ -1274,14 +1164,12 @@ public static class EtcGRAnnotation public static final GrammaticalRelation CONTROLLED_SUBJECT = new GrammaticalRelation(Language.Chinese, "xsubj", "controlled subject", - ControllingSubjectGRAnnotation.class, DEPENDENT, "VP", tregexCompiler, + DEPENDENT, "VP", tregexCompiler, new String[] { "VP !< NP < VP > (IP !$- NP !< NP !>> (VP < VC ) >+(VP) (VP $-- NP=target))" }); - public static class ControllingSubjectGRAnnotation - extends GrammaticalRelationAnnotation { } - // Universal GrammaticalRelation's + // Universal GrammaticalRelations private static final GrammaticalRelation chineseOnly = null; // Place-holder: put this after a relation to mark it as Chinese-only private static final GrammaticalRelation[] rawValues = { DEPENDENT, diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/ExciseNode.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/ExciseNode.java index d896f49ff8..ec3ac318fb 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/ExciseNode.java +++ 
b/src/edu/stanford/nlp/trees/tregex/tsurgeon/ExciseNode.java @@ -32,8 +32,13 @@ public Tree evaluate(Tree t, TregexMatcher m) { t.pennPrint(System.err); System.err.println("top: " + topNode + "\nbottom:" + bottomNode); } - if(topNode==t) - return null; + if (topNode == t) { + if (bottomNode.children().length == 1) { + return bottomNode.children()[0]; + } else { + return null; + } + } Tree parent = topNode.parent(t); if(Tsurgeon.verbose) System.err.println("Parent: " + parent); diff --git a/test/src/edu/stanford/nlp/parser/shiftreduce/OracleTest.java b/test/src/edu/stanford/nlp/parser/shiftreduce/OracleTest.java index da6c2ccb0b..b72b6b204d 100644 --- a/test/src/edu/stanford/nlp/parser/shiftreduce/OracleTest.java +++ b/test/src/edu/stanford/nlp/parser/shiftreduce/OracleTest.java @@ -7,6 +7,7 @@ import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.Treebank; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -51,13 +52,13 @@ public List buildTestTreebank() { */ public void testEndToEndCompoundUnaries() { List binarizedTrees = buildTestTreebank(); - Oracle oracle = new Oracle(binarizedTrees, true); + Oracle oracle = new Oracle(binarizedTrees, true, Collections.singleton("ROOT")); runEndToEndTest(binarizedTrees, oracle); } public void testEndToEndSingleUnaries() { List binarizedTrees = buildTestTreebank(); - Oracle oracle = new Oracle(binarizedTrees, false); + Oracle oracle = new Oracle(binarizedTrees, false, Collections.singleton("ROOT")); runEndToEndTest(binarizedTrees, oracle); } diff --git a/test/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParserTest.java b/test/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParserTest.java index 9552c3d3a0..95f3e768dd 100644 --- a/test/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParserTest.java +++ b/test/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParserTest.java @@ -3,6 +3,7 @@ import junit.framework.TestCase; import java.util.Arrays; +import java.util.Collections; 
import java.util.List; import edu.stanford.nlp.ling.Sentence; @@ -39,7 +40,7 @@ public class ShiftReduceParserTest extends TestCase { public void testUnaryTransitions() { for (String treeText : treeStrings) { Tree tree = convertTree(treeText); - List transitions = CreateTransitionSequence.createTransitionSequence(tree, false); + List transitions = CreateTransitionSequence.createTransitionSequence(tree, false, Collections.singleton("ROOT"), Collections.singleton("ROOT")); State state = ShiftReduceParser.initialStateFromGoldTagTree(tree); for (Transition transition : transitions) { state = transition.apply(state); @@ -54,7 +55,7 @@ public void testUnaryTransitions() { public void testCompoundUnaryTransitions() { for (String treeText : treeStrings) { Tree tree = convertTree(treeText); - List transitions = CreateTransitionSequence.createTransitionSequence(tree, true); + List transitions = CreateTransitionSequence.createTransitionSequence(tree, true, Collections.singleton("ROOT"), Collections.singleton("ROOT")); State state = ShiftReduceParser.initialStateFromGoldTagTree(tree); for (Transition transition : transitions) { state = transition.apply(state); @@ -74,7 +75,7 @@ Tree convertTree(String treeText) { public void testSeparators() { Tree tree = convertTree(commaTreeString); - List transitions = CreateTransitionSequence.createTransitionSequence(tree, true); + List transitions = CreateTransitionSequence.createTransitionSequence(tree, true, Collections.singleton("ROOT"), Collections.singleton("ROOT")); List expectedTransitions = Arrays.asList(new String[] { "Shift", "Shift", "Shift", "Shift", "RightBinary(@ADJP)", "RightBinary(ADJP)", "Shift", "RightBinary(@NP)", "RightBinary(NP)", "CompoundUnary*([ROOT, FRAG])", "Finalize", "Idle" }); assertEquals(expectedTransitions, CollectionUtils.transformAsList(transitions, new Function() { public String apply(Transition t) { return t.toString(); } })); diff --git a/test/src/edu/stanford/nlp/pipeline/AnnotationTest.java 
b/test/src/edu/stanford/nlp/pipeline/AnnotationTest.java new file mode 100644 index 0000000000..7aa430a447 --- /dev/null +++ b/test/src/edu/stanford/nlp/pipeline/AnnotationTest.java @@ -0,0 +1,35 @@ +package edu.stanford.nlp.pipeline; + +import junit.framework.TestCase; + +import java.util.List; +import edu.stanford.nlp.ling.CoreAnnotations; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.ling.Sentence; +import edu.stanford.nlp.util.ArrayCoreMap; +import edu.stanford.nlp.util.CoreMap; +import edu.stanford.nlp.util.Generics; + +/** + * @author John Bauer + */ +public class AnnotationTest extends TestCase { + /** + * Test a bug a user reported where the text would wind up having the list toString used, adding extra [] + */ + public void testFromList() { + List sentences = Generics.newArrayList(); + + CoreMap sentence = new ArrayCoreMap(); + List words = Sentence.toCoreLabelList("This", "is", "a", "test", "."); + sentence.set(CoreAnnotations.TokensAnnotation.class, words); + sentences.add(sentence); + + Annotation annotation = new Annotation(sentences); + assertEquals("This is a test .", annotation.toString()); + + sentence.set(CoreAnnotations.TextAnnotation.class, "This is a test."); + annotation = new Annotation(sentences); + assertEquals("This is a test.", annotation.toString()); + } +} diff --git a/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java b/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java index 00b759bf0a..bd595c880d 100644 --- a/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java +++ b/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java @@ -1014,7 +1014,7 @@ public void testToBeRelations() { "dobj(expecting-5, pizza-6)\n", "nsubj(going-3, Who-1)\n" + - "xsubj(carry-5, Who-1)\n" + + "nsubj(carry-5, Who-1)\n" + "aux(going-3, is-2)\n" + "root(ROOT-0, going-3)\n" + "aux(carry-5, to-4)\n" + @@ -1311,8 +1311,8 @@ public void testNonCollapsedRelations() { // the expected 
dependency answers (basic + extra) String[] testAnswers = { - "nsubj(like-2, I-1)\n" + "xsubj(swim-4, I-1)\n" + "root(ROOT-0, like-2)\n" + "aux(swim-4, to-3)\n" + "xcomp(like-2, swim-4)\n", - "nsubj(says-2, He-1)\n" + "root(ROOT-0, says-2)\n" + "mark(like-5, that-3)\n" + "nsubj(like-5, you-4)\n" + "xsubj(swim-7, you-4)\n" + "ccomp(says-2, like-5)\n" + "aux(swim-7, to-6)\n" + "xcomp(like-5, swim-7)\n", + "nsubj(like-2, I-1)\n" + "nsubj(swim-4, I-1)\n" + "root(ROOT-0, like-2)\n" + "aux(swim-4, to-3)\n" + "xcomp(like-2, swim-4)\n", + "nsubj(says-2, He-1)\n" + "root(ROOT-0, says-2)\n" + "mark(like-5, that-3)\n" + "nsubj(like-5, you-4)\n" + "nsubj(swim-7, you-4)\n" + "ccomp(says-2, like-5)\n" + "aux(swim-7, to-6)\n" + "xcomp(like-5, swim-7)\n", "nsubj(saw-2, I-1)\n" + "root(ROOT-0, saw-2)\n" + "det(man-4, the-3)\n" + "dobj(saw-2, man-4)\n" + "ref(man-4, who-5)\n" + "dobj(love-7, who-5)\n" + "nsubj(love-7, you-6)\n" + "rcmod(man-4, love-7)\n", "nsubj(saw-2, I-1)\n" + "root(ROOT-0, saw-2)\n" + "det(man-4, the-3)\n" + "dobj(saw-2, man-4)\n" + "ref(man-4, whose-5)\n" + "poss(wife-6, whose-5)\n" + "dobj(love-8, wife-6)\n" + "nsubj(love-8, you-7)\n" + "rcmod(man-4, love-8)\n", "nsubj(saw-2, I-1)\n" + "root(ROOT-0, saw-2)\n" + "det(book-4, the-3)\n" + "dobj(saw-2, book-4)\n" + "ref(book-4, which-5)\n" + "dobj(bought-7, which-5)\n" + "nsubj(bought-7, you-6)\n" + "rcmod(book-4, bought-7)\n", @@ -1361,7 +1361,7 @@ public void testNonCollapsedSeparator() { // the expected dependency answers (basic + extra) String[] testAnswers = { - "nsubj(like-2, I-1)\n" + "root(ROOT-0, like-2)\n" + "aux(swim-4, to-3)\n" + "xcomp(like-2, swim-4)\n" + "======\n" + "xsubj(swim-4, I-1)\n", + "nsubj(like-2, I-1)\n" + "root(ROOT-0, like-2)\n" + "aux(swim-4, to-3)\n" + "xcomp(like-2, swim-4)\n" + "======\n" + "nsubj(swim-4, I-1)\n", }; @@ -1424,11 +1424,11 @@ public void testCollapsedRelations() { String[] testAnswers = { "nsubjpass(defeated-3, Dole-1)\n" + "auxpass(defeated-3, was-2)\n" + 
"root(ROOT-0, defeated-3)\n" + "agent(defeated-3, Clinton-5)\n", "mark(lied-3, That-1)\n" + "nsubj(lied-3, she-2)\n" + "csubjpass(suspected-5, lied-3)\n" + "auxpass(suspected-5, was-4)\n" + "root(ROOT-0, suspected-5)\n" + "agent(suspected-5, everyone-7)\n", - "nsubj(like-2, I-1)\n" + "xsubj(swim-4, I-1)\n" + "root(ROOT-0, like-2)\n" + "aux(swim-4, to-3)\n" + "xcomp(like-2, swim-4)\n", + "nsubj(like-2, I-1)\n" + "nsubj(swim-4, I-1)\n" + "root(ROOT-0, like-2)\n" + "aux(swim-4, to-3)\n" + "xcomp(like-2, swim-4)\n", "nsubj(sat-2, I-1)\n" + "root(ROOT-0, sat-2)\n" + "det(chair-5, the-4)\n" + "prep_on(sat-2, chair-5)\n", "nsubj(have-2, We-1)\n" + "root(ROOT-0, have-2)\n" + "neg(information-5, no-3)\n" + "amod(information-5, useful-4)\n" + "dobj(have-2, information-5)\n" + "mark(are-9, whether-7)\n" + "nsubj(are-9, users-8)\n" + "prepc_on(information-5, are-9)\n" + "prep_at(are-9, risk-11)\n", "nsubj(heard-2, They-1)\n" + "root(ROOT-0, heard-2)\n" + "prep_about(heard-2, asbestos-4)\n" + "xcomp(heard-2, having-5)\n" + "amod(properties-7, questionable-6)\n" + "dobj(having-5, properties-7)\n", - "nsubj(says-2, He-1)\n" + "root(ROOT-0, says-2)\n" + "mark(like-5, that-3)\n" + "nsubj(like-5, you-4)\n" + "xsubj(swim-7, you-4)\n" + "ccomp(says-2, like-5)\n" + "aux(swim-7, to-6)\n" + "xcomp(like-5, swim-7)\n", + "nsubj(says-2, He-1)\n" + "root(ROOT-0, says-2)\n" + "mark(like-5, that-3)\n" + "nsubj(like-5, you-4)\n" + "nsubj(swim-7, you-4)\n" + "ccomp(says-2, like-5)\n" + "aux(swim-7, to-6)\n" + "xcomp(like-5, swim-7)\n", "nn(forces-2, U.S.-1)\n" + "nsubjpass(engaged-5, forces-2)\n" + "aux(engaged-5, have-3)\n" + "auxpass(engaged-5, been-4)\n" + "root(ROOT-0, engaged-5)\n" + "amod(fighting-8, intense-7)\n" + "prep_in(engaged-5, fighting-8)\n" + "mark(launched-11, after-9)\n" + "nsubj(launched-11, insurgents-10)\n" + "advcl(engaged-5, launched-11)\n" + "amod(attacks-13, simultaneous-12)\n" + "dobj(launched-11, attacks-13)\n", "nsubj(saw-2, I-1)\n" + "root(ROOT-0, saw-2)\n" + 
"det(man-4, the-3)\n" + "dobj(saw-2, man-4)\n" + "dobj(love-7, man-4)\n" + "nsubj(love-7, you-6)\n" + "rcmod(man-4, love-7)\n", "nsubj(saw-2, I-1)\n" + "root(ROOT-0, saw-2)\n" + "det(man-4, the-3)\n" + "dobj(saw-2, man-4)\n" + "poss(wife-6, man-4)\n" + "dobj(love-8, wife-6)\n" + "nsubj(love-8, you-7)\n" + "rcmod(man-4, love-8)\n", diff --git a/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java b/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java index 9ca9b8c0ca..e7a7c0dcc5 100644 --- a/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java +++ b/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java @@ -554,6 +554,20 @@ public void testIfExists() { runTest(tregex, tsurgeon, "(A (B foo) (C foo))", "(A (BAR foo) (BAZ foo))"); } + public void testExcise() { + // TODO: needs more meat to this test + TregexPattern tregex = TregexPattern.compile("__=repeat <: (~repeat < __)"); + TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("excise repeat repeat"); + runTest(tregex, tsurgeon, "(A (B (B foo)))", "(A (B foo))"); + // Test that if a deleted root is excised down to a level that has + // just one child, that one child gets returned as the new tree + runTest(tregex, tsurgeon, "(B (B foo))", "(B foo)"); + + tregex = TregexPattern.compile("A=root"); + tsurgeon = Tsurgeon.parseOperation("excise root root"); + runTest(tregex, tsurgeon, "(A (B bar) (C foo))", null); + } + public static void runTest(TregexPattern tregex, TsurgeonPattern tsurgeon, String input, String expected) { Tree result = Tsurgeon.processPattern(tregex, tsurgeon,