Skip to content

Commit

Permalink
Add a FLAT relation for phrases such as 'en masse' which aren't consi…
Browse files Browse the repository at this point in the history
…dered a FIXED or MWT expression. Addresses a tiny part of UniversalDependencies/docs#717
  • Loading branch information
AngledLuffa committed Feb 24, 2024
1 parent bc4acf1 commit cb338cd
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 2 deletions.
21 changes: 19 additions & 2 deletions src/edu/stanford/nlp/trees/CoordinationTransformer.java
Expand Up @@ -105,6 +105,11 @@ public Tree transformTree(Tree t) {
log.info("After MWETransform: " + t);
}

t = MWFlatTransform(t);
if (VERBOSE) {
log.info("After MWFlatTransform: " + t);
}

t = prepCCTransform(t);
if (VERBOSE) {
log.info("After prepCCTransform: " + t);
Expand Down Expand Up @@ -688,7 +693,6 @@ private static Tree findCCparent(Tree t, Tree root) {
TregexPattern.compile("@QP|XS < ((JJR|RBR||RB|RP|IN=node1 < /^(?i)(up)$/) $+ (IN|TO=node2 < /^(?i)to$/))"), // up to
TregexPattern.compile("@QP < ((JJR|RBR|RB|RP|IN=node1 < /^(?i)up$/) $+ (IN|TO=node2 < /^(?i)to$/))"), //up to
TregexPattern.compile("@S|SQ|VP|ADVP|PP < (@ADVP < ((IN|RB=node1 < /^(?i)at$/) $+ (JJS|RBS=node2 < /^(?i)least$/)) !$+ (RB < /(?i)(once|twice)/))"), //at least

};

// Tsurgeon operation paired with the multi-word-expression patterns: groups the
// span from node1 through node2 under a new MWE node and, only when a pattern
// also bound node3, moves node3 so that it sits immediately after node2.
private static final TsurgeonPattern MWE_OPERATION = Tsurgeon.parseOperation("[createSubtree MWE node1 node2] [if exists node3 move node3 $- node2]");
Expand Down Expand Up @@ -727,7 +731,20 @@ public static Tree MWETransform(Tree t) {
return t;
}


/**
 * Tregex patterns for multi-word phrases that should be grouped under a
 * {@code FLAT} node. Each pattern names the first word's node {@code node1}
 * and the last word's node {@code node2}; those names are consumed by
 * {@code MW_FLAT_OPERATION}.
 */
private static final TregexPattern[] MW_FLAT_PATTERNS = {
// Matches an NP or ADVP (any functional-tag variant, via '@') whose children are
// exactly two nodes: one over "en" and one over "masse", case-insensitively.
// The wildcard '__' is used for both words because their POS tags are not consistent.
TregexPattern.compile("@NP|ADVP <... {(__=node1 < /^(?i)en$/); (__=node2 < /^(?i)masse$/)}"), // en masse, which is tagged in different ways in PTB
};

// Wraps the node1..node2 span in a new FLAT constituent.
// NOTE(review): the "if exists node3" clause mirrors MWE_OPERATION, but no pattern in
// MW_FLAT_PATTERNS binds node3, so that clause looks like it never fires here -- confirm
// before relying on it or removing it.
private static final TsurgeonPattern MW_FLAT_OPERATION = Tsurgeon.parseOperation("[createSubtree FLAT node1 node2] [if exists node3 move node3 $- node2]");

/**
 * Runs every pattern in {@code MW_FLAT_PATTERNS} against the given tree and
 * applies {@code MW_FLAT_OPERATION} wherever a pattern matches, so that the
 * matched words end up grouped under a {@code FLAT} node.
 *
 * @param t the tree to process; it is handed to Tsurgeon as-is
 * @return the same {@code t} reference that was passed in
 */
public static Tree MWFlatTransform(Tree t) {
  final int patternCount = MW_FLAT_PATTERNS.length;
  for (int idx = 0; idx < patternCount; idx++) {
    // The value returned by processPattern is intentionally not captured;
    // the caller receives the original reference back.
    Tsurgeon.processPattern(MW_FLAT_PATTERNS[idx], MW_FLAT_OPERATION, t);
  }
  return t;
}

// Matches a PP whose first child is an IN or TO (p1) that is immediately followed by
// a CC (cc), which in turn is immediately followed by another IN or TO (p2).
private static final TregexPattern FLAT_PREP_CC_PATTERN = TregexPattern.compile("PP <, (/^(IN|TO)$/=p1 $+ (CC=cc $+ /^(IN|TO)$/=p2))");
// Groups p1 through cc under a new PCONJP node, then moves p2 to sit immediately after cc.
private static final TsurgeonPattern FLAT_PREP_CC_OPERATION = Tsurgeon.parseOperation("[createSubtree PCONJP p1 cc] [move p2 $- cc]");

Expand Down
2 changes: 2 additions & 0 deletions src/edu/stanford/nlp/trees/GrammaticalStructure.java
Expand Up @@ -164,6 +164,7 @@ public GrammaticalStructure(Tree t, Collection<GrammaticalRelation> relations,
// avoiding a wasteful copy of the labels.
Trees.setLeafLabels(treeGraph, t.yield());
Trees.setLeafTagsIfUnset(treeGraph);
//System.out.println(treeGraph.toPrettyString(2));
if (transformer != null) {
Tree transformed = transformer.transformTree(treeGraph);
if (!(transformed instanceof TreeGraphNode)) {
Expand All @@ -173,6 +174,7 @@ public GrammaticalStructure(Tree t, Collection<GrammaticalRelation> relations,
} else {
this.root = treeGraph;
}
//System.out.println(this.root.toPrettyString(2));
indexNodes(this.root);
// add head word and tag to phrase nodes
if (hf == null) {
Expand Down
Expand Up @@ -1235,6 +1235,11 @@ private UniversalEnglishGrammaticalRelations() {}
MODIFIER, "MWE", tregexCompiler,
"MWE < (IN|TO|RB|NP|NN|JJ|VB|CC|VBZ|VBD|ADVP|PP|JJS|RBS=target)");

/**
 * The "flat expression" grammatical relation (short name {@code flat}).
 * <br>
 * It is declared with {@code MODIFIER} and is looked for on {@code FLAT}
 * constituents; the dependent is an {@code IN} or {@code FW} child of that
 * {@code FLAT} node.
 * <br>
 * NOTE(review): only IN and FW children are accepted as targets -- confirm
 * this covers every tagging of the phrases that get a FLAT node.
 */
public static final GrammaticalRelation FLAT_EXPRESSION =
new GrammaticalRelation(Language.UniversalEnglish, "flat", "flat expression",
MODIFIER, "FLAT", tregexCompiler,
"FLAT < (IN|FW=target)");

/**
* The "determiner" grammatical relation.
* <br>
Expand Down Expand Up @@ -1613,6 +1618,7 @@ private UniversalEnglishGrammaticalRelations() {}
ADVERBIAL_MODIFIER,
NEGATION_MODIFIER,
MULTI_WORD_EXPRESSION,
FLAT_EXPRESSION,
DETERMINER,
PREDETERMINER,
PRECONJUNCT,
Expand Down
Expand Up @@ -194,6 +194,7 @@ private void ruleChanges() {

// Special constituent for multi-word expressions
nonTerminalInfo.put("MWE", new String[][]{{"left"}});
nonTerminalInfo.put("FLAT", new String[][]{{"left"}});

nonTerminalInfo.put("PCONJP", new String[][]{{"left"}});

Expand Down

0 comments on commit cb338cd

Please sign in to comment.