Skip to content

Commit

Permalink
Improve name relation, add option.
Browse files Browse the repository at this point in the history
  • Loading branch information
sebschu authored and Stanford NLP committed Jul 18, 2016
1 parent 42729bd commit b9d3e60
Showing 1 changed file with 26 additions and 8 deletions.
Expand Up @@ -41,6 +41,7 @@ public class UniversalEnglishGrammaticalStructure extends GrammaticalStructure

/**
 * Debug flag. True whenever the JVM system property
 * "UniversalEnglishGrammaticalStructure" is set, regardless of its value
 * (only presence is tested, so even "false" enables it).
 */
private static final boolean DEBUG = System.getProperty("UniversalEnglishGrammaticalStructure", null) != null;

/**
 * Opt-in flag for name processing. True whenever the JVM system property
 * "UDUseNameRelation" is set, regardless of its value (only presence is
 * tested). When false, processNames(SemanticGraph) returns immediately
 * without modifying the graph.
 */
private static final boolean USE_NAME = System.getProperty("UDUseNameRelation") != null;

/*
* Options for "Enhanced" representation:
Expand Down Expand Up @@ -1804,8 +1805,8 @@ private static void demoteQmodParentHelper(SemanticGraph sg, IndexedWord gov, In


private static final SemgrexPattern[] NAME_PATTERNS = {
SemgrexPattern.compile("{ner:PERSON}=w1 >compound {ner:PERSON}=w2"),
SemgrexPattern.compile("{ner:LOCATION}=w1 >compound {ner:LOCATION}=w2")
SemgrexPattern.compile("{ner:PERSON}=w1 >compound {}=w2"),
SemgrexPattern.compile("{ner:LOCATION}=w1 >compound {}=w2")
};
private static final Predicate<String> PUNCT_TAG_FILTER = new PennTreebankLanguagePack().punctuationWordRejectFilter();

Expand All @@ -1822,6 +1823,10 @@ private static void demoteQmodParentHelper(SemanticGraph sg, IndexedWord gov, In
*/
private static void processNames(SemanticGraph sg) {

if ( ! USE_NAME) {
return;
}

// check whether NER tags are available
IndexedWord rootToken = sg.getFirstRoot();
if (rootToken == null || !rootToken.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class)) {
Expand All @@ -1843,7 +1848,9 @@ private static void processNames(SemanticGraph sg) {
}
head = w1;
}
nameParts.add(w2);
if (w2.ner().equals(w1.ner())) {
nameParts.add(w2);
}
}
if (head != null) {
processNamesHelper(sg, head, nameParts);
Expand All @@ -1856,6 +1863,18 @@ private static void processNames(SemanticGraph sg) {
private static void processNamesHelper(SemanticGraph sg, IndexedWord oldHead, List<IndexedWord> nameParts) {

if (nameParts.size() < 1) {
// if the named entity only spans one token, change compound relations
// to nmod relations to get the right structure for NPs with additional modifiers
// such as "Mrs. Clinton".
Set<IndexedWord> children = new HashSet<>(sg.getChildren(oldHead));
for (IndexedWord child : children) {
SemanticGraphEdge oldEdge = sg.getEdge(oldHead, child);
if (oldEdge.getRelation() == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) {
sg.addEdge(oldHead, child, UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER,
oldEdge.getWeight(), oldEdge.isExtra());
sg.removeEdge(oldEdge);
}
}
return;
}

Expand Down Expand Up @@ -1900,14 +1919,13 @@ private static void processNamesHelper(SemanticGraph sg, IndexedWord oldHead, Li
} else {
// attach word to new head
SemanticGraphEdge oldEdge = sg.getEdge(oldHead, child);
sg.addEdge(newHead, child, oldEdge.getRelation(), oldEdge.getWeight(), oldEdge.isExtra());
// If only part of the compound belongs to the named entity, attach the remaining tokens via an nmod relation.
GrammaticalRelation reln = oldEdge.getRelation() == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER ?
UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER : oldEdge.getRelation();
sg.addEdge(newHead, child, reln, oldEdge.getWeight(), oldEdge.isExtra());
sg.removeEdge(oldEdge);
}
}

//TODO[sebschu]: Do something about honorific titles in front of names.


}

/**
Expand Down

0 comments on commit b9d3e60

Please sign in to comment.