From 61ef545efac3eda7c46f29b3c01a38c8aa26a924 Mon Sep 17 00:00:00 2001 From: John Bauer Date: Wed, 2 Aug 2023 12:00:58 -0700 Subject: [PATCH] Separate off the NML part of the nn relationship so that we can do it without the comma restriction. This especially finds lists of NML as nn modifiers of a noun --- src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java b/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java index c17d82731c..bc86b0d4ab 100644 --- a/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java +++ b/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java @@ -1018,7 +1018,10 @@ private EnglishGrammaticalRelations() {} public static final GrammaticalRelation NOUN_COMPOUND_MODIFIER = new GrammaticalRelation(Language.English, "nn", "nn modifier", MODIFIER, "(?:WH)?(?:NP|NX|NAC|NML|ADVP|ADJP)(?:-TMP|-ADV)?", tregexCompiler, - "/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < (NP|NML|NN|NNS|NNP|NNPS|FW|AFX=target $++ NN|NNS|NNP|NNPS|FW|CD=sister !<<- POS !<<- (VBZ < /^[\'’]s$/) !$- /^,$/ !$++ (POS $++ =sister))", + "/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < (NP|NN|NNS|NNP|NNPS|FW|AFX=target $++ NN|NNS|NNP|NNPS|FW|CD=sister !<<- POS !<<- (VBZ < /^[\'’]s$/) !$- /^,$/ !$++ (POS $++ =sister))", + // same thing as the above, but without the comma. NML in such a situation is typically a noun phrase modifying a noun, + // whereas other nodes such as NN can be parts of lists or otherwise unsuitable for the nn relationship + "/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < (NML=target $++ NN|NNS|NNP|NNPS|FW|CD=sister !<<- POS !<<- (VBZ < /^[\'’]s$/) !$++ (POS $++ =sister))", "/^(?:WH)?(?:NP|NX|NAC|NML)(?:-TMP|-ADV)?$/ < JJ|JJR|JJS=sister < (NP|NML|NN|NNS|NNP|NNPS|FW=target !<<- POS !<<- (VBZ < /^[\'’]s$/) $+ =sister) <# NN|NNS|NNP|NNPS !<<- POS !<<- (VBZ < /^[\'’]s$/) ", // in vitro, in vivo, etc., in Genia // matches against "etc etc"