Skip to content

Commit

Permalink
Use the same annotator fudging logic in the server as well as the main program
Browse files Browse the repository at this point in the history
  • Loading branch information
AngledLuffa committed Mar 20, 2022
1 parent d694e20 commit b0d1e46
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
21 changes: 14 additions & 7 deletions src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,7 @@ public StanfordCoreNLP(Properties props, boolean enforceRequirements, AnnotatorP
this.properties.setProperty("annotators", newAnnotators);
}

// if cleanxml is requested and tokenize is here,
// make it part of tokenize rather than its own annotator
unifyTokenizeProperty(this.properties, STANFORD_CLEAN_XML, STANFORD_TOKENIZE + "." + STANFORD_CLEAN_XML);
// ssplit is always part of tokenize now
unifyTokenizeProperty(this.properties, STANFORD_SSPLIT, null);
// cdc_tokenize is also absorbed into tokenize
replaceAnnotator(this.properties, STANFORD_CDC_TOKENIZE, STANFORD_TOKENIZE);
normalizeAnnotators(this.properties);

// cdm [2017]: constructAnnotatorPool (PropertiesUtils.getSignature) requires non-null Properties, so after properties setup
this.pool = annotatorPool != null ? annotatorPool : constructAnnotatorPool(props, getAnnotatorImplementations());
Expand Down Expand Up @@ -311,6 +305,19 @@ public StanfordCoreNLP(Properties props, boolean enforceRequirements, AnnotatorP
this.pipelineSetupTime = tim.report();
}

/**
 * Brings older-style annotator settings in line with the current layout,
 * in which several formerly separate annotators are handled by tokenize.
 * The aim is to stay backwards compatible with existing configurations.
 * <br>
 * This method returns nothing: the given {@code properties} object is
 * handed to each helper below, so any adjustment is made on that object.
 * It is package-visible so that StanfordCoreNLPServer can apply the same
 * normalization to the properties it builds for a request.
 *
 * @param properties the pipeline properties to normalize
 */
static void normalizeAnnotators(Properties properties) {
  // Step 1: when cleanxml is requested alongside tokenize, it is expressed
  // as a tokenize sub-property instead of being its own annotator
  final String cleanXmlUnderTokenize = STANFORD_TOKENIZE + "." + STANFORD_CLEAN_XML;
  unifyTokenizeProperty(properties, STANFORD_CLEAN_XML, cleanXmlUnderTokenize);

  // Step 2: ssplit is always part of tokenize now; no replacement
  // property name is supplied for it (null)
  unifyTokenizeProperty(properties, STANFORD_SSPLIT, null);

  // Step 3: cdc_tokenize has been absorbed into tokenize as well, so
  // the old annotator name is replaced by the tokenize one
  replaceAnnotator(properties, STANFORD_CDC_TOKENIZE, STANFORD_TOKENIZE);
}

/**
* The cdc_tokenize annotator is now part of tokenize
*/
Expand Down
1 change: 1 addition & 0 deletions src/edu/stanford/nlp/pipeline/StanfordCoreNLPServer.java
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ private Properties getProperties(HttpExchange httpExchange) throws UnsupportedEn
urlProperties.forEach(props::setProperty);

// Get the annotators
StanfordCoreNLP.normalizeAnnotators(props);
String annotators = props.getProperty("annotators");
// If the properties contains a custom annotator, then do not enforceRequirements.
if (annotators != null && !PropertiesUtils.hasPropertyPrefix(props, CUSTOM_ANNOTATOR_PREFIX) && PropertiesUtils.getBool(props, "enforceRequirements", true)) {
Expand Down

0 comments on commit b0d1e46

Please sign in to comment.