Skip to content

Commit

Permalink
Create custom annotator for UD features.
Browse files Browse the repository at this point in the history
  • Loading branch information
sebschu authored and Stanford NLP committed Oct 16, 2015
1 parent 6e117da commit b8a9bbe
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 121 deletions.
Expand Up @@ -16,13 +16,11 @@ hcoref.path.traindata = /scr/nlp/data/conll-2012/v4/data/train/data/english/anno
# data & model path # data & model path


## models ## models
#hcoref.path.serialized = /scr/nlp/data/coref/models/stanford/ hcoref.path.serialized = /scr/nlp/data/coref/models/stanford/
hcoref.path.serialized = edu/stanford/nlp/models/hcoref/


## other data ## other data
hcoref.big.gender.number = edu/stanford/nlp/models/dcoref/gender.data.gz hcoref.big.gender.number = edu/stanford/nlp/models/dcoref/gender.data.gz
#hcoref.path.word2vec = /scr/nlp/data/coref/wordvectors/en/vectors.txt.gz hcoref.path.word2vec = /scr/nlp/data/coref/wordvectors/en/vectors.txt.gz
hcoref.path.word2vec = edu/stanford/nlp/models/word2vec/vectors.txt.gz


############################################################################# #############################################################################
# mention detection # mention detection
Expand Down
177 changes: 94 additions & 83 deletions src/edu/stanford/nlp/pipeline/Annotator.java
Expand Up @@ -108,6 +108,7 @@ public String toString() {
String STANFORD_NATLOG = "natlog"; String STANFORD_NATLOG = "natlog";
String STANFORD_OPENIE = "openie"; String STANFORD_OPENIE = "openie";
String STANFORD_QUOTE = "quote"; String STANFORD_QUOTE = "quote";
String STANFORD_UD_FEATURES = "udfeats";


Requirement TOKENIZE_REQUIREMENT = new Requirement(STANFORD_TOKENIZE); Requirement TOKENIZE_REQUIREMENT = new Requirement(STANFORD_TOKENIZE);
Requirement CLEAN_XML_REQUIREMENT = new Requirement(STANFORD_CLEAN_XML); Requirement CLEAN_XML_REQUIREMENT = new Requirement(STANFORD_CLEAN_XML);
Expand All @@ -125,97 +126,105 @@ public String toString() {
Requirement NATLOG_REQUIREMENT = new Requirement(STANFORD_NATLOG); Requirement NATLOG_REQUIREMENT = new Requirement(STANFORD_NATLOG);
Requirement OPENIE_REQUIREMENT = new Requirement(STANFORD_OPENIE); Requirement OPENIE_REQUIREMENT = new Requirement(STANFORD_OPENIE);
Requirement QUOTE_REQUIREMENT = new Requirement(STANFORD_QUOTE); Requirement QUOTE_REQUIREMENT = new Requirement(STANFORD_QUOTE);
Requirement UD_FEATURES_REQUIREMENT = new Requirement(STANFORD_UD_FEATURES);


/** /**
* A map from annotator name to a set of requirements for that annotator. * A map from annotator name to a set of requirements for that annotator.
* This is useful to have here for the purpose of static analysis on an * This is useful to have here for the purpose of static analysis on an
* annotators list. * annotators list.
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
Map<String, Set<Requirement>> REQUIREMENTS = Collections.unmodifiableMap(new HashMap<String, Set<Requirement>>() {{ Map<String, Set<Requirement>> REQUIREMENTS = Collections.unmodifiableMap(new HashMap<String, Set<Requirement>>() {
put(STANFORD_TOKENIZE, Collections.EMPTY_SET); {
put(STANFORD_CLEAN_XML, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_TOKENIZE, Collections.EMPTY_SET);
put(STANFORD_CLEAN_XML, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); // A requirement for STANFORD_CLEAN_XML add(TOKENIZE_REQUIREMENT); // A requirement for STANFORD_CLEAN_XML
}})); }}));
put(STANFORD_SSPLIT, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_SSPLIT, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
}})); }}));
put(STANFORD_POS, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_POS, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
}})); }}));
put(STANFORD_LEMMA, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_LEMMA, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
}})); }}));
put(STANFORD_NER, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_NER, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT); add(LEMMA_REQUIREMENT);
}})); }}));
put(STANFORD_GENDER, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_GENDER, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
}})); }}));
put(STANFORD_TRUECASE, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_TRUECASE, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT); add(LEMMA_REQUIREMENT);
}})); }}));
put(STANFORD_PARSE, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_PARSE, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
}})); }}));
put(STANFORD_DEPENDENCIES, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_DEPENDENCIES, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
}})); }}));
put(STANFORD_DETERMINISTIC_COREF, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_DETERMINISTIC_COREF, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT); add(LEMMA_REQUIREMENT);
add(NER_REQUIREMENT); add(NER_REQUIREMENT);
add(PARSE_REQUIREMENT); add(PARSE_REQUIREMENT);
}})); }}));
put(STANFORD_COREF, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_COREF, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT); add(LEMMA_REQUIREMENT);
add(NER_REQUIREMENT); add(NER_REQUIREMENT);
add(PARSE_REQUIREMENT); add(PARSE_REQUIREMENT);
}})); }}));
put(STANFORD_RELATION, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_RELATION, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT); add(LEMMA_REQUIREMENT);
add(NER_REQUIREMENT); add(NER_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT); add(DEPENDENCY_REQUIREMENT);
}})); }}));
put(STANFORD_NATLOG, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_NATLOG, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
add(LEMMA_REQUIREMENT); add(LEMMA_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT); // TODO(gabor) can also use 'parse' annotator, technically add(DEPENDENCY_REQUIREMENT); // TODO(gabor) can also use 'parse' annotator, technically
}})); }}));
put(STANFORD_OPENIE, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_OPENIE, Collections.unmodifiableSet(new HashSet<Requirement>() {{
add(TOKENIZE_REQUIREMENT); add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT); add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT); add(POS_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT); // TODO(gabor) can also use 'parse' annotator, technically add(DEPENDENCY_REQUIREMENT); // TODO(gabor) can also use 'parse' annotator, technically
add(NATLOG_REQUIREMENT); add(NATLOG_REQUIREMENT);
}})); }}));
put(STANFORD_QUOTE, Collections.unmodifiableSet(new HashSet<Requirement>() {{ put(STANFORD_QUOTE, Collections.unmodifiableSet(new HashSet<Requirement>() {{
// No requirements // No requirements
}})); }}));
put(STANFORD_UD_FEATURES, Collections.unmodifiableSet(new HashSet<Requirement>(){{
add(TOKENIZE_REQUIREMENT);
add(SSPLIT_REQUIREMENT);
add(POS_REQUIREMENT);
add(DEPENDENCY_REQUIREMENT);
}}));
}}); }});


/** /**
Expand Down Expand Up @@ -252,5 +261,7 @@ public String toString() {
Set<Requirement> TOKENIZE_SSPLIT_POS_DEPPARSE = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT, DEPENDENCY_REQUIREMENT)); Set<Requirement> TOKENIZE_SSPLIT_POS_DEPPARSE = Collections.unmodifiableSet(new ArraySet<>(TOKENIZE_REQUIREMENT, SSPLIT_REQUIREMENT, POS_REQUIREMENT, DEPENDENCY_REQUIREMENT));
Set<Requirement> PARSE_AND_TAG = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT)); Set<Requirement> PARSE_AND_TAG = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT));
Set<Requirement> PARSE_TAG_BINARIZED_TREES = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, BINARIZED_TREES_REQUIREMENT)); Set<Requirement> PARSE_TAG_BINARIZED_TREES = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, BINARIZED_TREES_REQUIREMENT));
Set<Requirement> PARSE_TAG_DEPPARSE_BINARIZED_TREES = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, DEPENDENCY_REQUIREMENT, BINARIZED_TREES_REQUIREMENT));
Set<Requirement> PARSE_TAG_DEPPARSE = Collections.unmodifiableSet(new ArraySet<>(POS_REQUIREMENT, PARSE_REQUIREMENT, DEPENDENCY_REQUIREMENT));


} }
20 changes: 20 additions & 0 deletions src/edu/stanford/nlp/pipeline/AnnotatorFactories.java
Expand Up @@ -651,4 +651,24 @@ protected String additionalSignature() {
}; };
} }



//
// UD Features Extractor
//
/**
 * Builds the {@code AnnotatorFactory} for the UD features annotator.
 *
 * @param properties handed to the {@code AnnotatorFactory} constructor; {@code create()} passes
 *                   {@code properties} on to {@code annotatorImpl.udfeats(...)}
 * @param annotatorImpl the implementations object whose {@code udfeats} method builds the annotator
 * @return a new anonymous {@code AnnotatorFactory} subclass instance
 */
public static AnnotatorFactory udfeats(Properties properties, final AnnotatorImplementations annotatorImpl) {
return new AnnotatorFactory(properties, annotatorImpl) {
private static final long serialVersionUID = -2525567112379296672L;

// Construction is delegated to the supplied implementations object.
@Override
public Annotator create() {
return annotatorImpl.udfeats(properties);
}

// This factory contributes an empty additional signature.
// NOTE(review): presumably because the annotator has no configurable options -- confirm.
@Override
protected String additionalSignature() {
return "";
}
};
}

} }
7 changes: 7 additions & 0 deletions src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java
Expand Up @@ -263,4 +263,11 @@ public Annotator quote(Properties properties) {
return new QuoteAnnotator(relevantProperties); return new QuoteAnnotator(relevantProperties);
} }


/**
 * Add universal dependencies features.
 *
 * @param properties not read by this method; the annotator is created with its
 *                   no-argument constructor
 * @return a new {@code UDFeatureAnnotator}
 */
public Annotator udfeats(Properties properties) {
return new UDFeatureAnnotator();
}

} }
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/pipeline/CharniakParserAnnotator.java
Expand Up @@ -69,7 +69,7 @@ public void annotate(Annotation annotation) {


List<Tree> trees = Generics.newArrayList(1); List<Tree> trees = Generics.newArrayList(1);
trees.add(tree); trees.add(tree);
ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, GrammaticalStructure.Extras.NONE, null); ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, GrammaticalStructure.Extras.NONE);
} }
} else { } else {
throw new RuntimeException("unable to find sentences in: " + annotation); throw new RuntimeException("unable to find sentences in: " + annotation);
Expand Down
1 change: 0 additions & 1 deletion src/edu/stanford/nlp/pipeline/JSONOutputter.java
Expand Up @@ -128,7 +128,6 @@ public void print(Annotation doc, OutputStream target, Options options) throws I
// Add a single token // Add a single token
l3.set("index", token.index()); l3.set("index", token.index());
l3.set("word", token.word()); l3.set("word", token.word());
l3.set("originalText", token.originalText());
l3.set("lemma", token.lemma()); l3.set("lemma", token.lemma());
l3.set("characterOffsetBegin", token.beginPosition()); l3.set("characterOffsetBegin", token.beginPosition());
l3.set("characterOffsetEnd", token.endPosition()); l3.set("characterOffsetEnd", token.endPosition());
Expand Down
32 changes: 12 additions & 20 deletions src/edu/stanford/nlp/pipeline/ParserAnnotator.java
Expand Up @@ -63,8 +63,6 @@ public class ParserAnnotator extends SentenceAnnotator {


private final boolean keepPunct; private final boolean keepPunct;


private UniversalDependenciesFeatureAnnotator featureAnnotator = null;

/** If true, don't re-annotate sentences that already have a tree annotation */ /** If true, don't re-annotate sentences that already have a tree annotation */
private final boolean noSquash; private final boolean noSquash;
private final GrammaticalStructure.Extras extraDependencies; private final GrammaticalStructure.Extras extraDependencies;
Expand Down Expand Up @@ -96,13 +94,6 @@ public ParserAnnotator(ParserGrammar parser, boolean verbose, int maxSent, Funct
if (this.BUILD_GRAPHS) { if (this.BUILD_GRAPHS) {
TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack(); TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack();
this.gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder()); this.gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), parser.getTLPParams().typedDependencyHeadFinder());
if (this.gsf instanceof UniversalEnglishGrammaticalStructureFactory) {
try {
this.featureAnnotator = new UniversalDependenciesFeatureAnnotator();
} catch (IOException e) {
//do nothing
}
}
} else { } else {
this.gsf = null; this.gsf = null;
} }
Expand Down Expand Up @@ -155,13 +146,6 @@ public ParserAnnotator(String annotatorName, Properties props) {
TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack(); TreebankLanguagePack tlp = parser.getTLPParams().treebankLanguagePack();
Predicate<String> punctFilter = this.keepPunct ? Filters.acceptFilter() : tlp.punctuationWordRejectFilter(); Predicate<String> punctFilter = this.keepPunct ? Filters.acceptFilter() : tlp.punctuationWordRejectFilter();
this.gsf = tlp.grammaticalStructureFactory(punctFilter, parser.getTLPParams().typedDependencyHeadFinder()); this.gsf = tlp.grammaticalStructureFactory(punctFilter, parser.getTLPParams().typedDependencyHeadFinder());
if (this.gsf instanceof UniversalEnglishGrammaticalStructureFactory) {
try {
this.featureAnnotator = new UniversalDependenciesFeatureAnnotator();
} catch (IOException e) {
//do nothing
}
}
} else { } else {
this.gsf = null; this.gsf = null;
} }
Expand Down Expand Up @@ -302,7 +286,7 @@ private void finishSentence(CoreMap sentence, List<Tree> trees) {
trees = mappedTrees; trees = mappedTrees;
} }


ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, extraDependencies, featureAnnotator); ParserAnnotatorUtils.fillInParseAnnotations(VERBOSE, BUILD_GRAPHS, gsf, sentence, trees, extraDependencies);


if (saveBinaryTrees) { if (saveBinaryTrees) {
TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack()); TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
Expand Down Expand Up @@ -352,10 +336,18 @@ public Set<Requirement> requires() {


@Override @Override
public Set<Requirement> requirementsSatisfied() { public Set<Requirement> requirementsSatisfied() {
if (this.saveBinaryTrees) { if (this.BUILD_GRAPHS) {
return PARSE_TAG_BINARIZED_TREES; if (this.saveBinaryTrees) {
return PARSE_TAG_DEPPARSE_BINARIZED_TREES;
} else {
return PARSE_TAG_DEPPARSE;
}
} else { } else {
return PARSE_AND_TAG; if (this.saveBinaryTrees) {
return PARSE_TAG_BINARIZED_TREES;
} else {
return PARSE_AND_TAG;
}
} }
} }
} }
8 changes: 1 addition & 7 deletions src/edu/stanford/nlp/pipeline/ParserAnnotatorUtils.java
Expand Up @@ -31,8 +31,7 @@ private ParserAnnotatorUtils() {} // static methods
*/ */
public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs, public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs,
GrammaticalStructureFactory gsf, CoreMap sentence, GrammaticalStructureFactory gsf, CoreMap sentence,
List<Tree> trees, GrammaticalStructure.Extras extras, List<Tree> trees, GrammaticalStructure.Extras extras) {
UniversalDependenciesFeatureAnnotator featureAnnotator) {


boolean first = true; boolean first = true;
for (Tree tree : trees) { for (Tree tree : trees) {
Expand Down Expand Up @@ -63,11 +62,6 @@ public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs,
SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(gsf.newGrammaticalStructure(tree), extras); SemanticGraph uncollapsedDeps = SemanticGraphFactory.generateUncollapsedDependencies(gsf.newGrammaticalStructure(tree), extras);
SemanticGraph ccDeps = SemanticGraphFactory.generateCCProcessedDependencies(gsf.newGrammaticalStructure(tree), extras); SemanticGraph ccDeps = SemanticGraphFactory.generateCCProcessedDependencies(gsf.newGrammaticalStructure(tree), extras);


// add features to graphs if we are converting to English UD
if (featureAnnotator != null) {
featureAnnotator.addFeatures(deps, tree, false, true);
}

if (verbose) { if (verbose) {
System.err.println("SDs:"); System.err.println("SDs:");
System.err.println(deps.toString(SemanticGraph.OutputFormat.LIST)); System.err.println(deps.toString(SemanticGraph.OutputFormat.LIST));
Expand Down
4 changes: 3 additions & 1 deletion src/edu/stanford/nlp/pipeline/StanfordCoreNLP.java
Expand Up @@ -415,11 +415,13 @@ protected synchronized AnnotatorPool getDefaultAnnotatorPool(final Properties in
pool.register(STANFORD_COREF, AnnotatorFactories.coref(properties, annotatorImplementation)); pool.register(STANFORD_COREF, AnnotatorFactories.coref(properties, annotatorImplementation));
pool.register(STANFORD_RELATION, AnnotatorFactories.relation(properties, annotatorImplementation)); pool.register(STANFORD_RELATION, AnnotatorFactories.relation(properties, annotatorImplementation));
pool.register(STANFORD_SENTIMENT, AnnotatorFactories.sentiment(properties, annotatorImplementation)); pool.register(STANFORD_SENTIMENT, AnnotatorFactories.sentiment(properties, annotatorImplementation));
pool.register(STANFORD_COLUMN_DATA_CLASSIFIER,AnnotatorFactories.columnDataClassifier(properties,annotatorImplementation)); pool.register(STANFORD_COLUMN_DATA_CLASSIFIER,AnnotatorFactories.columnDataClassifier(properties, annotatorImplementation));
pool.register(STANFORD_DEPENDENCIES, AnnotatorFactories.dependencies(properties, annotatorImplementation)); pool.register(STANFORD_DEPENDENCIES, AnnotatorFactories.dependencies(properties, annotatorImplementation));
pool.register(STANFORD_NATLOG, AnnotatorFactories.natlog(properties, annotatorImplementation)); pool.register(STANFORD_NATLOG, AnnotatorFactories.natlog(properties, annotatorImplementation));
pool.register(STANFORD_OPENIE, AnnotatorFactories.openie(properties, annotatorImplementation)); pool.register(STANFORD_OPENIE, AnnotatorFactories.openie(properties, annotatorImplementation));
pool.register(STANFORD_QUOTE, AnnotatorFactories.quote(properties, annotatorImplementation)); pool.register(STANFORD_QUOTE, AnnotatorFactories.quote(properties, annotatorImplementation));
pool.register(STANFORD_UD_FEATURES, AnnotatorFactories.udfeats(properties, annotatorImplementation));

// Add more annotators here // Add more annotators here


// add annotators loaded via reflection from classnames specified // add annotators loaded via reflection from classnames specified
Expand Down
8 changes: 4 additions & 4 deletions src/edu/stanford/nlp/pipeline/StanfordCoreNLPServer.java
Expand Up @@ -98,14 +98,14 @@ private static Map<String, String> getURLParams(URI uri) throws UnsupportedEncod


String query = uri.getQuery(); String query = uri.getQuery();
String[] queryFields = query String[] queryFields = query
.replaceAll("\\\\&", "___AMP___") .replace("\\&", "___AMP___")
.replaceAll("\\\\+", "___PLUS___") .replace("\\+", "___PLUS___")
.split("&"); .split("&");
for (String queryField : queryFields) { for (String queryField : queryFields) {
int firstEq = queryField.indexOf('='); int firstEq = queryField.indexOf('=');
// Convention uses "+" for spaces. // Convention uses "+" for spaces.
String key = URLDecoder.decode(queryField.substring(0, firstEq), "utf8").replaceAll("___AMP___", "&").replaceAll("___PLUS___", "+"); String key = URLDecoder.decode(queryField.substring(0, firstEq), "utf8").replace("___AMP___", "&").replace("___PLUS___", "+");
String value = URLDecoder.decode(queryField.substring(firstEq + 1), "utf8").replaceAll("___AMP___", "&").replaceAll("___PLUS___", "+"); String value = URLDecoder.decode(queryField.substring(firstEq + 1), "utf8").replace("___AMP___", "&").replace("___PLUS___", "+");
urlParams.put(key, value); urlParams.put(key, value);
} }
return urlParams; return urlParams;
Expand Down

0 comments on commit b8a9bbe

Please sign in to comment.