Skip to content

Commit

Permalink
Add an endpoint for 'scenegraph' to the server.
Browse files Browse the repository at this point in the history
Processes requests using the scenegraph package:
https://nlp.stanford.edu/software/scenegraph-parser.shtml

Output is in either the text or json format from the scenegraph package.

Requested in #1346

Leave a note about not having tested the scenegraph parser for thread safety
  • Loading branch information
AngledLuffa committed Jul 15, 2023
1 parent 267041e commit 8b40947
Showing 1 changed file with 206 additions and 21 deletions.
227 changes: 206 additions & 21 deletions src/edu/stanford/nlp/pipeline/StanfordCoreNLPServer.java
Expand Up @@ -11,6 +11,8 @@
import edu.stanford.nlp.ling.tokensregex.SequenceMatchResult;
import edu.stanford.nlp.ling.tokensregex.TokenSequenceMatcher;
import edu.stanford.nlp.ling.tokensregex.TokenSequencePattern;
import edu.stanford.nlp.scenegraph.RuleBasedParser;
import edu.stanford.nlp.scenegraph.SceneGraph;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.semgrex.ProcessSemgrexRequest;
Expand Down Expand Up @@ -109,6 +111,8 @@ public class StanfordCoreNLPServer implements Runnable {
*/
private SoftReference<Pair<String, StanfordCoreNLP>> lastPipeline = new SoftReference<>(null);

private RuleBasedParser sceneParser = null;

/**
* An executor to time out CoreNLP execution with.
*/
Expand Down Expand Up @@ -295,35 +299,18 @@ private static Map<String, String> getURLParams(URI uri) throws UnsupportedEncod
* @throws ClassNotFoundException Thrown if we cannot load the serializer.
*/
private Annotation getDocument(Properties props, HttpExchange httpExchange) throws IOException, ClassNotFoundException {
String inputFormat = props.getProperty("inputFormat", "text");
final String inputFormat = props.getProperty("inputFormat", "text");
String date = props.getProperty("date");
switch (inputFormat) {
case "text":
// The default encoding by the HTTP standard is ISO-8859-1, but most
// real users of CoreNLP would likely assume UTF-8 by default.
String defaultEncoding = this.strict ? "ISO-8859-1" : "UTF-8";
// Get the encoding
Headers headers = httpExchange.getRequestHeaders();
String encoding;
// the original default behavior of the server was to
// unescape, so let's assume by default that the input text is
// escaped. if the Content-type is set to text we will know
// we shouldn't unescape after all
String contentType = URL_ENCODED;
if (headers.containsKey("Content-type")) {
contentType = headers.getFirst("Content-type").split(";")[0].trim();
String[] charsetPair = Arrays.stream(headers.getFirst("Content-type").split(";"))
.map(x -> x.split("="))
.filter(x -> x.length > 0 && "charset".equals(x[0]))
.findFirst().orElse(new String[]{"charset", defaultEncoding});
if (charsetPair.length == 2) {
encoding = charsetPair[1];
} else {
encoding = defaultEncoding;
}
} else {
encoding = defaultEncoding;
}
final String contentType = getContentType(headers);
// Get the encoding
final String encoding = getEncoding(headers);

String text = IOUtils.slurpReader(IOUtils.encodedInputStreamReader(httpExchange.getRequestBody(), encoding));
if (contentType.equals(URL_ENCODED)) {
Expand Down Expand Up @@ -352,6 +339,71 @@ private Annotation getDocument(Properties props, HttpExchange httpExchange) thro
}
}

private String getContentType(Headers headers) {
String contentType = URL_ENCODED;
if (headers.containsKey("Content-type")) {
contentType = headers.getFirst("Content-type").split(";")[0].trim();
}
return contentType;
}

private String getEncoding(Headers headers) {
// The default encoding by the HTTP standard is ISO-8859-1, but most
// real users of CoreNLP would likely assume UTF-8 by default.
String defaultEncoding = this.strict ? "ISO-8859-1" : "UTF-8";
if (headers.containsKey("Content-type")) {
String[] charsetPair = Arrays.stream(headers.getFirst("Content-type").split(";"))
.map(x -> x.split("="))
.filter(x -> x.length > 0 && "charset".equals(x[0]))
.findFirst().orElse(new String[]{"charset", defaultEncoding});
if (charsetPair.length == 2) {
return charsetPair[1];
} else {
return defaultEncoding;
}
} else {
return defaultEncoding;
}
}

/**
* Get a SceneGraph request from the query, either from a query parameter (q)
* or from the body of the request
* <br>
* TODO: don't actually know if the scenegraph parser is threadsafe.
*
* @return query
*/
private String getSceneGraphRequest(Properties props, HttpExchange httpExchange) throws IOException, ClassNotFoundException {
final String inputFormat = props.getProperty("inputFormat", "text");
if (!inputFormat.equals("text")) {
throw new IOException("Unhandled input format for scenegraph: " + inputFormat);
}
String query = props.getProperty("q", null);
if (query != null) {
return query;
}

Headers headers = httpExchange.getRequestHeaders();
// the original default behavior of the server was to
// unescape, so let's assume by default that the input text is
// escaped. if the Content-type is set to text we will know
// we shouldn't unescape after all
final String contentType = getContentType(headers);
// Get the encoding
final String encoding = getEncoding(headers);

String text = IOUtils.slurpReader(IOUtils.encodedInputStreamReader(httpExchange.getRequestBody(), encoding));
if (contentType.equals(URL_ENCODED)) {
try {
text = URLDecoder.decode(text, encoding);
} catch (IllegalArgumentException e) {
// ignore decoding errors so that libraries which don't specify a content type might not fail
}
}

return text;
}

/**
* Create (or retrieve) a StanfordCoreNLP object corresponding to these properties.
Expand Down Expand Up @@ -394,6 +446,29 @@ private StanfordCoreNLP mkStanfordCoreNLP(Properties props) {
return impl;
}

/**
* This server has at most one SceneGraph parser, and it is not created at startup time
* as most applications will not use it.
* <br>
* This function call creates it in a synchronized manner, so at most one is ever created.
* <br>
* @return RuleBasedParser
*/
private RuleBasedParser mkSceneGraphParser() {
if (sceneParser != null) {
return sceneParser;
}
synchronized (this) {
// in case it got created in another thread
if (sceneParser != null) {
return sceneParser;
}
RuleBasedParser parser = new RuleBasedParser();
sceneParser = parser;
return parser;
}
}

/**
* Parse the parameters of a connection into a CoreNLP properties file that can be passed into
* {@link StanfordCoreNLP}, and used in the I/O stages.
Expand Down Expand Up @@ -1404,6 +1479,115 @@ public void handle(HttpExchange httpExchange) throws IOException {
}
}

/**
* A handler for executing scenegraph on text
*/
protected class SceneGraphHandler implements HttpHandler {

/**
* An authenticator to determine if we can perform this API request.
*/
private final Predicate<Properties> authenticator;

/**
* Create a new SceneGraphHandler.
* <br>
* It's not clear what a callback would do with this, since there's no Annotation at the end of a SceneGraph call, so we just skip it
* @param callback The callback to call when annotation has finished.
*/
public SceneGraphHandler(Predicate<Properties> authenticator) {
this.authenticator = authenticator;
}

@Override
public void handle(HttpExchange httpExchange) throws IOException {
if (onBlockList(httpExchange)) {
respondUnauthorized(httpExchange);
return;
}
setHttpExchangeResponseHeaders(httpExchange);

Properties props = getProperties(httpExchange);

if (authenticator != null && ! authenticator.test(props)) {
respondUnauthorized(httpExchange);
return;
}
Map<String, String> params = getURLParams(httpExchange.getRequestURI());

Future<Pair<String, SceneGraph>> response = corenlpExecutor.submit(() -> {
try {
// Get the document
String request = getSceneGraphRequest(props, httpExchange);
if (request == null || request.equals("")) {
respondBadInput("Blank input in scenegraph", httpExchange);
return Pair.makePair("", null);
}
RuleBasedParser parser = mkSceneGraphParser();

SceneGraph graph = parser.parse(request);
if (graph == null) {
respondError("Something weird happened and the text could not be parsed!", httpExchange);
}
return Pair.makePair(request, graph);
} catch (RuntimeException e) {
warn(e);
try {
respondError(e.getClass().getName() + ": " + e.getMessage(), httpExchange);
} catch (IOException ignored) {
}
}
return Pair.makePair("", null);
});

// Send response
try {
int timeout = getTimeout(props, httpExchange);
if (sceneParser == null) {
timeout = timeout + 60000; // add 60 seconds for loading a pipeline if needed
}
Pair<String, SceneGraph> pair = response.get(timeout, TimeUnit.MILLISECONDS);
SceneGraph graph = pair.second;
if (graph == null) {
// already responded with an error
return;
}

final StanfordCoreNLP.OutputFormat of;
try {
of = StanfordCoreNLP.OutputFormat.valueOf(props.getProperty("outputFormat", "json").toUpperCase(Locale.ROOT));
} catch (RuntimeException e) {
String badFormat = props.getProperty("outputFormat");
log("Received bad output format in scenegraph '" + badFormat + "'");
respondBadInput("Interface scenegraph does not handle output format '" + badFormat + "'", httpExchange);
return;
}

final String result;
switch(of) {
case JSON:
int id = PropertiesUtils.getInt(props, "id", -1);
String url = props.getProperty("url", "");
String phrase = pair.first;
result = graph.toJSON(id, url, phrase);
break;
case TEXT:
result = graph.toReadableString();
break;
default:
log("Received unhanded output format in scenegraph '" + of + "'");
respondBadInput("Interface scenegraph does not handle output format " + of, httpExchange);
return;
}

byte[] content = result.getBytes();
sendAndGetResponse(httpExchange, content);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
respondError("Timeout when executing scenegraph query", httpExchange);
}
}
}

private static void sendAndGetResponse(HttpExchange httpExchange, byte[] response) throws IOException {
if (response.length > 0) {
httpExchange.getResponseHeaders().add("Content-type", "application/json");
Expand Down Expand Up @@ -1547,6 +1731,7 @@ public void run(Optional<Pair<String,String>> basicAuth,
withAuth(server.createContext(uriContext+"/tokensregex", new TokensRegexHandler(authenticator, callback)), basicAuth);
withAuth(server.createContext(uriContext+"/semgrex", new SemgrexHandler(authenticator, callback)), basicAuth);
withAuth(server.createContext(uriContext+"/tregex", new TregexHandler(authenticator, callback)), basicAuth);
withAuth(server.createContext(uriContext+"/scenegraph", new SceneGraphHandler(authenticator)), basicAuth);
withAuth(server.createContext(uriContext+"/corenlp-brat.js", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.js", "application/javascript")), basicAuth);
withAuth(server.createContext(uriContext+"/corenlp-brat.cs", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.css", "text/css")), basicAuth);
withAuth(server.createContext(uriContext+"/corenlp-parseviewer.js", new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-parseviewer.js", "application/javascript")), basicAuth);
Expand Down

0 comments on commit 8b40947

Please sign in to comment.