diff --git a/doc/Grobid-service.md b/doc/Grobid-service.md index 0b2c3308d1..58dcb39916 100644 --- a/doc/Grobid-service.md +++ b/doc/Grobid-service.md @@ -82,6 +82,19 @@ _consolidateHeader_ is a string of value 0 (no consolidation) or 1 (consolidate, | POST, PUT | multipart/form-data | application/xml | input | required | PDF file to be processed | | | | |consolidateHeader| optional | consolidateHeader is a string of value 0 (no consolidation) or 1 (consolidate, default value) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 2 seconds for the `processHeaderDocument` service. 
+ You can test this service with the **cURL** command lines, for instance header extraction from a PDF file in the current directory: ```bash curl -v --form input=@./thefile.pdf localhost:8070/api/processHeaderDocument @@ -98,6 +111,18 @@ Convert the complete input document into TEI XML format (header, body and biblio | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate all found citations) | | | | |teiCoordinates| optional | list of element names for which coordinates in the PDF document have to be added, see [Coordinates of structures in the original PDF](Coordinates-in-PDF.md) for more details | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 5-10 seconds for the `processFulltextDocument` service. 
+ You can test this service with the **cURL** command lines, for instance fulltext extraction (header, body and citations) from a PDF file in the current directory: ```bash curl -v --form input=@./thefile.pdf localhost:8070/api/processFulltextDocument @@ -124,6 +149,17 @@ Extract and convert all the bibliographical references present in the input docu | POST, PUT | multipart/form-data | application/xml | input | required | PDF file to be processed | | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate all found citations) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 3-6 seconds for the `processReferences` service. You can test this service with the **cURL** command lines, for instance extraction and parsing of all references from a PDF in the current directory without consolidation (default value): ```bash @@ -141,6 +177,18 @@ Parse a raw date string and return the corresponding normalized date in ISO 8601 | POST, PUT | application/x-www-form-urlencoded | application/xml | date | required | date to be parsed as raw string| +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. 
| +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 1 second for the `processDate` service. + You can test this service with the **cURL** command lines, for instance parsing of a raw date string: ```bash curl -X POST -d "date=September 16th, 2001" localhost:8070/api/processDate @@ -159,6 +207,18 @@ Parse a raw string corresponding to a name or a sequence of names from a header | POST, PUT | application/x-www-form-urlencoded | application/xml | names | required | sequence of names to be parsed as raw string| +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 1 second for the `processHeaderNames` service. 
+ You can test this service with the **cURL** command lines, for instance parsing of a raw sequence of header names string: ```bash curl -X POST -d "names=John Doe and Jane Smith" localhost:8070/api/processHeaderNames @@ -183,6 +243,21 @@ Parse a raw sequence of names from a bibliographical reference and return the co |--- |--- |--- |--- |--- |--- | | POST, PUT | application/x-www-form-urlencoded | application/xml | names | required | sequence of names to be parsed as raw string| + +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 1 second for the `processCitationNames` service. + + + You can test this service with the **cURL** command lines, for instance parsing of a raw sequence of citation names string: ```bash curl -X POST -d "names=J. Doe, J. Smith and B. M. Jackson" localhost:8070/api/processCitationNames @@ -206,12 +281,26 @@ which will return: #### /api/processAffiliations -Parse a raw sequence of affiliations with or without address and return the corresponding normalized affiliations with address in TEI format. +Parse a raw sequence of affiliations/addresses with or without address and return the corresponding normalized affiliations with address in TEI format. 
| method | request type | response type | parameters | requirement | description | |--- |--- |--- |--- |--- |--- | | POST, PUT | application/x-www-form-urlencoded | application/xml | affiliations | required | sequence of affiliations+addresses to be parsed as raw string| + +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 1 second for the `processAffiliations` service. + + You can test this service with the **cURL** command lines, for instance parsing of a raw affiliation string: ```bash curl -X POST -d "affiliations=Stanford University, California, USA" localhost:8070/api/processAffiliations @@ -237,6 +326,19 @@ Parse a raw bibliographical reference (in isolation) and return the correspondin | POST, PUT | application/x-www-form-urlencoded | application/xml | citations | required | bibliographical reference to be parsed as raw string| | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | + +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. 
| +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 1 second for the `processCitation` service. + You can test this service with the **cURL** command lines, for instance parsing of a raw bibliographical reference string in isolation without consolidation (default value): ```bash curl -X POST -d "citations=Graff, Expert. Opin. Ther. Targets (2002) 6(1): 103-113" localhost:8070/api/processCitation @@ -281,6 +383,21 @@ For information about how the coordinates are provided, see [Coordinates of stru | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | + +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. 
The wait time depends on the service and the capacities of the server, we suggest 3-6 seconds for the `referenceAnnotations` service. + + + #### /api/annotatePDF Return the PDF augmented with PDF annotations relative to the reference informations: reference callouts with links to the full bibliographical reference and bibliographical reference with possible external URL. @@ -292,6 +409,17 @@ Note that this service modify the original PDF, and thus be careful with legal r | POST | multipart/form-data | application/pdf | input | required | PDF file to be processed | | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the service and the capacities of the server, we suggest 5-10 seconds for the `annotatePDF` service. ### Citation extraction and normalization from patents @@ -305,6 +433,18 @@ Extract and parse the patent and non patent citations in the description of a pa | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. 
| +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the capacities of the server and the size of the input string, we suggest 5-10 seconds for the `processCitationPatentTXT` service. + You can test this service with the **cURL** command lines, for instance parsing of a raw bibliographical reference string in isolation without consolidation (default value): ```bash curl -X POST -d "input=In EP0123456B1 nothing interesting." localhost:8070/api/processCitationPatentTXT @@ -350,6 +490,18 @@ Extract and parse the patent and non patent citations in the description of a pa | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. 
The wait time depends on the capacities of the server and the size of the input document, we suggest 5-10 seconds for the `processCitationPatentTEI` service. + #### /api/processCitationPatentST36 @@ -361,6 +513,19 @@ Extract and parse the patent and non patent citations in the description of a pa | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the capacities of the server and the size of the input document, we suggest 5-10 seconds for the `processCitationPatentST36` service. + + #### /api/processCitationPatentPDF Extract and parse the patent and non patent citations in the description of a patent publication sent as PDF. Results are returned as a list of TEI citations. Note that the text layer must be available in the PDF to be processed (which is, surprisingly in this century, very rarely the case with the PDF avaialble from the main patent offices - however the patent publications that can be downloaded from Google Patents for instance have been processed by a good quality OCR). 
@@ -373,6 +538,19 @@ Extract and parse the patent and non patent citations in the description of a pa | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. | +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicates an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client needs to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the capacities of the server and the size of the input document, we suggest 5-10 seconds for the `processCitationPatentPDF` service. + + #### /api/citationPatentAnnotations This service is similar to `/api/referenceAnnotations` but for a patent document in PDF. JSON annotations relative the the input PDF are returned with coordinates as described in the page [Coordinates of structures in the original PDF](Coordinates-in-PDF.md). @@ -385,6 +563,19 @@ Patent and non patent citations can be directly visualised on the PDF layout as | | | |consolidateCitations| optional | consolidateCitations is a string of value 0 (no consolidation, default value) or 1 (consolidate the citation) | +Response status codes: + +| HTTP Status code | reason | +|--- |--- | +| 200 | Successful operation. 
| +| 204 | Process was completed, but no content could be extracted and structured | +| 400 | Wrong request, missing parameters, missing header | +| 500 | Indicate an internal service error, further described by a provided message | +| 503 | The service is not available, which usually means that all the threads are currently used | + +A `503` error with the default parallel mode normally means that all the threads available to GROBID are currently used. The client need to re-send the query after a wait time that will allow the server to free some threads. The wait time depends on the capacities of the server and the size of the input document, we suggest 5-10 seconds for the `citationPatentAnnotations` service. + + ### Administration services #### Configuration of the password for the service adminstration diff --git a/grobid-service/src/main/java/org/grobid/service/GrobidRestService.java b/grobid-service/src/main/java/org/grobid/service/GrobidRestService.java index fc5b77c342..548157f14f 100755 --- a/grobid-service/src/main/java/org/grobid/service/GrobidRestService.java +++ b/grobid-service/src/main/java/org/grobid/service/GrobidRestService.java @@ -141,22 +141,9 @@ public Response getAdmin_htmlGet(@QueryParam(SHA1) String sha1) { @POST public Response processHeaderDocument_post(@FormDataParam(INPUT) InputStream inputStream, @FormDataParam("consolidateHeader") String consolidate) { - boolean consol = validateConsolidationParam(consolidate); - String retVal = restProcessFiles.processStatelessHeaderDocument(inputStream, consol); - Response response; - if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { - response = Response.status(Response.Status.NO_CONTENT).build(); - } else { - response = Response.status(Response.Status.OK) - .entity(retVal) - .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") - .header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT") - .build(); - } - return 
response; + return restProcessFiles.processStatelessHeaderDocument(inputStream, consol); } @Path(PATH_HEADER) @@ -210,19 +197,7 @@ private Response processFulltext(InputStream inputStream, List teiCoordinates = collectCoordinates(coordinates); - String retVal = restProcessFiles.processFulltextDocument(inputStream, consolHeader, consolCitations, startPage, endPage, generate, teiCoordinates); - - Response response; - if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { - response = Response.status(Response.Status.NO_CONTENT).build(); - } else { - response = Response.status(Response.Status.OK) - .entity(retVal) - .type(MediaType.APPLICATION_XML).build(); - } - - return response; - + return restProcessFiles.processFulltextDocument(inputStream, consolHeader, consolCitations, startPage, endPage, generate, teiCoordinates); } private List collectCoordinates(List coordinates) { diff --git a/grobid-service/src/main/java/org/grobid/service/process/GrobidRestProcessFiles.java b/grobid-service/src/main/java/org/grobid/service/process/GrobidRestProcessFiles.java index e016ee0081..ca62335e8a 100644 --- a/grobid-service/src/main/java/org/grobid/service/process/GrobidRestProcessFiles.java +++ b/grobid-service/src/main/java/org/grobid/service/process/GrobidRestProcessFiles.java @@ -45,6 +45,7 @@ import java.util.List; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; +import java.util.NoSuchElementException; /** * Web services consuming a file @@ -67,31 +68,58 @@ public GrobidRestProcessFiles() { * @param consolidate consolidation parameter for the header extraction * @return a response object which contains a TEI representation of the header part */ - public String processStatelessHeaderDocument(final InputStream inputStream, final boolean consolidate) { + public Response processStatelessHeaderDocument(final InputStream inputStream, final boolean consolidate) { LOGGER.debug(methodLogIn()); String retVal = null; - boolean isparallelExec = 
GrobidServiceProperties.isParallelExec(); + Response response = null; + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { + LOGGER.error("The input file cannot be written."); throw new GrobidServiceException( "The input file cannot be written. ", Status.INTERNAL_SERVER_ERROR); + } + + // starts conversion process + retVal = engine.processHeader(originFile.getAbsolutePath(), consolidate, null); + + if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { + response = Response.status(Response.Status.NO_CONTENT).build(); } else { - // starts conversion process - engine = Engine.getEngine(isparallelExec); - retVal = engine.processHeader(originFile.getAbsolutePath(), consolidate, null); + response = Response.status(Response.Status.OK) + .entity(retVal) + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT") + .build(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. 
", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); - if (isparallelExec && engine != null) { + if (originFile != null) + IOUtilities.removeTempFile(originFile); + + if (isParallelExec && engine != null) { GrobidPoolingFactory.returnEngine(engine); } } + LOGGER.debug(methodLogOut()); - return retVal; + return response; } /** @@ -111,7 +139,7 @@ public String processStatelessHeaderDocument(final InputStream inputStream, fina * @return a response object mainly contain the TEI representation of the * full text */ - public String processFulltextDocument(final InputStream inputStream, + public Response processFulltextDocument(final InputStream inputStream, final boolean consolidateHeader, final boolean consolidateCitations, final int startPage, @@ -119,41 +147,66 @@ public String processFulltextDocument(final InputStream inputStream, final boolean generateIDs, final List teiCoordinates) throws Exception { LOGGER.debug(methodLogIn()); - String retVal; - boolean isparallelExec = GrobidServiceProperties.isParallelExec(); + String retVal = null; + Response response = null; + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { + LOGGER.error("The input file cannot be written."); throw new GrobidServiceException( - "The input file cannot be written. 
", - Status.INTERNAL_SERVER_ERROR); + "The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } + + // starts conversion process + GrobidAnalysisConfig config = + GrobidAnalysisConfig.builder() + .consolidateHeader(consolidateHeader) + .consolidateCitations(consolidateCitations) + .startPage(startPage) + .endPage(endPage) + .generateTeiIds(generateIDs) + .generateTeiCoordinates(teiCoordinates) + .build(); + + retVal = engine.fullTextToTEI(originFile, config); + + if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { + response = Response.status(Response.Status.NO_CONTENT).build(); } else { - // starts conversion process - engine = Engine.getEngine(isparallelExec); - GrobidAnalysisConfig config = - GrobidAnalysisConfig.builder() - .consolidateHeader(consolidateHeader) - .consolidateCitations(consolidateCitations) - .startPage(startPage) - .endPage(endPage) - .generateTeiIds(generateIDs) - .generateTeiCoordinates(teiCoordinates) - .build(); - - retVal = engine.fullTextToTEI(originFile, config); + response = Response.status(Response.Status.OK) + .entity(retVal) + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT") + .build(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. 
", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - if (isparallelExec && (engine != null)) { + if (isParallelExec && (engine != null)) { GrobidPoolingFactory.returnEngine(engine); } - IOUtilities.removeTempFile(originFile); + if (originFile != null) + IOUtilities.removeTempFile(originFile); } + LOGGER.debug(methodLogOut()); - return retVal; + return response; } /** @@ -182,97 +235,107 @@ public Response processStatelessFulltextAssetDocument(final InputStream inputStr final boolean generateIDs) throws Exception { LOGGER.debug(methodLogIn()); Response response = null; - String retVal; - boolean isparallelExec = GrobidServiceProperties.isParallelExec(); + String retVal = null; + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; String assetPath = null; try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { - response = Response.status(Status.INTERNAL_SERVER_ERROR).build(); + LOGGER.error("The input file cannot be written."); + throw new GrobidServiceException( + "The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } + + // set the path for the asset files + assetPath = GrobidProperties.getTempPath().getPath() + File.separator + KeyGen.getKey(); + + // starts conversion process + GrobidAnalysisConfig config = + GrobidAnalysisConfig.builder() + .consolidateHeader(consolidateHeader) + .consolidateCitations(consolidateCitations) + .startPage(startPage) + .endPage(endPage) + .generateTeiIds(generateIDs) + .pdfAssetPath(new File(assetPath)) + .build(); + + retVal = 
engine.fullTextToTEI(originFile, config); + + if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { + response = Response.status(Status.NO_CONTENT).build(); } else { - // set the path for the asset files - assetPath = GrobidProperties.getTempPath().getPath() + File.separator + KeyGen.getKey(); - - // starts conversion process - engine = Engine.getEngine(isparallelExec); - GrobidAnalysisConfig config = - GrobidAnalysisConfig.builder() - .consolidateHeader(consolidateHeader) - .consolidateCitations(consolidateCitations) - .startPage(startPage) - .endPage(endPage) - .generateTeiIds(generateIDs) - .pdfAssetPath(new File(assetPath)) - .build(); - - retVal = engine.fullTextToTEI(originFile, config); - - if (isparallelExec) { - GrobidPoolingFactory.returnEngine(engine); - engine = null; - } - - IOUtilities.removeTempFile(originFile); - if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { - response = Response.status(Status.NO_CONTENT).build(); - } else { - - response = Response.status(Status.OK).type("application/zip").build(); - - ByteArrayOutputStream ouputStream = new ByteArrayOutputStream(); - ZipOutputStream out = new ZipOutputStream(ouputStream); - out.putNextEntry(new ZipEntry("tei.xml")); - out.write(retVal.getBytes(Charset.forName("UTF-8"))); - // put now the assets, i.e. 
all the files under the asset path - File assetPathDir = new File(assetPath); - if (assetPathDir.exists()) { - File[] files = assetPathDir.listFiles(); - if (files != null) { - byte[] buffer = new byte[1024]; - for (final File currFile : files) { - if (currFile.getName().toLowerCase().endsWith(".jpg") - || currFile.getName().toLowerCase().endsWith(".png")) { - try { - ZipEntry ze = new ZipEntry(currFile.getName()); - out.putNextEntry(ze); - FileInputStream in = new FileInputStream(currFile); - int len; - while ((len = in.read(buffer)) > 0) { - out.write(buffer, 0, len); - } - in.close(); - out.closeEntry(); - } catch (IOException e) { - throw new GrobidServiceException("IO Exception when zipping", e, Status.INTERNAL_SERVER_ERROR); + response = Response.status(Status.OK).type("application/zip").build(); + + ByteArrayOutputStream ouputStream = new ByteArrayOutputStream(); + ZipOutputStream out = new ZipOutputStream(ouputStream); + out.putNextEntry(new ZipEntry("tei.xml")); + out.write(retVal.getBytes(Charset.forName("UTF-8"))); + // put now the assets, i.e. 
all the files under the asset path + File assetPathDir = new File(assetPath); + if (assetPathDir.exists()) { + File[] files = assetPathDir.listFiles(); + if (files != null) { + byte[] buffer = new byte[1024]; + for (final File currFile : files) { + if (currFile.getName().toLowerCase().endsWith(".jpg") + || currFile.getName().toLowerCase().endsWith(".png")) { + try { + ZipEntry ze = new ZipEntry(currFile.getName()); + out.putNextEntry(ze); + FileInputStream in = new FileInputStream(currFile); + int len; + while ((len = in.read(buffer)) > 0) { + out.write(buffer, 0, len); } + in.close(); + out.closeEntry(); + } catch (IOException e) { + throw new GrobidServiceException("IO Exception when zipping", e, Status.INTERNAL_SERVER_ERROR); } } } } - out.finish(); - - response = Response - .ok() - .type("application/zip") - .entity(ouputStream.toByteArray()) - .header("Content-Disposition", "attachment; filename=\"result.zip\"") - .build(); - out.close(); } + out.finish(); + + response = Response + .ok() + .type("application/zip") + .entity(ouputStream.toByteArray()) + .header("Content-Disposition", "attachment; filename=\"result.zip\"") + .build(); + out.close(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. 
", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); + if (originFile != null) + IOUtilities.removeTempFile(originFile); + if (assetPath != null) { IOUtilities.removeTempDirectory(assetPath); } - if (isparallelExec && (engine != null)) { + + if (isParallelExec && (engine != null)) { GrobidPoolingFactory.returnEngine(engine); } } + LOGGER.debug(methodLogOut()); return response; } @@ -311,51 +374,56 @@ public Response processCitationPatentPDF(final InputStream inputStream, final boolean consolidate) throws Exception { LOGGER.debug(methodLogIn()); Response response = null; - String retVal; - boolean isparallelExec = GrobidServiceProperties.isParallelExec(); + String retVal = null; + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { - response = Response.status(Status.INTERNAL_SERVER_ERROR).build(); - } else { - // starts conversion process - engine = Engine.getEngine(isparallelExec); - List patents = new ArrayList<>(); - List articles = new ArrayList<>(); - if (isparallelExec) { - retVal = engine.processAllCitationsInPDFPatent(originFile.getAbsolutePath(), - articles, patents, consolidate); - GrobidPoolingFactory.returnEngine(engine); - engine = null; - } else { - synchronized (engine) { - retVal = engine.processAllCitationsInPDFPatent(originFile.getAbsolutePath(), - articles, patents, consolidate); - } - } + LOGGER.error("The input file cannot be written."); + throw new GrobidServiceException( + 
"The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } - IOUtilities.removeTempFile(originFile); + // starts conversion process + List patents = new ArrayList<>(); + List articles = new ArrayList<>(); + retVal = engine.processAllCitationsInPDFPatent(originFile.getAbsolutePath(), + articles, patents, consolidate); - if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { - response = Response.status(Status.NO_CONTENT).build(); - } else { - //response = Response.status(Status.OK).entity(retVal).type(MediaType.APPLICATION_XML).build(); - response = Response.status(Status.OK) - .entity(retVal) - .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") - .header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); - } + if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { + response = Response.status(Status.NO_CONTENT).build(); + } else { + //response = Response.status(Status.OK).entity(retVal).type(MediaType.APPLICATION_XML).build(); + response = Response.status(Status.OK) + .entity(retVal) + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. 
", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); - if (isparallelExec && engine != null) { + if (originFile != null) + IOUtilities.removeTempFile(originFile); + + if (isParallelExec && engine != null) { GrobidPoolingFactory.returnEngine(engine); } } + LOGGER.debug(methodLogOut()); return response; } @@ -371,51 +439,56 @@ public Response processCitationPatentST36(final InputStream inputStream, final boolean consolidate) throws Exception { LOGGER.debug(methodLogIn()); Response response = null; - String retVal; - boolean isparallelExec = GrobidServiceProperties.isParallelExec(); + String retVal = null; + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { - response = Response.status(Status.INTERNAL_SERVER_ERROR).build(); - } else { - // starts conversion process - engine = Engine.getEngine(isparallelExec); - List patents = new ArrayList<>(); - List articles = new ArrayList<>(); - if (isparallelExec) { - retVal = engine.processAllCitationsInXMLPatent(originFile.getAbsolutePath(), - articles, patents, consolidate); - GrobidPoolingFactory.returnEngine(engine); - engine = null; - } else { - synchronized (engine) { - retVal = engine.processAllCitationsInXMLPatent(originFile.getAbsolutePath(), - articles, patents, consolidate); - } - } + LOGGER.error("The input file cannot be written."); + throw new GrobidServiceException( + "The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } - 
IOUtilities.removeTempFile(originFile); + // starts conversion process + List patents = new ArrayList<>(); + List articles = new ArrayList<>(); + retVal = engine.processAllCitationsInXMLPatent(originFile.getAbsolutePath(), + articles, patents, consolidate); - if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { - response = Response.status(Status.NO_CONTENT).build(); - } else { - //response = Response.status(Status.OK).entity(retVal).type(MediaType.APPLICATION_XML).build(); - response = Response.status(Status.OK) - .entity(retVal) - .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") - .header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); - } + if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { + response = Response.status(Status.NO_CONTENT).build(); + } else { + //response = Response.status(Status.OK).entity(retVal).type(MediaType.APPLICATION_XML).build(); + response = Response.status(Status.OK) + .entity(retVal) + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. 
", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); - if (isparallelExec && engine != null) { + if (originFile != null) + IOUtilities.removeTempFile(originFile); + + if (isParallelExec && engine != null) { GrobidPoolingFactory.returnEngine(engine); } } + LOGGER.debug(methodLogOut()); return response; } @@ -433,66 +506,70 @@ public Response processStatelessReferencesDocument(final InputStream inputStream final boolean consolidate) { LOGGER.debug(methodLogIn()); Response response = null; - String retVal; - boolean isparallelExec = GrobidServiceProperties.isParallelExec(); + String retVal = null; + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { - response = Response.status(Status.INTERNAL_SERVER_ERROR).build(); - } else { - // starts conversion process - engine = Engine.getEngine(isparallelExec); - List results = null; - if (isparallelExec) { - results = engine.processReferences(originFile, consolidate); - GrobidPoolingFactory.returnEngine(engine); - engine = null; - } else { - synchronized (engine) { - //TODO: VZ: sync on local var does not make sense - results = engine.processReferences(originFile, consolidate); - } - } + LOGGER.error("The input file cannot be written."); + throw new GrobidServiceException( + "The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } + + // starts conversion process + List results = engine.processReferences(originFile, consolidate); + + 
StringBuilder result = new StringBuilder(); + // dummy header + result.append("\n"); + result.append("\t\n\t\n\t\t\n\t\t" + + "\n\t\t\n\t\t\t
\n\t\t\t\t\n"); + int p = 0; + for (BibDataSet res : results) { + result.append(res.toTEI(p)); + result.append("\n"); + p++; + } + result.append("\t\t\t\t\n\t\t\t
\n\t\t
\n\t
\n
\n"); - IOUtilities.removeTempFile(originFile); + retVal = result.toString(); - StringBuilder result = new StringBuilder(); - // dummy header - result.append("\n"); - result.append("\t\n\t\n\t\t\n\t\t" + - "\n\t\t\n\t\t\t
\n\t\t\t\t\n"); - int p = 0; - for (BibDataSet res : results) { - result.append(res.toTEI(p)); - result.append("\n"); - p++; - } - result.append("\t\t\t\t\n\t\t\t
\n\t\t
\n\t
\n
\n"); - - retVal = result.toString(); - - if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { - response = Response.status(Status.NO_CONTENT).build(); - } else { - //response = Response.status(Status.OK).entity(retVal).type(MediaType.APPLICATION_XML).build(); - response = Response.status(Status.OK) - .entity(retVal) - .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") - .header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); - } + if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { + response = Response.status(Status.NO_CONTENT).build(); + } else { + //response = Response.status(Status.OK).entity(retVal).type(MediaType.APPLICATION_XML).build(); + response = Response.status(Status.OK) + .entity(retVal) + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_XML + "; charset=UTF-8") + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. 
", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); - if (isparallelExec && engine != null) { + if (originFile != null) + IOUtilities.removeTempFile(originFile); + + if (isParallelExec && engine != null) { GrobidPoolingFactory.returnEngine(engine); } } + LOGGER.debug(methodLogOut()); return response; } @@ -511,38 +588,51 @@ public Response processPDFAnnotation(final InputStream inputStream, final boolean consolidateCitations, final GrobidRestUtils.Annotation type) throws Exception { LOGGER.debug(methodLogIn()); - Response response; + Response response = null; PDDocument out = null; File originFile = null; + Engine engine = null; boolean isParallelExec = GrobidServiceProperties.isParallelExec(); - Engine engine = Engine.getEngine(isParallelExec); - try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { - response = Response.status(Status.INTERNAL_SERVER_ERROR).build(); + LOGGER.error("The input file cannot be written."); + throw new GrobidServiceException( + "The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } + + out = annotate(originFile, isParallelExec, type, engine, consolidateHeader, consolidateCitations); + if (out != null) { + ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); + out.save(outputStream); + response = Response + .ok() + .type("application/pdf") + .entity(outputStream.toByteArray()) + .header("Content-Disposition", "attachment; filename=\"" + fileName + "\"") + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, 
DELETE, PUT") + .build(); } else { - out = annotate(originFile, isParallelExec, type, engine, consolidateHeader, consolidateCitations); - - if (out != null) { - ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); - out.save(outputStream); - response = Response - .ok() - .type("application/pdf") - .entity(outputStream.toByteArray()) - .header("Content-Disposition", "attachment; filename=\"" + fileName + "\"") - .header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT") - .build(); - } else { - response = Response.status(Status.NO_CONTENT).build(); - } + response = Response.status(Status.NO_CONTENT).build(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. ", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); - //IOUtils.closeQuietly(out); + if (originFile != null) + IOUtilities.removeTempFile(originFile); + try { out.close(); } catch (IOException e) { @@ -571,11 +661,24 @@ public Response processPDFReferenceAnnotation(final InputStream inputStream, final boolean consolidateCitations) throws Exception { LOGGER.debug(methodLogIn()); Response response = null; - boolean isparallelExec = GrobidServiceProperties.isParallelExec(); + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; try { + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = 
IOUtilities.writeInputFile(inputStream); + if (originFile == null) { + LOGGER.error("The input file cannot be written."); + throw new GrobidServiceException( + "The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } + List elementWithCoords = new ArrayList<>(); elementWithCoords.add("ref"); elementWithCoords.add("biblStruct"); @@ -586,44 +689,33 @@ public Response processPDFReferenceAnnotation(final InputStream inputStream, .generateTeiCoordinates(elementWithCoords) .build(); - String json; - - if (originFile == null) { - response = Response.status(Status.INTERNAL_SERVER_ERROR).build(); + DocumentSource documentSource = DocumentSource.fromPdf(originFile); + Document teiDoc = engine.fullTextToTEIDoc(originFile, config); + String json = CitationsVisualizer.getJsonAnnotations(teiDoc, null); + + if (json != null) { + response = Response + .ok() + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON + "; charset=UTF-8") + .entity(json) + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT") + .build(); } else { - engine = Engine.getEngine(isparallelExec); - DocumentSource documentSource = DocumentSource.fromPdf(originFile); - if (isparallelExec) { - Document teiDoc = engine.fullTextToTEIDoc(originFile, config); - json = CitationsVisualizer.getJsonAnnotations(teiDoc, null); - GrobidPoolingFactory.returnEngine(engine); - engine = null; - } else { - synchronized (engine) { - //TODO: VZ: sync on local var does not make sense - Document teiDoc = engine.fullTextToTEIDoc(originFile, config); - json = CitationsVisualizer.getJsonAnnotations(teiDoc, null); - } - } - - IOUtilities.removeTempFile(originFile); - - if (json != null) { - response = Response - .ok() - //.type("application/json") - .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON + "; charset=UTF-8") - .entity(json) - .header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT") 
- .build(); - } else { - response = Response.status(Status.NO_CONTENT).build(); - } + response = Response.status(Status.NO_CONTENT).build(); } + + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. ", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); - if (isparallelExec && engine != null) { + if (originFile != null) + IOUtilities.removeTempFile(originFile); + + if (isParallelExec && engine != null) { GrobidPoolingFactory.returnEngine(engine); } } @@ -642,48 +734,48 @@ public Response annotateCitationPatentPDF(final InputStream inputStream, final boolean consolidate) throws Exception { LOGGER.debug(methodLogIn()); Response response = null; - String retVal; - boolean isparallelExec = GrobidServiceProperties.isParallelExec(); + String retVal = null; + boolean isParallelExec = GrobidServiceProperties.isParallelExec(); File originFile = null; Engine engine = null; try { - originFile = IOUtilities.writeInputFile(inputStream); + engine = Engine.getEngine(isParallelExec); + // conservative check, if no engine is free in the pool a NoSuchElementException is normally thrown + if (engine == null) { + throw new GrobidServiceException( + "No GROBID engine available", Status.SERVICE_UNAVAILABLE); + } + originFile = IOUtilities.writeInputFile(inputStream); if (originFile == null) { - response = Response.status(Status.INTERNAL_SERVER_ERROR).build(); + LOGGER.error("The input file cannot be written."); + throw new GrobidServiceException( + "The input file cannot be written.", Status.INTERNAL_SERVER_ERROR); + } + + // starts conversion process + retVal = engine.annotateAllCitationsInPDFPatent(originFile.getAbsolutePath(), consolidate); + + if 
(GrobidRestUtils.isResultNullOrEmpty(retVal)) { + response = Response.status(Status.NO_CONTENT).build(); } else { - // starts conversion process - engine = Engine.getEngine(isparallelExec); - //List patents = new ArrayList(); - //List articles = new ArrayList(); - if (isparallelExec) { - retVal = engine.annotateAllCitationsInPDFPatent(originFile.getAbsolutePath(), - consolidate); - GrobidPoolingFactory.returnEngine(engine); - engine = null; - } else { - synchronized (engine) { - retVal = engine.annotateAllCitationsInPDFPatent(originFile.getAbsolutePath(), - consolidate); - } - } - - IOUtilities.removeTempFile(originFile); - - if (GrobidRestUtils.isResultNullOrEmpty(retVal)) { - response = Response.status(Status.NO_CONTENT).build(); - } else { - //response = Response.status(Status.OK).entity(retVal).type(MediaType.APPLICATION_JSON).build(); - response = Response.status(Status.OK) - .entity(retVal) - .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON + "; charset=UTF-8") - .header("Access-Control-Allow-Origin", "*") - .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); - } + response = Response.status(Status.OK) + .entity(retVal) + .header(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON + "; charset=UTF-8") + .header("Access-Control-Allow-Origin", "*") + .header("Access-Control-Allow-Methods", "GET, POST, DELETE, PUT").build(); } + } catch (NoSuchElementException nseExp) { + LOGGER.error("Could not get an engine from the pool within configured time. Sending service unavailable."); + response = Response.status(Status.SERVICE_UNAVAILABLE).build(); + } catch (Exception exp) { + LOGGER.error("An unexpected exception occurs. 
", exp); + response = Response.status(Status.INTERNAL_SERVER_ERROR).entity(exp.getMessage()).build(); } finally { - IOUtilities.removeTempFile(originFile); - if (isparallelExec && engine != null) { + if (originFile != null) + IOUtilities.removeTempFile(originFile); + + if (isParallelExec && engine != null) { GrobidPoolingFactory.returnEngine(engine); } } @@ -699,7 +791,7 @@ public String methodLogOut() { return "<< " + GrobidRestProcessFiles.class.getName() + "." + Thread.currentThread().getStackTrace()[1].getMethodName(); } - protected PDDocument annotate(File originFile, boolean isparallelExec, + protected PDDocument annotate(File originFile, boolean isParallelExec, final GrobidRestUtils.Annotation type, Engine engine, final boolean consolidateHeader, final boolean consolidateCitations) throws Exception { @@ -718,28 +810,17 @@ protected PDDocument annotate(File originFile, boolean isparallelExec, .build(); Document teiDoc = engine.fullTextToTEIDoc(originFile, config); - //try - { - PDDocument document = PDDocument.load(originFile); - //If no pages, skip the document - if (document.getNumberOfPages() > 0) { - //DocumentSource documentSource = DocumentSource.fromPdf(originFile); - DocumentSource documentSource = teiDoc.getDocumentSource(); - if (isparallelExec) { - outputDocument = dispatchProcessing(type, document, documentSource, teiDoc); - GrobidPoolingFactory.returnEngine(engine); - } else { - synchronized (engine) { - //TODO: VZ: sync on local var does not make sense - outputDocument = dispatchProcessing(type, document, documentSource, teiDoc); - } - } - } else { - throw new RuntimeException("Cannot identify any pages in the input document. " + - "The document cannot be annotated. 
Please check whether the document is valid or the logs."); - } - } + PDDocument document = PDDocument.load(originFile); + //If no pages, skip the document + if (document.getNumberOfPages() > 0) { + DocumentSource documentSource = teiDoc.getDocumentSource(); + outputDocument = dispatchProcessing(type, document, documentSource, teiDoc); + } else { + throw new RuntimeException("Cannot identify any pages in the input document. " + + "The document cannot be annotated. Please check whether the document is valid or the logs."); + } + return outputDocument; }