From 81e959fa0bb145ce517cb95612e1a1e3582ea6c1 Mon Sep 17 00:00:00 2001 From: Patrice Lopez Date: Fri, 19 Mar 2021 20:11:39 +0100 Subject: [PATCH] review sleep time and add more comments --- .../utilities/crossref/CrossrefClient.java | 18 +++++++++++++----- .../utilities/crossref/CrossrefRequest.java | 8 +++++--- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefClient.java b/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefClient.java index 5d3729c4f1..661738ce53 100644 --- a/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefClient.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefClient.java @@ -19,7 +19,11 @@ * Request pool to get data from api.crossref.org without exceeding limits * supporting multi-thread. * - * @author Vincent Kaestle, Patrice + * Note: the provided interval for the query rate returned by CrossRef appeared to be note reliable, + * so we have to use the rate limit (X-Rate-Limit-Interval) as a global parallel query limit, without + * interval consideration. + * See https://github.com/kermitt2/grobid/pull/725 + * */ public class CrossrefClient implements Closeable { public static final Logger logger = LoggerFactory.getLogger(CrossrefRequestTask.class); @@ -55,6 +59,9 @@ private static synchronized void getNewInstance() { * Hidden constructor */ protected CrossrefClient() { + // note: by default timeout with newCachedThreadPool is set to 60s, which might be too much for crossref usage, + // hanging grobid significantly, so we might want to use rather a custom instance of ThreadPoolExecutor and set + // the timeout sifferently this.executorService = Executors.newCachedThreadPool(r -> { Thread t = Executors.defaultThreadFactory().newThread(r); t.setDaemon(true); @@ -71,13 +78,14 @@ public static void printLog(CrossrefRequest request, String message) { public void setLimits(int iterations, int interval) { this.setMax_pool_size(iterations); - // interval is not useful here ! we should wait termination of each thread + // interval is not usable anymore, we need to wait termination of threads independently from any time interval } public void updateLimits(int iterations, int interval) { if (this.limitAuto) { //printLog(null, "Updating limits... " + iterations + " / " + interval); this.setLimits(iterations, interval); + // note: interval not used anymore } } @@ -90,10 +98,10 @@ public void pushRequest(CrossrefRequest request, CrossrefR if (listener != null) request.addListener(listener); synchronized(this) { - // we should limite the number of active threads depending on crossref api limits - while(((ThreadPoolExecutor)executorService).getActiveCount()>=this.getMax_pool_size()) { + // we limit the number of active threads to the crossref api dynamic limit returned in the response header + while(((ThreadPoolExecutor)executorService).getActiveCount() >= this.getMax_pool_size()) { try { - TimeUnit.MICROSECONDS.sleep(1); + TimeUnit.MICROSECONDS.sleep(10); } catch (InterruptedException e) { e.printStackTrace(); } diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefRequest.java b/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefRequest.java index ab5d5a4735..1b24caf430 100644 --- a/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefRequest.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/crossref/CrossrefRequest.java @@ -156,11 +156,13 @@ public Void handleResponse(HttpResponse response) throws ClientProtocolException message.status = response.getStatusLine().getStatusCode(); + // note: header field names are case insensitive Header limitIntervalHeader = response.getFirstHeader("X-Rate-Limit-Interval"); Header limitLimitHeader = response.getFirstHeader("X-Rate-Limit-Limit"); - if (limitIntervalHeader != null && limitLimitHeader != null) - message.setTimeLimit(limitIntervalHeader.getValue(), limitLimitHeader.getValue()); - + if (limitIntervalHeader != null && limitLimitHeader != null) { + message.setTimeLimit(limitIntervalHeader.getValue(), limitLimitHeader.getValue()); + } + if (message.status < 200 || message.status >= 300) { message.errorMessage = response.getStatusLine().getReasonPhrase(); notifyListeners(message);