Skip to content

Commit

Permalink
Merge f73ccae into 1be47b4
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Apr 26, 2021
2 parents 1be47b4 + f73ccae commit c5a619c
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 55 deletions.
Expand Up @@ -84,9 +84,9 @@ private static synchronized void getNewInstance() {
/**
 * Private constructor: selects the consolidation client according to the
 * service configured in {@code GrobidProperties} (Glutton when configured,
 * CrossRef otherwise) and creates the deserializer used for "works" results.
 */
private Consolidation() {
    if (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) {
        client = GluttonClient.getInstance();
    } else {
        client = CrossrefClient.getInstance();
    }
    workDeserializer = new WorkDeserializer();
}

public void setCntManager(CntManager cntManager) {
Expand Down Expand Up @@ -147,7 +147,7 @@ public BiblioItem consolidate(BiblioItem bib, String rawCitation) throws Excepti
if (journalTitle != null) {
journalTitle = TextUtilities.removeAccents(journalTitle);
}*/
if (cntManager != null)
if (cntManager != null)
cntManager.i(ConsolidationCounters.CONSOLIDATION);

long threadId = Thread.currentThread().getId();
Expand All @@ -157,12 +157,12 @@ public BiblioItem consolidate(BiblioItem bib, String rawCitation) throws Excepti
// call based on the identified DOI
arguments = new HashMap<String,String>();
arguments.put("doi", doi);
}
}
if (StringUtils.isNotBlank(rawCitation)) {
// call with full raw string
if (arguments == null)
arguments = new HashMap<String,String>();
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
StringUtils.isBlank(doi) )
arguments.put("query.bibliographic", rawCitation);
//arguments.put("query", rawCitation);
Expand All @@ -171,15 +171,15 @@ public BiblioItem consolidate(BiblioItem bib, String rawCitation) throws Excepti
// call based on partial metadata
if (arguments == null)
arguments = new HashMap<String,String>();
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
(StringUtils.isBlank(rawCitation) && StringUtils.isBlank(doi)) )
arguments.put("query.author", aut);
}
if (StringUtils.isNotBlank(title)) {
// call based on partial metadata
if (arguments == null)
arguments = new HashMap<String,String>();
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
(StringUtils.isBlank(rawCitation) && StringUtils.isBlank(doi)) )
arguments.put("query.title", title);
}
Expand Down Expand Up @@ -213,7 +213,7 @@ public BiblioItem consolidate(BiblioItem bib, String rawCitation) throws Excepti
}

if (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.CROSSREF) {
if (StringUtils.isBlank(doi) && StringUtils.isBlank(rawCitation) &&
if (StringUtils.isBlank(doi) && StringUtils.isBlank(rawCitation) &&
(StringUtils.isBlank(aut) || StringUtils.isBlank(title)) ) {
// there's not enough information for a crossref request, which might always return a result
return null;
Expand Down Expand Up @@ -244,7 +244,7 @@ public BiblioItem consolidate(BiblioItem bib, String rawCitation) throws Excepti
}

client.<BiblioItem>pushRequest("works", arguments, workDeserializer, threadId, new CrossrefRequestListener<BiblioItem>(0) {

@Override
public void onSuccess(List<BiblioItem> res) {
if ((res != null) && (res.size() > 0) ) {
Expand All @@ -261,18 +261,18 @@ In case of crossref REST API, for single bib. ref. consolidation (this case come
For all the other case of matching with CrossRef, we require a post-validation.
*/
if (
( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) &&
if (
( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) &&
!doiQuery
)
||
( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) &&
( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) &&
StringUtils.isNotBlank(oneRes.getDOI()) &&
doi.equals(oneRes.getDOI())
)
||
( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.CROSSREF) &&
(doiQuery) )
( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.CROSSREF) &&
(doiQuery) )
||
postValidation(bib, oneRes)) {
results.add(oneRes);
Expand All @@ -284,7 +284,7 @@ In case of crossref REST API, for single bib. ref. consolidation (this case come
break;
}
}
}
}
}

@Override
Expand All @@ -293,8 +293,8 @@ public void onError(int status, String message, Exception exception) {
}
});
} catch(Exception e) {
LOGGER.info("Consolidation error - " + ExceptionUtils.getStackTrace(e));
}
LOGGER.info("Consolidation error - ",e);
}

client.finish(threadId);
if (results.size() == 0)
Expand All @@ -307,7 +307,7 @@ public void onError(int status, String message, Exception exception) {
/**
* Try to consolidate a list of bibliographical objects in one operation with consolidation services
*/
public Map<Integer,BiblioItem> consolidate(List<BibDataSet> biblios) {
public Map<Integer,BiblioItem> consolidate(List<BibDataSet> biblios) {
if (CollectionUtils.isEmpty(biblios))
return null;
final Map<Integer,BiblioItem> results = new HashMap<Integer,BiblioItem>();
Expand All @@ -321,7 +321,7 @@ public Map<Integer,BiblioItem> consolidate(List<BibDataSet> biblios) {
for(BibDataSet bibDataSet : biblios) {
final BiblioItem theBiblio = bibDataSet.getResBib();

if (cntManager != null)
if (cntManager != null)
cntManager.i(ConsolidationCounters.TOTAL_BIB_REF);

// first we get the exploitable metadata
Expand All @@ -332,7 +332,7 @@ public Map<Integer,BiblioItem> consolidate(List<BibDataSet> biblios) {
String aut = theBiblio.getFirstAuthorSurname();
String title = theBiblio.getTitle();
String journalTitle = theBiblio.getJournal();

// and the raw string
String rawCitation = bibDataSet.getRawBib();

Expand Down Expand Up @@ -369,28 +369,28 @@ public Map<Integer,BiblioItem> consolidate(List<BibDataSet> biblios) {
// call based on the identified DOI
arguments = new HashMap<String,String>();
arguments.put("doi", doi);
}
}
if (StringUtils.isNotBlank(rawCitation)) {
// call with full raw string
if (arguments == null)
arguments = new HashMap<String,String>();
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
StringUtils.isBlank(doi) )
arguments.put("query.bibliographic", rawCitation);
}
if (StringUtils.isNotBlank(title)) {
// call based on partial metadata
if (arguments == null)
arguments = new HashMap<String,String>();
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
(StringUtils.isBlank(rawCitation) && StringUtils.isBlank(doi)) )
arguments.put("query.title", title);
}
if (StringUtils.isNotBlank(aut)) {
// call based on partial metadata
if (arguments == null)
arguments = new HashMap<String,String>();
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
if ( (GrobidProperties.getInstance().getConsolidationService() != GrobidConsolidationService.CROSSREF) ||
(StringUtils.isBlank(rawCitation) && StringUtils.isBlank(doi)) )
arguments.put("query.author", aut);
}
Expand Down Expand Up @@ -418,14 +418,14 @@ public Map<Integer,BiblioItem> consolidate(List<BibDataSet> biblios) {
arguments.put("firstPage", firstPage);
}
}

if (arguments == null || arguments.size() == 0) {
n++;
continue;
}

if (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.CROSSREF) {
if (StringUtils.isBlank(doi) && StringUtils.isBlank(rawCitation) &&
if (StringUtils.isBlank(doi) && StringUtils.isBlank(rawCitation) &&
(StringUtils.isBlank(aut) || StringUtils.isBlank(title)) ) {
// there's not enough information for a crossref request, which might always return a result
n++;
Expand Down Expand Up @@ -457,7 +457,7 @@ else if (GrobidProperties.getInstance().getConsolidationService() == GrobidConso
}

client.<BiblioItem>pushRequest("works", arguments, workDeserializer, threadId, new CrossrefRequestListener<BiblioItem>(n) {

@Override
public void onSuccess(List<BiblioItem> res) {
if ((res != null) && (res.size() > 0) ) {
Expand All @@ -475,7 +475,7 @@ public void onSuccess(List<BiblioItem> res) {
break;
}
}
}
}
}

@Override
Expand All @@ -484,8 +484,8 @@ public void onError(int status, String message, Exception exception) {
}
});
} catch(Exception e) {
LOGGER.info("Consolidation error - " + ExceptionUtils.getStackTrace(e));
}
LOGGER.info("Consolidation error - ", e);
}
n++;
}
client.finish(threadId);
Expand Down Expand Up @@ -612,25 +612,26 @@ public void onError(int status, String message, Exception exception) {
/**
* This is a DOI cleaning specifically adapted to CrossRef call
*/
private static String cleanDoi(String doi) {
protected static String cleanDoi(String doi) {
doi = doi.replace("\"", "");
doi = doi.replace("\n", "");
if (doi.startsWith("doi:") || doi.startsWith("DOI:") ||
doi.startsWith("doi/") || doi.startsWith("DOI/") ) {
doi.substring(4, doi.length());
if (StringUtils.lowerCase(doi).startsWith("doi:") || StringUtils.lowerCase(doi).startsWith("doi/")) {
doi = doi.substring(4);
doi = doi.trim();
}
doi = doi.replaceAll("[\\p{M}]", "");
doi = doi.replaceAll("\\p{InCombiningDiacriticalMarks}+", "");

doi = doi.replace(" ", "");
return doi;
}


/**
* The new public CrossRef API is a search API, and thus returns
* many false positives. It is necessary to validate return results
* against the (incomplete) source bibliographic item to block
* inconsistent results.
* The new public CrossRef API is a search API, and thus returns
* many false positives. It is necessary to validate return results
* against the (incomplete) source bibliographic item to block
* inconsistent results.
*/
private boolean postValidation(BiblioItem source, BiblioItem result) {
boolean valid = true;
Expand All @@ -642,7 +643,7 @@ private boolean postValidation(BiblioItem source, BiblioItem result) {
return false;
}*/

if (!StringUtils.isBlank(source.getFirstAuthorSurname()) &&
if (!StringUtils.isBlank(source.getFirstAuthorSurname()) &&
!StringUtils.isBlank(result.getFirstAuthorSurname())) {
//System.out.println(source.getFirstAuthorSurname() + " / " + result.getFirstAuthorSurname() + " = " +
// ratcliffObershelpDistance(source.getFirstAuthorSurname(), result.getFirstAuthorSurname(), false));
Expand Down Expand Up @@ -670,12 +671,12 @@ private double ratcliffObershelpDistance(String string1, String string2, boolean
if (string1.equals(string2))
similarity = 1.0;
if ( (string1.length() > 0) && (string2.length() > 0) ) {
Option<Object> similarityObject =
Option<Object> similarityObject =
RatcliffObershelpMetric.compare(string1, string2);
if ( (similarityObject != null) && (similarityObject.get() != null) )
similarity = (Double)similarityObject.get();
}

return similarity.doubleValue();
}

Expand Down
@@ -1,24 +1,14 @@
package org.grobid.core.utilities.glutton;

import java.io.Closeable;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.Future;
import java.util.concurrent.ExecutionException;

import org.apache.commons.lang3.concurrent.TimedSemaphore;
import org.apache.http.client.ClientProtocolException;
import org.grobid.core.utilities.crossref.CrossrefRequestListener.Response;
import org.grobid.core.utilities.crossref.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -30,7 +20,7 @@
*/
//public class GluttonClient implements Closeable {
public class GluttonClient extends CrossrefClient {
public static final Logger logger = LoggerFactory.getLogger(GluttonClient.class);
public static final Logger LOGGER = LoggerFactory.getLogger(GluttonClient.class);

private static volatile GluttonClient instance;

Expand All @@ -54,7 +44,7 @@ public static GluttonClient getInstance() {
* Creates a new instance.
*/
private static synchronized void getNewInstance() {
    // Uses the LOGGER constant declared on this class.
    LOGGER.debug("Get new instance of GluttonClient");
    instance = new GluttonClient();
}

Expand All @@ -72,7 +62,7 @@ private GluttonClient() {
this.futures = new HashMap<>();*/
int nThreads = Runtime.getRuntime().availableProcessors();
//int nThreads = (int) Math.ceil((double)Runtime.getRuntime().availableProcessors() / 2);
System.out.println("nThreads: " + nThreads);
LOGGER.info("nThreads: " + nThreads);
this.executorService = Executors.newFixedThreadPool(nThreads*2);
//setLimits(20, 1000); // default calls per second
}
Expand Down Expand Up @@ -104,7 +94,7 @@ public Thread newThread(Runnable r) {
}*/

/**
 * Logs a message at INFO level on the GluttonClient logger.
 *
 * @param request the request the message relates to; when non-null its string
 *                form followed by {@code ": "} is put in front of the message
 * @param message the text to log
 */
public static void printLog(GluttonRequest<?> request, String message) {
    final String prefix = (request != null) ? request + ": " : "";
    // Parameterized form so the text is only assembled when INFO is enabled.
    LOGGER.info("{}{}", prefix, message);
}

Expand Down
@@ -0,0 +1,43 @@
package org.grobid.core.utilities;

import org.grobid.core.data.BiblioItem;
import org.grobid.core.main.LibraryLoader;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;

/**
 * Unit tests for the DOI clean-up done by {@code Consolidation.cleanDoi}.
 */
public class ConsolidationTest {

    /** A leading "doi:" prefix is removed from the DOI. */
    @Test
    public void testCleanDoiPrefix1_shouldRemovePrefix() throws Exception {
        final String cleaned = Consolidation.cleanDoi("doi:10.1063/1.1905789");

        assertThat(cleaned, is("10.1063/1.1905789"));
    }

    /** A leading "doi/" prefix is removed from the DOI. */
    @Test
    public void testCleanDoiPrefix2_shouldRemovePrefix() throws Exception {
        final String cleaned = Consolidation.cleanDoi("doi/10.1063/1.1905789");

        assertThat(cleaned, is("10.1063/1.1905789"));
    }

    /** A combining mark attached to the last character is removed. */
    @Test
    public void testCleanDoi_diactric() throws Exception {
        // The literal ends with a combining mark right after the final digit.
        final String withCombiningMark = "10.1063/1.1905789͔";

        final String cleaned = Consolidation.cleanDoi(withCombiningMark);

        assertThat(cleaned, is("10.1063/1.1905789"));
    }

}

0 comments on commit c5a619c

Please sign in to comment.