fix issue #461

kermitt2 · Jul 23, 2019 · 9a20338 · 9a20338
1 parent 6699124
commit 9a20338
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 8 deletions.
diff --git a/doc/References.md b/doc/References.md
@@ -55,12 +55,16 @@ Isaac G. Councill, C. Lee Giles, Min-Yen Kan. (2008) ParsCit: An open-source CRF
 
 ## Other similar Open Source tools
 
-CiteSeerX page on [Scholarly Information Extraction](http://csxstatic.ist.psu.edu/about/scholarly-information-extraction) which list many tools and related information. 
-
 [parsCit](http://wing.comp.nus.edu.sg/parsCit)
 
 [CERMINE](https://github.com/CeON/CERMINE)
 
+[Science Parse](https://github.com/allenai/science-parse) 
+
+[Science Parse v2](https://github.com/allenai/spv2) 
+
 [Metatagger](https://github.com/iesl/rexa1-metatagger)
 
 [BILBO](https://github.com/OpenEdition/bilbo)
+
+CiteSeerX page on [Scholarly Information Extraction](http://csxstatic.ist.psu.edu/downloads/software#Services) which lists tools and related information (now a bit outdated). 
diff --git a/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java b/grobid-core/src/main/java/org/grobid/core/engines/FullTextParser.java
@@ -140,6 +140,7 @@ public Document processing(DocumentSource documentSource,
                  (resHeader.getFullAuthors().size() == 0) ) {
                 resHeader = new BiblioItem();
                 parsers.getHeaderParser().processingHeaderSection(config.getConsolidateHeader(), doc, resHeader);
+                // above, use the segmentation model result
                 if (doc.getMetadata() != null) {
                     Metadata metadata = doc.getMetadata();
                     if (metadata.getTitle() != null)
@@ -158,7 +159,6 @@ public Document processing(DocumentSource documentSource,
                         parsers.getHeaderParser().consolidateHeader(resHeader, config.getConsolidateHeader());
                     }
                 }
-                // above, use the segmentation model result
             }
             // structure the abstract using the fulltext model
             if ( (resHeader.getAbstract() != null) && (resHeader.getAbstract().length() > 0) ) {

diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/Consolidation.java b/grobid-core/src/main/java/org/grobid/core/utilities/Consolidation.java
@@ -132,10 +132,11 @@ public void close() {
     public BiblioItem consolidate(BiblioItem bib, String rawCitation) throws Exception {
         final List<BiblioItem> results = new ArrayList<BiblioItem>();
 
-        String doi = bib.getDOI();
-        if (StringUtils.isNotBlank(doi)) {
-            doi = cleanDoi(doi);
+        String theDOI = bib.getDOI();
+        if (StringUtils.isNotBlank(theDOI)) {
+            theDOI = cleanDoi(theDOI);
         }
+        final String doi = theDOI;
         String aut = bib.getFirstAuthorSurname();
         String title = bib.getTitle();
         String journalTitle = bib.getJournal();
@@ -271,14 +272,23 @@ public void onSuccess(List<BiblioItem> res) {
                         for(BiblioItem oneRes : res) {
                             /* 
                               Glutton integrates its own post-validation, so we can skip post-validation in GROBID when it is used as 
-                              consolidation service.  
+                              consolidation service - except in the specific case where the DOI is failing and the consolidation is based on 
+                              the extracted title and author.  
 
                               In case of crossref REST API, for single bib. ref. consolidation (this case comes only for header extraction), 
                               having an extracted DOI matching is considered safe enough, and we don't require further post-validation.
 
                               For all other cases of matching with CrossRef, we require a post-validation. 
                             */
-                            if ((GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) 
+                            if ( 
+                                ( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) && 
+                                    !doiQuery
+                                )
+                                ||
+                                ( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.GLUTTON) && 
+                                    StringUtils.isNotBlank(oneRes.getDOI()) &&
+                                    doi.equals(oneRes.getDOI())
+                                )
                                 ||
                                 ( (GrobidProperties.getInstance().getConsolidationService() == GrobidConsolidationService.CROSSREF) && 
                                   (doiQuery) )