diff --git a/.github/workflows/ci-build-manual-crf.yml b/.github/workflows/ci-build-manual-crf.yml index aa1352478d..619529d63b 100644 --- a/.github/workflows/ci-build-manual-crf.yml +++ b/.github/workflows/ci-build-manual-crf.yml @@ -1,6 +1,7 @@ name: Build and push a CRF-only docker image -on: "workflow_dispatch" +on: + workflow_dispatch: jobs: @@ -38,4 +39,4 @@ jobs: pushImage: true tags: latest-develop, latest-crf - name: Image digest - run: echo ${{ steps.docker_build.outputs.digest }} \ No newline at end of file + run: echo ${{ steps.docker_build.outputs.digest }} diff --git a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java index f66baaa0c0..7359577bcf 100755 --- a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java +++ b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java @@ -1611,6 +1611,9 @@ public StringBuilder toTEITextPiece(StringBuilder buffer, Note note = labels2Notes.get(matching.getLeft()); OffsetPosition matchingPosition = matching.getRight(); + if (pos >= matchingPosition.start) + break; + List before = clusterTokens.subList(pos, matchingPosition.start); String clusterContentBefore = LayoutTokensUtil.normalizeDehyphenizeText(before); diff --git a/grobid-home/config/grobid.yaml b/grobid-home/config/grobid.yaml index 86cb10a2cb..647110ecdd 100644 --- a/grobid-home/config/grobid.yaml +++ b/grobid-home/config/grobid.yaml @@ -25,11 +25,11 @@ grobid: consolidation: # define the bibliographical data consolidation service to be used, either "crossref" for CrossRef REST API or # "glutton" for https://github.com/kermitt2/biblio-glutton - service: "crossref" - #service: "glutton" + #service: "crossref" + service: "glutton" glutton: - url: "https://cloud.science-miner.com/glutton" - #url: "http://localhost:8080" + #url: "https://cloud.science-miner.com/glutton" + url: "http://localhost:8080" crossref: mailto: # to use crossref web API, you need normally to use it politely and to indicate an email address here, e.g.