From 475d6b5c3965b089dc68f07a7bbf1c4681b41d02 Mon Sep 17 00:00:00 2001 From: lopez Date: Sun, 18 Oct 2020 22:16:55 +0200 Subject: [PATCH] review delft model loading when architecture is selected --- .../core/engines/tagging/TaggerFactory.java | 9 --------- .../java/org/grobid/core/jni/DeLFTModel.java | 19 ++++++++++++------- grobid-home/config/grobid.properties | 6 +++--- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java b/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java index b4a475f98f..6e4698dcfd 100644 --- a/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java +++ b/grobid-core/src/main/java/org/grobid/core/engines/tagging/TaggerFactory.java @@ -48,15 +48,6 @@ public static synchronized GenericTagger getTagger(GrobidModel model, GrobidCRFE t = new WapitiTagger(model); break; case DELFT: - // be sure the native JEP lib can be loaded -// try { -// String libraryFolder = LibraryLoader.getLibraryFolder(); -// System.out.println(libraryFolder); -// LibraryLoader.addLibraryPath(libraryFolder); -// } catch (Exception e) { -// LOGGER.info("Loading JEP native library for DeLFT failed", e); -// } - t = new DeLFTTagger(model, architecture); break; default: diff --git a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java index ae52df74d8..805ac1a510 100644 --- a/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java +++ b/grobid-core/src/main/java/org/grobid/core/jni/DeLFTModel.java @@ -32,7 +32,7 @@ public class DeLFTModel { public DeLFTModel(GrobidModel model, String architecture) { this.modelName = model.getModelName().replace("-", "_"); try { - LOGGER.info("Loading DeLFT model for " + model.getModelName() + "..."); + LOGGER.info("Loading DeLFT model for " + model.getModelName() + " with architecture " + architecture + "..."); 
JEPThreadPool.getInstance().run(new InitModel(this.modelName, GrobidProperties.getInstance().getModelPath(), architecture)); } catch(InterruptedException e) { LOGGER.error("DeLFT model " + this.modelName + " initialization failed", e); @@ -54,10 +54,15 @@ public InitModel(String modelName, File modelPath, String architecture) { public void run() { Jep jep = JEPThreadPool.getInstance().getJEPInstance(); try { - if (this.architecture == null) - jep.eval(this.modelName+" = Sequence('" + this.modelName.replace("_", "-") + "')"); - else - jep.eval(this.modelName+" = Sequence('" + this.modelName.replace("_", "-") + "', model_type='" + this.architecture + "')"); + String fullModelName = this.modelName.replace("_", "-"); + + if (architecture != null && !architecture.equals("BidLSTM_CRF")) + fullModelName += "-" + this.architecture; + + if (GrobidProperties.getInstance().useELMo() && modelName.toLowerCase().indexOf("bert") == -1) + fullModelName += "-with_ELMo"; + + jep.eval(this.modelName+" = Sequence('" + fullModelName + "')"); jep.eval(this.modelName+".load(dir_path='"+modelPath.getAbsolutePath()+"')"); } catch(JepException e) { throw new GrobidException("DeLFT model initialization failed. 
", e); @@ -211,7 +216,7 @@ public void run() { jep.eval("print(len(x_valid), 'validation sequences')"); String useELMo = "False"; - if (GrobidProperties.getInstance().useELMo()) { + if (GrobidProperties.getInstance().useELMo() && modelName.toLowerCase().indexOf("bert") == -1) { useELMo = "True"; } @@ -266,7 +271,7 @@ public static void train(String modelName, File trainingData, File outputModel, command.add("--architecture"); command.add(architecture); } - if (GrobidProperties.getInstance().useELMo()) { + if (GrobidProperties.getInstance().useELMo() && modelName.toLowerCase().indexOf("bert") == -1) { command.add("--use-ELMo"); } diff --git a/grobid-home/config/grobid.properties b/grobid-home/config/grobid.properties index 4e61922873..619e0cc7d9 100755 --- a/grobid-home/config/grobid.properties +++ b/grobid-home/config/grobid.properties @@ -50,15 +50,15 @@ grobid.crf.engine.fulltext=wapiti #grobid.crf.engine.figure=wapiti #grobid.crf.engine.table=wapiti #grobid.crf.engine.name_citation=wapiti -#grobid.crf.engine.affiliation_address=wapiti +#grobid.crf.engine.affiliation_address=delft #grobid.crf.engine.citation=delft grobid.delft.install=../delft grobid.delft.useELMo=false grobid.delft.python.virtualEnv= grobid.delft.redirect.output=true -grobid.delft.architecture=bidLSTM_CRF -#grobid.delft.architecture=SciBERT +grobid.delft.architecture=BidLSTM_CRF +#grobid.delft.architecture=scibert grobid.pdf.blocks.max=100000 grobid.pdf.tokens.max=1000000