From bc96ba10c1bc0ba5ceb30785ee89597110e03e50 Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Mon, 12 Oct 2020 10:59:27 +0900 Subject: [PATCH 1/3] loading also the engines that are specified in the individual models configurations --- .../org/grobid/core/main/LibraryLoader.java | 13 +++++----- .../core/utilities/GrobidProperties.java | 26 +++++++++++++++---- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java b/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java index 3dd7fd16ee..bf29343e18 100755 --- a/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java +++ b/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java @@ -1,5 +1,6 @@ package org.grobid.core.main; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.SystemUtils; import org.grobid.core.engines.tagging.GrobidCRFEngine; @@ -11,12 +12,12 @@ import org.slf4j.LoggerFactory; import java.io.File; -import java.io.FileFilter; import java.io.FilenameFilter; import java.lang.reflect.Field; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; +import java.util.Collections; import static org.apache.commons.lang3.ArrayUtils.isEmpty; @@ -95,8 +96,8 @@ public static void load() { } - if (GrobidProperties.getGrobidCRFEngine() == GrobidCRFEngine.WAPITI || - GrobidProperties.getGrobidCRFEngine() == GrobidCRFEngine.DELFT) { + if (CollectionUtils + .containsAny(GrobidProperties.getDistinctModels(), Arrays.asList(GrobidCRFEngine.WAPITI, GrobidCRFEngine.DELFT))) { // note: if DeLFT is used, we still make Wapiti available for models not existing in DeLFT (currently segmentation and // fulltext) File[] wapitiLibFiles = libraryFolder.listFiles(new FilenameFilter() { @@ -110,7 +111,7 @@ public boolean accept(File dir, String name) { LOGGER.info("No wapiti library in the Grobid home folder"); } else { LOGGER.info("Loading Wapiti native library..."); - if (GrobidProperties.getGrobidCRFEngine() == GrobidCRFEngine.DELFT) { + if (CollectionUtils.containsAny(GrobidProperties.getDistinctModels(), Collections.singletonList(GrobidCRFEngine.DELFT))) { // if DeLFT will be used, we must not load libstdc++, it would create a conflict with tensorflow libstdc++ version // so we temporary rename the lib so that it is not loaded in this case // note that we know that, in this case, the local lib can be ignored because as DeFLT and tensorflow are installed @@ -126,7 +127,7 @@ public boolean accept(File dir, String name) { try { System.load(wapitiLibFiles[0].getAbsolutePath()); } finally { - if (GrobidProperties.getGrobidCRFEngine() == GrobidCRFEngine.DELFT) { + if (CollectionUtils.containsAny(GrobidProperties.getDistinctModels(), Arrays.asList(GrobidCRFEngine.DELFT))) { // restore libstdc++ String libstdcppPathNew = libraryFolder.getAbsolutePath() + File.separator + "libstdc++.so.6.new"; File libstdcppFileNew = new File(libstdcppPathNew); @@ -140,7 +141,7 @@ public boolean accept(File dir, String name) { } - if (GrobidProperties.getGrobidCRFEngine() == GrobidCRFEngine.DELFT) { + if (CollectionUtils.containsAny(GrobidProperties.getDistinctModels(), Collections.singletonList(GrobidCRFEngine.DELFT))) { LOGGER.info("Loading JEP native library for DeLFT... " + libraryFolder.getAbsolutePath()); // actual loading will be made at JEP initialization, so we just need to add the path in the // java.library.path (JEP will anyway try to load from java.library.path, so explicit file diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java index 971e08ed69..9175ba1007 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java @@ -15,9 +15,8 @@ import java.io.*; import java.nio.file.Files; import java.nio.file.Paths; -import java.util.Enumeration; -import java.util.Map; -import java.util.Properties; +import java.util.*; +import java.util.stream.Collectors; /** * This class loads contains all names of grobid-properties and provide methods @@ -298,10 +297,10 @@ private void init() { try { getProps().load(new FileInputStream(getGrobidPropertiesPath())); } catch (IOException exp) { - throw new GrobidPropertyException("Cannot open file of grobid.properties at location'" + GROBID_PROPERTY_PATH.getAbsolutePath() + throw new GrobidPropertyException("Cannot open file of grobid.properties at location '" + GROBID_PROPERTY_PATH.getAbsolutePath() + "'", exp); } catch (Exception exp) { - throw new GrobidPropertyException("Cannot open file of grobid properties" + getGrobidPropertiesPath().getAbsolutePath(), exp); + throw new GrobidPropertyException("Cannot open file of grobid properties " + getGrobidPropertiesPath().getAbsolutePath(), exp); } getProps().putAll(getEnvironmentVariableOverrides(System.getenv())); @@ -312,6 +311,23 @@ private void init() { loadCrfEngine(); } + /** Return the distinct values of all the engines that are needed */ + public static Set getDistinctModels() { + List modelSpecificEngines = getModelSpecificEngines(); + modelSpecificEngines.add(getGrobidCRFEngine().getExt()); + + return new HashSet(modelSpecificEngines); + } + + /** Return the distinct values of all the engines specified in the individual model configuration in the property file **/ + public static List getModelSpecificEngines() { + return getProps().keySet().stream() + .filter(k -> ((String) k).startsWith(GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE + '.')) + .map(k -> StringUtils.lowerCase(getPropertyValue((String) k))) + .distinct() + .collect(Collectors.toList()); + } + protected static void loadCrfEngine() { grobidCRFEngine = GrobidCRFEngine.get(getPropertyValue(GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE, GrobidCRFEngine.WAPITI.name())); From 79050a506ad53cf92b81d0fe1efb1527e27122ee Mon Sep 17 00:00:00 2001 From: Luca Foppiano Date: Mon, 12 Oct 2020 11:20:44 +0900 Subject: [PATCH 2/3] using the right object type --- .../main/java/org/grobid/core/main/LibraryLoader.java | 10 ++++++---- .../org/grobid/core/utilities/GrobidProperties.java | 10 +++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java b/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java index bf29343e18..cbf8215e3a 100755 --- a/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java +++ b/grobid-core/src/main/java/org/grobid/core/main/LibraryLoader.java @@ -18,6 +18,7 @@ import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; +import java.util.Set; import static org.apache.commons.lang3.ArrayUtils.isEmpty; @@ -96,8 +97,9 @@ public static void load() { } + Set distinctModels = GrobidProperties.getDistinctModels(); if (CollectionUtils - .containsAny(GrobidProperties.getDistinctModels(), Arrays.asList(GrobidCRFEngine.WAPITI, GrobidCRFEngine.DELFT))) { + .containsAny(distinctModels, Arrays.asList(GrobidCRFEngine.WAPITI, GrobidCRFEngine.DELFT))) { // note: if DeLFT is used, we still make Wapiti available for models not existing in DeLFT (currently segmentation and // fulltext) File[] wapitiLibFiles = libraryFolder.listFiles(new FilenameFilter() { @@ -111,7 +113,7 @@ public boolean accept(File dir, String name) { LOGGER.info("No wapiti library in the Grobid home folder"); } else { LOGGER.info("Loading Wapiti native library..."); - if (CollectionUtils.containsAny(GrobidProperties.getDistinctModels(), Collections.singletonList(GrobidCRFEngine.DELFT))) { + if (CollectionUtils.containsAny(distinctModels, Collections.singletonList(GrobidCRFEngine.DELFT))) { // if DeLFT will be used, we must not load libstdc++, it would create a conflict with tensorflow libstdc++ version // so we temporary rename the lib so that it is not loaded in this case // note that we know that, in this case, the local lib can be ignored because as DeFLT and tensorflow are installed @@ -127,7 +129,7 @@ public boolean accept(File dir, String name) { try { System.load(wapitiLibFiles[0].getAbsolutePath()); } finally { - if (CollectionUtils.containsAny(GrobidProperties.getDistinctModels(), Arrays.asList(GrobidCRFEngine.DELFT))) { + if (CollectionUtils.containsAny(distinctModels, Arrays.asList(GrobidCRFEngine.DELFT))) { // restore libstdc++ String libstdcppPathNew = libraryFolder.getAbsolutePath() + File.separator + "libstdc++.so.6.new"; File libstdcppFileNew = new File(libstdcppPathNew); @@ -141,7 +143,7 @@ public boolean accept(File dir, String name) { } - if (CollectionUtils.containsAny(GrobidProperties.getDistinctModels(), Collections.singletonList(GrobidCRFEngine.DELFT))) { + if (CollectionUtils.containsAny(distinctModels, Collections.singletonList(GrobidCRFEngine.DELFT))) { LOGGER.info("Loading JEP native library for DeLFT... " + libraryFolder.getAbsolutePath()); // actual loading will be made at JEP initialization, so we just need to add the path in the // java.library.path (JEP will anyway try to load from java.library.path, so explicit file diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java index 9175ba1007..f14538a0b5 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java @@ -312,18 +312,18 @@ private void init() { } /** Return the distinct values of all the engines that are needed */ - public static Set getDistinctModels() { - List modelSpecificEngines = getModelSpecificEngines(); - modelSpecificEngines.add(getGrobidCRFEngine().getExt()); + public static Set getDistinctModels() { + List modelSpecificEngines = getModelSpecificEngines(); + modelSpecificEngines.add(getGrobidCRFEngine()); return new HashSet(modelSpecificEngines); } /** Return the distinct values of all the engines specified in the individual model configuration in the property file **/ - public static List getModelSpecificEngines() { + public static List getModelSpecificEngines() { return getProps().keySet().stream() .filter(k -> ((String) k).startsWith(GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE + '.')) - .map(k -> StringUtils.lowerCase(getPropertyValue((String) k))) + .map(k -> GrobidCRFEngine.get(StringUtils.lowerCase(getPropertyValue((String) k)))) .distinct() .collect(Collectors.toList()); } From adc0a94e5318e314a0c5ed3657ba0df8f64457bd Mon Sep 17 00:00:00 2001 From: lfoppiano Date: Tue, 13 Oct 2020 05:27:56 +0900 Subject: [PATCH 3/3] update after review --- .../grobid/core/utilities/GrobidProperties.java | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java index f14538a0b5..c06bb6aaac 100755 --- a/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java +++ b/grobid-core/src/main/java/org/grobid/core/utilities/GrobidProperties.java @@ -7,12 +7,15 @@ import org.grobid.core.engines.tagging.GrobidCRFEngine; import org.grobid.core.exceptions.GrobidPropertyException; import org.grobid.core.exceptions.GrobidResourceException; -import org.grobid.core.utilities.Consolidation.GrobidConsolidationService; import org.grobid.core.main.GrobidHomeFinder; +import org.grobid.core.utilities.Consolidation.GrobidConsolidationService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Paths; import java.util.*; @@ -313,19 +316,19 @@ private void init() { /** Return the distinct values of all the engines that are needed */ public static Set getDistinctModels() { - List modelSpecificEngines = getModelSpecificEngines(); + final Set modelSpecificEngines = new HashSet<>(getModelSpecificEngines()); modelSpecificEngines.add(getGrobidCRFEngine()); - return new HashSet(modelSpecificEngines); + return modelSpecificEngines; } /** Return the distinct values of all the engines specified in the individual model configuration in the property file **/ - public static List getModelSpecificEngines() { + public static Set getModelSpecificEngines() { return getProps().keySet().stream() .filter(k -> ((String) k).startsWith(GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE + '.')) .map(k -> GrobidCRFEngine.get(StringUtils.lowerCase(getPropertyValue((String) k)))) .distinct() - .collect(Collectors.toList()); + .collect(Collectors.toSet()); } protected static void loadCrfEngine() {