Skip to content

Commit

Permalink
Merge 05d999f into ff10968
Browse files Browse the repository at this point in the history
  • Loading branch information
de-code committed Jul 27, 2020
2 parents ff10968 + 05d999f commit 13fdea7
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,7 @@ public static synchronized GenericTagger getTagger(GrobidModel model, GrobidCRFE
// LOGGER.info("Loading JEP native library for DeLFT failed", e);
// }

// if model is fulltext or segmentation we use currently WAPITI as fallback because they
// are not covered by DeLFT for the moment
if (model.equals(GrobidModels.FULLTEXT) || model.equals(GrobidModels.SEGMENTATION))
t = new WapitiTagger(model);
else
t = new DeLFTTagger(model);
t = new DeLFTTagger(model);
break;
default:
throw new IllegalStateException("Unsupported Grobid sequence labelling engine: " + GrobidProperties.getGrobidCRFEngine());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ private void init() {
loadCrfEngine();
}

private static void loadCrfEngine() {
protected static void loadCrfEngine() {
grobidCRFEngine = GrobidCRFEngine.get(getPropertyValue(GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE,
GrobidCRFEngine.WAPITI.name()));
}
Expand Down Expand Up @@ -699,15 +699,36 @@ public static File getPdfToXMLPath() {
return pathToPdfToXml;
}

/**
 * Converts a model name into the suffix used for model-specific property keys.
 * <p>
 * Every hyphen is replaced with an underscore, e.g. {@code "model-name1"} becomes
 * {@code "model_name1"}.
 *
 * @param modelName the model name; must not be {@code null}
 * @return the model name with every {@code '-'} replaced by {@code '_'}
 */
private static String getModelPropertySuffix(final String modelName) {
    // Plain character substitution: no regular expression is needed for a literal hyphen.
    return modelName.replace('-', '_');
}

/**
 * Looks up the engine name configured for a single model.
 * <p>
 * The property key is {@code PROP_GROBID_CRF_ENGINE + "." + suffix}, where the suffix is the
 * model name as transformed by {@link #getModelPropertySuffix(String)}. The name of the global
 * engine is passed to {@code getPropertyValue} as the default value.
 *
 * @param modelName the model name used to build the model-specific property key
 * @return the value returned by {@code getPropertyValue} for the model-specific key
 */
private static String getGrobidCRFEngineName(final String modelName) {
    final String fallbackEngineName = GrobidProperties.getGrobidCRFEngine().name();
    final String modelSpecificKey =
        GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE + "." + getModelPropertySuffix(modelName);
    return getPropertyValue(modelSpecificKey, fallbackEngineName);
}

/**
 * Resolves the engine to use for the model with the given name.
 * <p>
 * When the engine name resolved for the model equals the name of the global engine, the
 * global engine instance is returned directly; otherwise the name is handed to
 * {@code GrobidCRFEngine.get}.
 *
 * @param modelName the name of the model
 * @return the engine resolved for that model
 */
public static GrobidCRFEngine getGrobidCRFEngine(final String modelName) {
    final String configuredName = getGrobidCRFEngineName(modelName);
    final boolean sameAsGlobal = grobidCRFEngine.name().equals(configuredName);
    return sameAsGlobal ? grobidCRFEngine : GrobidCRFEngine.get(configuredName);
}

/**
 * Resolves the engine to use for the given model by delegating to the name-based lookup.
 *
 * @param model the model whose name is used for the lookup; must not be {@code null}
 * @return the engine resolved for {@code model.getModelName()}
 */
public static GrobidCRFEngine getGrobidCRFEngine(final GrobidModel model) {
    final String modelName = model.getModelName();
    return getGrobidCRFEngine(modelName);
}

/**
 * Returns the globally configured sequence labelling engine, i.e. the current value of the
 * static {@code grobidCRFEngine} field. Model-specific settings are not consulted here.
 *
 * @return the global engine
 */
public static GrobidCRFEngine getGrobidCRFEngine() {
return grobidCRFEngine;
}

public static File getModelPath(final GrobidModel model) {
String extension = grobidCRFEngine.getExt();
if (GrobidProperties.getGrobidCRFEngine() == GrobidCRFEngine.DELFT &&
(model.getModelName().equals("fulltext") || model.getModelName().equals("segmentation")))
extension = "wapiti";
String extension = getGrobidCRFEngine(model).getExt();
return new File(get_GROBID_HOME_PATH(), FOLDER_NAME_MODELS + File.separator
+ model.getFolderName() + File.separator
+ FILE_NAME_MODEL + "." + extension);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.apache.commons.lang3.StringUtils;
import org.grobid.core.GrobidModel;
import org.grobid.core.GrobidModels;
import org.grobid.core.engines.tagging.GrobidCRFEngine;
import org.grobid.core.exceptions.GrobidPropertyException;
import org.junit.After;
import org.junit.Before;
Expand Down Expand Up @@ -173,15 +174,97 @@ public void testsetisResourcesInHome() {
GrobidProperties.isResourcesInHome());
}

//@Test
public void testGetModelPath() {
GrobidModels value = GrobidModels.DATE;
assertEquals("The property has not the value expected",
new File(GrobidProperties.get_GROBID_HOME_PATH(),
GrobidProperties.FOLDER_NAME_MODELS + File.separator
+ value.getFolderName() + File.separator
+ GrobidProperties.FILE_NAME_MODEL),
GrobidProperties.getModelPath(value));
/** With no global engine property set, an arbitrary model name must resolve to WAPITI. */
@Test
public void testShouldReturnWapitiAsDefaultEngine() {
    GrobidProperties.getProps().remove(GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE);
    GrobidProperties.loadCrfEngine();

    final GrobidCRFEngine resolvedEngine = GrobidProperties.getGrobidCRFEngine("dummy");
    assertEquals("engine", GrobidCRFEngine.WAPITI, resolvedEngine);
}

/** A model name with no engine property of its own must resolve to the global engine. */
@Test
public void testShouldReturnConfiguredEngineIfNotConfiguredForModel() {
    final String globalEngineName = GrobidCRFEngine.DELFT.name();
    GrobidProperties.getProps().put(GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE, globalEngineName);
    GrobidProperties.loadCrfEngine();

    final GrobidCRFEngine resolvedEngine = GrobidProperties.getGrobidCRFEngine("model1");
    assertEquals("engine", GrobidCRFEngine.DELFT, resolvedEngine);
}

/**
 * With WAPITI as the global engine and DELFT set specifically for the segmentation and
 * fulltext models, both models must resolve to DELFT.
 */
@Test
public void testShouldAllowModelSpecificEngineConfiguration() {
    final String keyPrefix = GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE + ".";
    final String delftName = GrobidCRFEngine.DELFT.name();

    // Global setting first, then the two model-specific overrides.
    GrobidProperties.getProps().put(
        GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE, GrobidCRFEngine.WAPITI.name());
    GrobidProperties.getProps().put(
        keyPrefix + GrobidModels.SEGMENTATION.getModelName(), delftName);
    GrobidProperties.getProps().put(
        keyPrefix + GrobidModels.FULLTEXT.getModelName(), delftName);
    GrobidProperties.loadCrfEngine();

    final GrobidCRFEngine segmentationEngine =
        GrobidProperties.getGrobidCRFEngine(GrobidModels.SEGMENTATION);
    assertEquals("segmentation engine", GrobidCRFEngine.DELFT, segmentationEngine);

    final GrobidCRFEngine fulltextEngine =
        GrobidProperties.getGrobidCRFEngine(GrobidModels.FULLTEXT);
    assertEquals("fulltext engine", GrobidCRFEngine.DELFT, fulltextEngine);
}


/**
 * A model-specific engine property is keyed with underscores, so a lookup for the hyphenated
 * model name {@code "model-name1"} must find the value stored under {@code "model_name1"}.
 */
@Test
public void testShouldReplaceHyphenWithUnderscoreForModelSpecificEngineConfiguration() {
    GrobidProperties.getProps().put(
        GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE,
        GrobidCRFEngine.WAPITI.name()
    );
    // The property is stored with the underscore form of the model name.
    GrobidProperties.getProps().put(
        GrobidPropertyKeys.PROP_GROBID_CRF_ENGINE + "." + "model_name1",
        GrobidCRFEngine.DELFT.name()
    );
    GrobidProperties.loadCrfEngine();
    // The message names the model actually under test (it previously said "segmentation engine").
    assertEquals(
        "engine for model-name1",
        GrobidCRFEngine.DELFT,
        GrobidProperties.getGrobidCRFEngine("model-name1")
    );
}

/**
 * The model path for the DATE model must be
 * {@code <grobid home>/<models folder>/<model folder>/<model file name>.<engine extension>},
 * compared as absolute files.
 */
@Test
public void testShouldReturnModelPathWithExtension() {
    final GrobidModels model = GrobidModels.DATE;
    final String extension = GrobidProperties.getGrobidCRFEngine(model).getExt();
    final String failureMessage = "model path for " + model.name();

    final String relativeModelPath = GrobidProperties.FOLDER_NAME_MODELS
        + File.separator
        + model.getFolderName()
        + File.separator
        + GrobidProperties.FILE_NAME_MODEL
        + "."
        + extension;
    final File expectedPath =
        new File(GrobidProperties.get_GROBID_HOME_PATH(), relativeModelPath).getAbsoluteFile();

    assertEquals(
        failureMessage,
        expectedPath,
        GrobidProperties.getModelPath(model).getAbsoluteFile()
    );
}

//@Test
Expand Down
6 changes: 6 additions & 0 deletions grobid-home/config/grobid.properties
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ grobid.proxy_port=null
grobid.crf.engine=wapiti
#grobid.crf.engine=delft
#grobid.crf.engine=crfpp

# We currently use wapiti for the fulltext and segmentation models because they
# are not yet covered by DeLFT.
grobid.crf.engine.segmentation=wapiti
grobid.crf.engine.fulltext=wapiti

grobid.delft.install=../delft
grobid.delft.useELMo=false
grobid.delft.python.virtualEnv=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.grobid.core.GrobidModels;
import org.grobid.core.utilities.GrobidProperties;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
Expand All @@ -25,6 +26,11 @@ public class AbstractTrainerIntegrationTest {

private AbstractTrainer target;

// Calls GrobidProperties.getInstance() once before the tests of this class run; the returned
// value is discarded.
// NOTE(review): presumably this makes sure the GROBID properties are loaded before code
// under test reads them — confirm against GrobidProperties.getInstance().
@BeforeClass
public static void init() {
GrobidProperties.getInstance();
}

@BeforeClass
public static void beforeClass() throws Exception {
// LibraryLoader.load();
Expand Down

0 comments on commit 13fdea7

Please sign in to comment.