Skip to content

Commit

Permalink
simplify properties/parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Nov 29, 2020
1 parent 2bd9f00 commit 820ad26
Show file tree
Hide file tree
Showing 15 changed files with 54 additions and 174 deletions.
2 changes: 1 addition & 1 deletion Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ GROBID can be considered as production ready. Deployments in production includes

GROBID should run properly "out of the box" on Linux (64 bits) and macOS. We currently cannot ensure support for Windows as we did before (help welcome!).

GROBID optionally uses Deep Learning models relying on the [DeLFT](https://github.com/kermitt2/delft) library, a task-agnostic Deep Learning framework for sequence labelling and text classification. The tool can run with feature engineered CRF (default), deep learning architectures (with or without layout features) or a mixture of CRF and DL.
GROBID optionally uses Deep Learning models relying on the [DeLFT](https://github.com/kermitt2/delft) library, a task-agnostic Deep Learning framework for sequence labelling and text classification. The tool can run with feature engineered CRF (default), Deep Learning architectures (with or without layout feature channels) or any mixture of CRF and DL to balance scalability and accuracy.

For more information on how the tool works, on its key features and [benchmarking](https://grobid.readthedocs.io/en/latest/Benchmarking/), visit the [GROBID documentation](https://grobid.readthedocs.org).

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,7 @@ private final void initLanguageResources() {
InputStreamReader isr = null;
BufferedReader dis = null;
try {
if (GrobidProperties.isResourcesInHome())
ist = new FileInputStream(localFile);
else
ist = getClass().getResourceAsStream(path);
ist = new FileInputStream(localFile);
isr = new InputStreamReader(ist, "UTF8");
dis = new BufferedReader(isr);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ private File getAndValidateDelftPath() {
private JepConfig getJepConfig(File delftPath, Path sitePackagesPath) {
JepConfig config = new JepConfig();
config.addIncludePaths(delftPath.getAbsolutePath());
config.setRedirectOutputStreams(GrobidProperties.isDeLFTRedirectOutput());
//config.setRedirectOutputStreams(GrobidProperties.isDeLFTRedirectOutput());
config.setRedirectOutputStreams(true);
if (sitePackagesPath != null) {
config.addIncludePaths(sitePackagesPath.toString());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ private File getAndValidateDelftPath() {
private JepConfig getJepConfig(File delftPath, Path sitePackagesPath) {
JepConfig config = new JepConfig();
config.addIncludePaths(delftPath.getAbsolutePath());
config.setRedirectOutputStreams(GrobidProperties.isDeLFTRedirectOutput());
config.setRedirectOutputStreams(true);
if (sitePackagesPath != null) {
config.addIncludePaths(sitePackagesPath.toString());
}
Expand Down
40 changes: 6 additions & 34 deletions grobid-core/src/main/java/org/grobid/core/lexicon/Lexicon.java
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,7 @@ public final void addDictionary(String path, String lang) {
InputStreamReader isr = null;
BufferedReader dis = null;
try {
if (GrobidProperties.isResourcesInHome())
ist = new FileInputStream(file);
else
ist = getClass().getResourceAsStream(path);


ist = new FileInputStream(file);
isr = new InputStreamReader(ist, "UTF8");
dis = new BufferedReader(isr);

Expand Down Expand Up @@ -203,18 +198,11 @@ private void addCountryCodes(String path) {
file.getAbsolutePath() + "'.");
}
InputStream ist = null;
InputStreamReader isr = null;
BufferedReader dis = null;
//InputStreamReader isr = null;
//BufferedReader dis = null;
try {
if (GrobidProperties.isResourcesInHome())
ist = new FileInputStream(file);
else
ist = getClass().getResourceAsStream(path);

isr = new InputStreamReader(ist, "UTF8");
dis = new BufferedReader(isr);
ist = new FileInputStream(file);
CountryCodeSaxParser parser = new CountryCodeSaxParser(countryCodes, countries);

SAXParserFactory spf = SAXParserFactory.newInstance();
//get a new instance of parser
SAXParser p = spf.newSAXParser();
Expand All @@ -226,10 +214,6 @@ private void addCountryCodes(String path) {
try {
if (ist != null)
ist.close();
if (isr != null)
isr.close();
if (dis != null)
dis.close();
} catch (Exception e) {
throw new GrobidResourceException("Cannot close all streams.", e);
}
Expand All @@ -254,11 +238,7 @@ public final void addFirstNames(String path) {
InputStream ist = null;
BufferedReader dis = null;
try {
if (GrobidProperties.isResourcesInHome()) {
ist = new FileInputStream(file);
} else {
ist = getClass().getResourceAsStream(path);
}
ist = new FileInputStream(file);
dis = new BufferedReader(new InputStreamReader(ist, "UTF8"));

String l = null;
Expand All @@ -274,10 +254,8 @@ public final void addFirstNames(String path) {
}
}
} catch (FileNotFoundException e) {
// e.printStackTrace();
throw new GrobidException("An exception occured while running Grobid.", e);
} catch (IOException e) {
// e.printStackTrace();
throw new GrobidException("An exception occured while running Grobid.", e);
} finally {
try {
Expand All @@ -304,11 +282,7 @@ public final void addLastNames(String path) {
InputStream ist = null;
BufferedReader dis = null;
try {
if (GrobidProperties.isResourcesInHome())
ist = new FileInputStream(file);
else
ist = getClass().getResourceAsStream(path);

ist = new FileInputStream(file);
dis = new BufferedReader(new InputStreamReader(ist, "UTF8"));

String l = null;
Expand All @@ -324,10 +298,8 @@ public final void addLastNames(String path) {
}
}
} catch (FileNotFoundException e) {
// e.printStackTrace();
throw new GrobidException("An exception occured while running Grobid.", e);
} catch (IOException e) {
// e.printStackTrace();
throw new GrobidException("An exception occured while running Grobid.", e);
} finally {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,12 +451,6 @@ public static File getNativeLibraryPath() {
return new File(getPropertyValue(GrobidPropertyKeys.PROP_NATIVE_LIB_PATH));
}

/*public static boolean withSentenceSegmentation() {
return Utilities.stringToBoolean(
getPropertyValue(GrobidPropertyKeys.PROP_WITH_SENTENCE_SEGMENTATION, "false")
);
}*/

/**
* Returns the installation path of DeLFT if set, null otherwise. It is required for using
* a Deep Learning sequence labelling engine.
Expand All @@ -477,12 +471,6 @@ public static String getDeLFTFilePath() {
return pathFile.getAbsolutePath();
}

/**
 * Reports the DeLFT output-redirection setting.
 *
 * Looks up the property keyed by
 * {@link GrobidPropertyKeys#PROP_GROBID_DELFT_REDIRECT_OUTPUT} and converts
 * its textual value with {@code Utilities.stringToBoolean}.
 *
 * @return the boolean interpretation of the configured value
 */
public static boolean isDeLFTRedirectOutput() {
    final String redirectSetting =
            getPropertyValue(GrobidPropertyKeys.PROP_GROBID_DELFT_REDIRECT_OUTPUT);
    return Utilities.stringToBoolean(redirectSetting);
}

/**
 * Returns the value configured under
 * {@link GrobidPropertyKeys#PROP_GLUTTON_HOST}.
 *
 * @return the property value exactly as delivered by the property lookup
 */
public static String getGluttonHost() {
    final String gluttonHost = getPropertyValue(GrobidPropertyKeys.PROP_GLUTTON_HOST);
    return gluttonHost;
}
Expand Down Expand Up @@ -658,19 +646,9 @@ public static void setNBThreads(final String nbThreads) {
setPropertyValue(GrobidPropertyKeys.PROP_NB_THREADS, nbThreads);
}

/**
 * Indicates whether a language id shall be used, as configured in the
 * grobid-property file.
 *
 * The value stored under {@link GrobidPropertyKeys#PROP_USE_LANG_ID} is
 * converted with {@code Utilities.stringToBoolean}.
 *
 * @return true if a language id shall be used
 */
public static Boolean isUseLanguageId() {
    final String languageIdFlag = getPropertyValue(GrobidPropertyKeys.PROP_USE_LANG_ID);
    return Utilities.stringToBoolean(languageIdFlag);
}

public static String getLanguageDetectorFactory() {
String factoryClassName = getPropertyValue(GrobidPropertyKeys.PROP_LANG_DETECTOR_FACTORY);
if (isUseLanguageId() && (StringUtils.isBlank(factoryClassName))) {
if (StringUtils.isBlank(factoryClassName)) {
throw new GrobidPropertyException("Language detection is enabled but a factory class name is not provided");
}
return factoryClassName;
Expand All @@ -681,9 +659,9 @@ public static String getLanguageDetectorFactory() {
*
* @param useLanguageId true, if a language id shall be used
*/
public static void setUseLanguageId(final String useLanguageId) {
/*public static void setUseLanguageId(final String useLanguageId) {
setPropertyValue(GrobidPropertyKeys.PROP_USE_LANG_ID, useLanguageId);
}
}*/

public static String getSentenceDetectorFactory() {
String factoryClassName = getPropertyValue(GrobidPropertyKeys.PROP_SENTENCE_DETECTOR_FACTORY);
Expand All @@ -693,27 +671,6 @@ public static String getSentenceDetectorFactory() {
return factoryClassName;
}

/**
 * Indicates whether resources like firstnames, lastnames and countries are
 * supposed to be read from the grobid-home folder, as configured in the
 * grobid-property file.
 *
 * The property {@link GrobidPropertyKeys#PROP_RESOURCE_INHOME} is looked up
 * with {@code "true"} passed as the default value, and the result is
 * converted with {@code Utilities.stringToBoolean}.
 *
 * @return true if the resources shall be read from the grobid-home folder
 */
public static Boolean isResourcesInHome() {
    final String inHomeFlag =
            getPropertyValue(GrobidPropertyKeys.PROP_RESOURCE_INHOME, "true");
    return Utilities.stringToBoolean(inHomeFlag);
}

/**
 * Stores the setting that tells whether resources like firstnames,
 * lastnames and countries are supposed to be read from the grobid-home
 * folder.
 *
 * The given text is written unchanged under
 * {@link GrobidPropertyKeys#PROP_RESOURCE_INHOME}.
 *
 * @param resourceInHome textual flag ("true" to read the resources from the
 *                       grobid-home folder)
 */
public static void setResourcesInHome(final String resourceInHome) {
    final String propertyKey = GrobidPropertyKeys.PROP_RESOURCE_INHOME;
    setPropertyValue(propertyKey, resourceInHome);
}

/**
* Returns the path to the home folder of pdf to xml converter.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,19 @@ public interface GrobidPropertyKeys {

String PROP_GROBID_VERSION = "org.grobid.version";
String PROP_TMP_PATH = "grobid.temp.path";
// public static final String PROP_BIN_PATH = "grobid.bin.path";
String PROP_NATIVE_LIB_PATH = "grobid.nativelibrary.path";
String PROP_3RD_PARTY_PDFTOXML = "grobid.3rdparty.pdf2xml.path";
String PROP_3RD_PARTY_PDFTOXML_MEMORY_LIMIT = "grobid.3rdparty.pdf2xml.memory.limit.mb";
String PROP_3RD_PARTY_PDFTOXML_TIMEOUT_SEC = "grobid.3rdparty.pdf2xml.memory.timeout.sec";

String PROP_GROBID_CRF_ENGINE = "grobid.crf.engine";
String PROP_GROBID_DELFT_PATH = "grobid.delft.install";
String PROP_GROBID_DELFT_REDIRECT_OUTPUT = "grobid.delft.redirect_output";
String PROP_GROBID_DELFT_ELMO = "grobid.delft.useELMo";
String PROP_DELFT_ARCHITECTURE = "grobid.delft.architecture";
String PROP_USE_LANG_ID = "grobid.use_language_id";

String PROP_LANG_DETECTOR_FACTORY = "grobid.language_detector_factory";
String PROP_SENTENCE_DETECTOR_FACTORY = "grobid.sentence_detector_factory";

//String PROP_WITH_SENTENCE_SEGMENTATION = "grobid.with_sentence_segmentation";

String PROP_CROSSREF_ID = "grobid.crossref_id";
String PROP_CROSSREF_PW = "grobid.crossref_pw";
String PROP_CROSSREF_HOST = "grobid.crossref_host";
Expand All @@ -45,12 +41,6 @@ public interface GrobidPropertyKeys {
*/
String PROP_CROSSREF_TOKEN = "org.grobid.crossref.token";

/*String PROP_MYSQL_HOST = "grobid.mysql_host";
String PROP_MYSQL_PORT = "grobid.mysql_port";
String PROP_MYSQL_USERNAME = "grobid.mysql_username";
String PROP_MYSQL_PW = "grobid.mysql_passwd";
String PROP_MYSQL_DB_NAME = "grobid.mysql_db_name";*/

String PROP_PROXY_HOST = "grobid.proxy_host";
String PROP_PROXY_PORT = "grobid.proxy_port";

Expand All @@ -65,13 +55,6 @@ public interface GrobidPropertyKeys {
String PROP_GLUTTON_HOST = "org.grobid.glutton.host";
String PROP_GLUTTON_PORT = "org.grobid.glutton.port";

/**
* Determines whether resources such as the firstnames, lastnames, country
* codes and dictionaries are supposed to be read from the $GROBID_HOME path
* or not (possible values: true|false; GrobidProperties.isResourcesInHome()
* looks this key up with "true" as the default)
*/
String PROP_RESOURCE_INHOME = "grobid.resources.inHome";

/**
* The name of the env-entry located in the web.xml, via which the
* grobid-service.properties path is set.
Expand All @@ -84,12 +67,6 @@ public interface GrobidPropertyKeys {
*/
String PROP_GROBID_PROPERTY = "org.grobid.property";

/**
* The name of the system property, via which the grobid home folder can be
* located.
*/
// String PROP_GROBID_SERVICE_PROPERTY = "org.grobid.property.service";

/**
* name of the property setting the admin password
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public class LanguageUtilities {

private static volatile LanguageUtilities instance = null;

private boolean useLanguageId = false;
//private boolean useLanguageId = false;
private LanguageDetectorFactory ldf = null;

public static LanguageUtilities getInstance() {
Expand All @@ -34,28 +34,25 @@ public static LanguageUtilities getInstance() {
}

private LanguageUtilities() {
useLanguageId = GrobidProperties.isUseLanguageId();
if (useLanguageId) {
String className = GrobidProperties.getLanguageDetectorFactory();
try {
ldf = (LanguageDetectorFactory) Class.forName(className)
.newInstance();
} catch (ClassCastException e) {
throw new GrobidException("Class " + className
+ " must implement "
+ LanguageDetectorFactory.class.getName(), e);
} catch (ClassNotFoundException e) {
throw new GrobidException(
"Class "
+ className
+ " were not found in the classpath. "
+ "Make sure that it is provided correctly is in the classpath", e);
} catch (InstantiationException e) {
throw new GrobidException("Class " + className
+ " should have a default constructor", e);
} catch (IllegalAccessException e) {
throw new GrobidException(e);
}
String className = GrobidProperties.getLanguageDetectorFactory();
try {
ldf = (LanguageDetectorFactory) Class.forName(className)
.newInstance();
} catch (ClassCastException e) {
throw new GrobidException("Class " + className
+ " must implement "
+ LanguageDetectorFactory.class.getName(), e);
} catch (ClassNotFoundException e) {
throw new GrobidException(
"Class "
+ className
+ " were not found in the classpath. "
+ "Make sure that it is provided correctly is in the classpath", e);
} catch (InstantiationException e) {
throw new GrobidException("Class " + className
+ " should have a default constructor", e);
} catch (IllegalAccessException e) {
throw new GrobidException(e);
}
}

Expand All @@ -68,9 +65,6 @@ private LanguageUtilities() {
* @return language ids concatenated with ;
*/
public Language runLanguageId(String text) {
if (!useLanguageId) {
return null;
}
try {
return ldf.getInstance().detect(text);
} catch (Exception e) {
Expand All @@ -92,9 +86,6 @@ public Language runLanguageId(String text) {
* @return language Language object consisting of the language code and a confidence score
*/
public Language runLanguageId(String text, int maxLength) {
if (!useLanguageId) {
return null;
}
try {
int max = text.length();
if (maxLength < max)
Expand Down
Loading

0 comments on commit 820ad26

Please sign in to comment.