Skip to content

Commit

Permalink
Merge e888436 into d6ac4df
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed May 23, 2020
2 parents d6ac4df + e888436 commit 1cc9478
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 42 deletions.
Expand Up @@ -42,7 +42,7 @@
public class BiblioItem {
protected static final Logger LOGGER = LoggerFactory.getLogger(BiblioItem.class);

LanguageUtilities languageUtilities = LanguageUtilities.getInstance();
private LanguageUtilities languageUtilities = LanguageUtilities.getInstance();
private AuthorEmailAssigner authorEmailAssigner = new ClassicAuthorEmailAssigner();
private EmailSanitizer emailSanitizer = new EmailSanitizer();
private String teiId;
Expand Down
Expand Up @@ -6,5 +6,5 @@
* Time: 11:03 AM
*/
public interface LanguageDetectorFactory {
public LanguageDetector getInstance();
LanguageDetector getInstance();
}
Expand Up @@ -2,8 +2,6 @@

import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import org.grobid.core.engines.Engine;
import org.grobid.core.factory.GrobidFactory;
import org.grobid.core.lang.LanguageDetector;
import org.grobid.core.lang.LanguageDetectorFactory;
import org.grobid.core.utilities.GrobidProperties;
Expand All @@ -19,7 +17,7 @@
*/
public class CybozuLanguageDetectorFactory implements LanguageDetectorFactory {
private static final Logger LOGGER = LoggerFactory.getLogger(CybozuLanguageDetectorFactory.class);
private static LanguageDetector instance = null;
private static volatile LanguageDetector instance = null;

private static void init() {
File profilePath = new File(GrobidProperties.getLanguageDetectionResourcePath(), "cybozu/profiles").getAbsoluteFile();
Expand All @@ -35,20 +33,17 @@ private static void init() {
}

/**
 * Returns the shared Cybozu language detector, creating it on first use.
 *
 * The first null check is made without a lock; creation is then done under a lock
 * with a second null check, so {@code init()} and the constructor run at most once.
 * This relies on the {@code instance} field being declared {@code volatile}.
 *
 * @return the shared {@link LanguageDetector}
 */
@Override
public LanguageDetector getInstance() {
    if (instance == null) {
        // instance is a static field shared by every factory object, so the lock has to be
        // class-wide: locking on "this" would let two factory objects initialise concurrently.
        synchronized (CybozuLanguageDetectorFactory.class) {
            if (instance == null) {
                init();
                LOGGER.debug("synchronized getNewInstance");
                instance = new CybozuLanguageDetector();
            }
        }
    }
    return instance;
}

/**
 * Runs {@code init()} and then installs a freshly constructed
 * {@link CybozuLanguageDetector} as the shared instance, replacing whatever
 * was stored before. The method is synchronized on this factory object.
 */
private synchronized void getNewInstance() {
    init();
    LOGGER.debug("synchronized getNewInstance");
    final LanguageDetector freshDetector = new CybozuLanguageDetector();
    instance = freshDetector;
}
}
Expand Up @@ -8,38 +8,31 @@

/**
* Class for using language guessers (singleton).
*
*
* @author Patrice Lopez
* @author Vyacheslav Zholudev
*/
public class LanguageUtilities {
public static final Logger LOGGER = LoggerFactory
.getLogger(LanguageUtilities.class);

private static LanguageUtilities instance = null;
private static volatile LanguageUtilities instance = null;

private boolean useLanguageId = false;
private LanguageDetectorFactory ldf = null;

public static/* synchronized */LanguageUtilities getInstance() {
public static LanguageUtilities getInstance() {
if (instance == null) {
getNewInstance();
synchronized (LanguageUtilities.class) {
if (instance == null) {
LOGGER.debug("synchronized getNewInstance");
instance = new LanguageUtilities();
}
}
}
return instance;
}

/**
 * Builds a new {@code LanguageUtilities} and stores it as the shared instance,
 * replacing any previous one. Runs while holding the class lock.
 */
protected static synchronized void getNewInstance() {
    LOGGER.debug("synchronized getNewInstance");
    final LanguageUtilities created = new LanguageUtilities();
    instance = created;
}

/**
* Hidden constructor
*/
private LanguageUtilities() {
useLanguageId = GrobidProperties.isUseLanguageId();
if (useLanguageId) {
Expand Down Expand Up @@ -69,7 +62,7 @@ private LanguageUtilities() {
/**
* Basic run for language identification. Returns a Language object consisting
* of the language code and a confidence score.
*
*
* @param text
* text to classify
* @return Language object consisting of the language code and a confidence score, or null if detection fails
Expand All @@ -81,21 +74,21 @@ public Language runLanguageId(String text) {
try {
return ldf.getInstance().detect(text);
} catch (Exception e) {
LOGGER.warn("Cannot detect language because of: " + e.getClass().getName() + ": " + e.getMessage());
LOGGER.warn("Cannot detect language. ", e);
return null;
}
}

/**
* Less basic run for language identification, where a maximum length of text is used to
* identify the language. The goal is to avoid wasting resources using a too long piece of
* text, when normally only a small chunk is enough for a safe language prediction.
* Less basic run for language identification, where a maximum length of text is used to
* identify the language. The goal is to avoid wasting resources using a too long piece of
* text, when normally only a small chunk is enough for a safe language prediction.
* Return a Language object consisting of the language code and a confidence score.
*
*
* @param text
* text to classify
* @param maxLength
* maximum length of text to be used to identify the language, expressed in characters
* @param maxLength
* maximum length of text to be used to identify the language, expressed in characters
* @return language Language object consisting of the language code and a confidence score
*/
public Language runLanguageId(String text, int maxLength) {
Expand All @@ -105,10 +98,10 @@ public Language runLanguageId(String text, int maxLength) {
try {
int max = text.length();
if (maxLength < max)
max = maxLength;
max = maxLength;
return ldf.getInstance().detect(text.substring(0, max));
} catch (Exception e) {
LOGGER.warn("Cannot detect language because of: " + e.getClass().getName() + ": " + e.getMessage());
LOGGER.warn("Cannot detect language. ", e);
return null;
}
}
Expand Down

0 comments on commit 1cc9478

Please sign in to comment.