Skip to content

Commit

Permalink
Try to fix the synchronisation of the language detection singleton initialisation in case of multithread environment.
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed May 23, 2020
1 parent d6ac4df commit e888436
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 42 deletions.
Expand Up @@ -42,7 +42,7 @@
public class BiblioItem {
protected static final Logger LOGGER = LoggerFactory.getLogger(BiblioItem.class);

LanguageUtilities languageUtilities = LanguageUtilities.getInstance();
private LanguageUtilities languageUtilities = LanguageUtilities.getInstance();
private AuthorEmailAssigner authorEmailAssigner = new ClassicAuthorEmailAssigner();
private EmailSanitizer emailSanitizer = new EmailSanitizer();
private String teiId;
Expand Down
Expand Up @@ -6,5 +6,5 @@
* Time: 11:03 AM
*/
public interface LanguageDetectorFactory {
public LanguageDetector getInstance();
LanguageDetector getInstance();
}
Expand Up @@ -2,8 +2,6 @@

import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LangDetectException;
import org.grobid.core.engines.Engine;
import org.grobid.core.factory.GrobidFactory;
import org.grobid.core.lang.LanguageDetector;
import org.grobid.core.lang.LanguageDetectorFactory;
import org.grobid.core.utilities.GrobidProperties;
Expand All @@ -19,7 +17,7 @@
*/
public class CybozuLanguageDetectorFactory implements LanguageDetectorFactory {
private static final Logger LOGGER = LoggerFactory.getLogger(CybozuLanguageDetectorFactory.class);
private static LanguageDetector instance = null;
private static volatile LanguageDetector instance = null;

private static void init() {
File profilePath = new File(GrobidProperties.getLanguageDetectionResourcePath(), "cybozu/profiles").getAbsoluteFile();
Expand All @@ -35,20 +33,17 @@ private static void init() {
}

/**
 * Returns the shared language detector, creating it on first use.
 *
 * The detector is kept in the static {@code instance} field, so the
 * creation step is guarded by the class object rather than by
 * {@code this}: two distinct factory objects would otherwise hold
 * different monitors and could both run {@code init()} and build a
 * detector at the same time.
 *
 * @return the shared detector instance
 */
public LanguageDetector getInstance() {
    // Single read of the shared field on the fast path.
    LanguageDetector detector = instance;
    if (detector == null) {
        synchronized (CybozuLanguageDetectorFactory.class) {
            // Re-check under the lock: another thread may have finished
            // the initialisation while this one was waiting.
            detector = instance;
            if (detector == null) {
                init();
                LOGGER.debug("synchronized getNewInstance");
                detector = new CybozuLanguageDetector();
                instance = detector;
            }
        }
    }
    return detector;
}

/**
 * Creates the shared detector if it does not exist yet.
 *
 * The lock is taken on the class, not on this factory object, because
 * the {@code instance} field is static and therefore shared by every
 * factory. The null re-check makes sure that callers which raced past
 * an unsynchronised {@code instance == null} test run {@code init()}
 * and build the detector only once.
 */
private void getNewInstance() {
    synchronized (CybozuLanguageDetectorFactory.class) {
        if (instance != null) {
            // Another thread completed the initialisation first.
            return;
        }
        init();
        LOGGER.debug("synchronized getNewInstance");
        instance = new CybozuLanguageDetector();
    }
}
}
Expand Up @@ -8,38 +8,31 @@

/**
* Class for using language guessers (singleton).
*
*
* @author Patrice Lopez
* @author Vyacheslav Zholudev
*/
public class LanguageUtilities {
public static final Logger LOGGER = LoggerFactory
.getLogger(LanguageUtilities.class);

private static LanguageUtilities instance = null;
private static volatile LanguageUtilities instance = null;

private boolean useLanguageId = false;
private LanguageDetectorFactory ldf = null;

public static/* synchronized */LanguageUtilities getInstance() {
public static LanguageUtilities getInstance() {
if (instance == null) {
getNewInstance();
synchronized (LanguageUtilities.class) {
if (instance == null) {
LOGGER.debug("synchronized getNewInstance");
instance = new LanguageUtilities();
}
}
}
return instance;
}

/**
 * Builds a new {@code LanguageUtilities} object and stores it in the
 * static {@code instance} field, replacing whatever was there before.
 *
 * Being a static synchronized method, it runs while holding the monitor
 * of {@code LanguageUtilities.class}. It does not check whether an
 * instance already exists, so every call constructs a new object.
 */
protected static synchronized void getNewInstance() {
LOGGER.debug("synchronized getNewInstance");
instance = new LanguageUtilities();
}

/**
* Hidden constructor
*/
private LanguageUtilities() {
useLanguageId = GrobidProperties.isUseLanguageId();
if (useLanguageId) {
Expand Down Expand Up @@ -69,7 +62,7 @@ private LanguageUtilities() {
/**
* Basic run for language identification. Returns a Language object consisting
* of the language code and a confidence score.
*
*
* @param text
* text to classify
* @return Language object consisting of the language code and a confidence score, or null if detection fails
Expand All @@ -81,21 +74,21 @@ public Language runLanguageId(String text) {
try {
return ldf.getInstance().detect(text);
} catch (Exception e) {
LOGGER.warn("Cannot detect language because of: " + e.getClass().getName() + ": " + e.getMessage());
LOGGER.warn("Cannot detect language. ", e);
return null;
}
}

/**
* Less basic run for language identification, where a maximum length of text is used to
* identify the language. The goal is to avoid wasting resources using a too long piece of
* text, when normally only a small chunk is enough for a safe language prediction.
* Less basic run for language identification, where a maximum length of text is used to
* identify the language. The goal is to avoid wasting resources using a too long piece of
* text, when normally only a small chunk is enough for a safe language prediction.
* Return a Language object consisting of the language code and a confidence score.
*
*
* @param text
* text to classify
* @param maxLength
* maximum length of text to be used to identify the language, expressed in characters
* @param maxLength
* maximum length of text to be used to identify the language, expressed in characters
* @return language Language object consisting of the language code and a confidence score
*/
public Language runLanguageId(String text, int maxLength) {
Expand All @@ -105,10 +98,10 @@ public Language runLanguageId(String text, int maxLength) {
try {
int max = text.length();
if (maxLength < max)
max = maxLength;
max = maxLength;
return ldf.getInstance().detect(text.substring(0, max));
} catch (Exception e) {
LOGGER.warn("Cannot detect language because of: " + e.getClass().getName() + ": " + e.getMessage());
LOGGER.warn("Cannot detect language. ", e);
return null;
}
}
Expand Down

0 comments on commit e888436

Please sign in to comment.