Skip to content

Commit

Permalink
fix problematic dash / minus sign which is not parsed by the NumberFormat class
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Dec 13, 2022
1 parent 484cd0e commit 1b71c43
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/main/java/org/grobid/core/engines/QuantityParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.apache.commons.collections4.CollectionUtils.isEmpty;
Expand All @@ -55,6 +56,8 @@ public class QuantityParser extends AbstractParser {
// private EnglishTokenizer tokeniser;
private boolean disableSubstanceParser = false;

/**
 * Matches the Unicode minus sign (U+2212).
 * During token normalisation every match is replaced with the ASCII
 * hyphen-minus "-" so that downstream numeric parsing sees a plain minus.
 * Compiled once and declared final so the shared pattern cannot be reassigned.
 */
public static final Pattern MINUS_SIGN_REGEX = Pattern.compile("\u2212");

public static QuantityParser getInstance(boolean disableSubstance) {
if (instance == null) {
instance = getNewInstance(disableSubstance);
Expand Down Expand Up @@ -115,6 +118,7 @@ public List<Measurement> process(List<LayoutToken> layoutTokens) {
//Normalisation
List<LayoutToken> layoutTokenNormalised = tokens.stream().map(layoutToken -> {
layoutToken.setText(UnicodeUtil.normaliseText(layoutToken.getText()));
layoutToken.setText(MINUS_SIGN_REGEX.matcher(layoutToken.getText()).replaceAll("-"));

return layoutToken;
}
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/org/grobid/core/engines/ValueParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ protected BigDecimal parseValueBlock(ValueBlock block, Locale locale) {
try {
BigDecimal secondPart = null;
if (block.getPow() != null && block.getBase() != null) {

final Number pow = format.parse(block.getPowAsString());
String baseAsString = removeSpacesTabsAndBl(block.getBaseAsString());

Expand All @@ -112,7 +113,7 @@ protected BigDecimal parseValueBlock(ValueBlock block, Locale locale) {
}

} catch (ParseException | ArithmeticException | NumberFormatException e) {
LOGGER.error("Cannot parse " + block.toString() + " with Locale " + locale, e);
LOGGER.error("Cannot parse " + block + " with Locale " + locale, e);
}

break;
Expand Down

0 comments on commit 1b71c43

Please sign in to comment.