Skip to content

Commit

Permalink
improving normalisation for complex units
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Jun 17, 2019
1 parent 40154c1 commit acf9ba4
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 76 deletions.
6 changes: 6 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@ dependencies {
// compile 'tech.units:indriya:2.0-SNAPSHOT'
// compile group: 'si.uom', name: 'si-units', version: '2.0-SNAPSHOT'
// compile group: 'si.uom', name: 'si-quantity', version: '2.0-SNAPSHOT'
// compile group: 'systems.uom', name: 'systems-quantity', version: '2.0-SNAPSHOT'
// compile group: 'systems.uom', name: 'systems-common', version: '2.0-SNAPSHOT'
// compile group: 'systems.uom', name: 'systems-unicode', version: '2.0-SNAPSHOT'
// compile group: 'systems.uom', name: 'systems-ucum', version: '2.0-SNAPSHOT'
// compile group: 'systems.uom', name: 'systems-ucum-java8', version: '2.0-SNAPSHOT'
// compile group: 'systems.uom', name: 'systems-unicode-java8', version: '2.0-SNAPSHOT'

// (2) UOM 1.3
compile 'tech.units:indriya:1.3'
Expand Down
28 changes: 25 additions & 3 deletions src/main/java/org/grobid/core/data/UnitBlock.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package org.grobid.core.data;

import org.apache.commons.lang3.StringUtils;

import java.util.List;

import static org.apache.commons.lang3.StringUtils.isNotEmpty;
Expand Down Expand Up @@ -74,13 +76,13 @@ public String toString() {
public String toXml() {
StringBuilder sb = new StringBuilder();

if(isNotEmpty(getPrefix())) {
if (isNotEmpty(getPrefix())) {
sb.append("<prefix>" + getPrefix() + "</prefix>");
}
if(isNotEmpty(getBase())) {
if (isNotEmpty(getBase())) {
sb.append("<base>" + getBase() + "</base>");
}
if(isNotEmpty(getPow())) {
if (isNotEmpty(getPow())) {
sb.append("<pow>" + getPow() + "</pow>");
}

Expand Down Expand Up @@ -142,6 +144,26 @@ public static String asString(List<UnitBlock> unitBlockList) {
}
}


/**
 * Renders a single unit block as a flat string: the prefix (if any) followed by the
 * base (if any), preceded by {@code "1/"} when the block's exponent is negative.
 * <p>
 * Only the sign of the exponent is used; its magnitude is not written to the output.
 * The exponent is considered negative when it contains either the Unicode minus sign
 * (U+2212) or the ASCII hyphen-minus (U+002D), so that {@code "−1"} and {@code "-1"}
 * are treated the same way.
 *
 * @param ub the unit block to render; must not be {@code null}
 * @return the rendered string, empty when the block has no prefix, no base and no
 *         negative exponent
 */
public static String asString(UnitBlock ub) {
    StringBuilder output = new StringBuilder();

    // Accept both minus glyphs: extracted text commonly carries U+2212, while
    // hand-written or normalised input tends to use the plain ASCII '-'.
    String pow = ub.getPow();
    if (StringUtils.isNotEmpty(pow) && (pow.contains("\u2212") || pow.contains("-"))) {
        output.append("1/");
    }

    String prefix = ub.getPrefix();
    if (StringUtils.isNotEmpty(prefix)) {
        output.append(prefix);
    }

    String base = ub.getBase();
    if (StringUtils.isNotEmpty(base)) {
        output.append(base);
    }

    return output.toString();
}

public void setRawTaggedValue(String rawTaggedValue) {
this.rawTaggedValue = rawTaggedValue;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.apache.commons.collections4.CollectionUtils;
import org.grobid.core.data.Quantity;
import org.grobid.core.data.Unit;
import org.grobid.core.data.UnitBlock;
import org.grobid.core.data.UnitDefinition;
import org.grobid.core.utilities.MeasurementOperations;
import org.grobid.core.utilities.UnitUtilities;
Expand All @@ -13,7 +14,6 @@

import javax.measure.format.MeasurementParseException;
import javax.measure.format.UnitFormat;
import javax.measure.quantity.Length;
import javax.measure.spi.ServiceProvider;
import javax.measure.spi.UnitFormatService;
import java.math.BigDecimal;
Expand All @@ -31,9 +31,9 @@
* <p>
* Created by lfoppiano on 14.02.16.
*/
public class QuantityNormalizer {
public class QuantityNormaliser {

private static final Logger LOGGER = LoggerFactory.getLogger(QuantityNormalizer.class);
private static final Logger LOGGER = LoggerFactory.getLogger(QuantityNormaliser.class);
protected static final String UOM_DEFAULT_PROVIDER = "tec.uom.se.spi.DefaultServiceProvider";
protected static final String UCUM_PROVIDER = "systems.uom.ucum.internal.UCUMServiceProvider";
protected static final String UNICODE_PROVIDER = "systems.uom.unicode.internal.UnicodeServiceProvider";
Expand All @@ -46,9 +46,9 @@ public class QuantityNormalizer {

private MeasurementOperations measurementOperations;

private UnitNormalizer unitNormalizer;
private UnitNormaliser unitNormaliser;

public QuantityNormalizer() {
public QuantityNormaliser() {
for (ServiceProvider provider : ServiceProvider.available()) {
UnitFormatService formatService = provider.getUnitFormatService();

Expand All @@ -63,7 +63,7 @@ public QuantityNormalizer() {
}

measurementOperations = new MeasurementOperations();
unitNormalizer = new UnitNormalizer();
unitNormaliser = new UnitNormaliser();
}


Expand All @@ -72,7 +72,7 @@ public Quantity.Normalized normalizeQuantity(Quantity quantity) throws Normalisa
return null; //or throw new NormalizationException() :-)
}

Unit parsedUnit = unitNormalizer.parseUnit(quantity.getRawUnit());
Unit parsedUnit = unitNormaliser.parseUnit(quantity.getRawUnit());
quantity.setParsedUnit(parsedUnit);

javax.measure.Unit unit = tryParsing(parsedUnit);
Expand All @@ -97,7 +97,7 @@ public Quantity.Normalized normalizeQuantity(Quantity quantity) throws Normalisa
// composeUnit(quantity, normalizedQuantity, unit);

if (quantity.isNormalized()) {
UnitDefinition definition = unitNormalizer.findDefinition(quantity.getNormalizedQuantity().getUnit());
UnitDefinition definition = unitNormaliser.findDefinition(quantity.getNormalizedQuantity().getUnit());
if (definition != null) {
quantity.getNormalizedQuantity().getUnit().setUnitDefinition(definition);
}
Expand Down Expand Up @@ -141,20 +141,44 @@ protected javax.measure.Unit tryParsing(Unit parsedUnit) throws NormalisationExc
unit = formatService.parse(parsedUnit.getRawName());
break;
} catch (MeasurementParseException | IllegalArgumentException | UnsupportedOperationException e) {
// for (UnitBlock block : parsedUnit.getProductBlocks()) {
//
// }

LOGGER.warn("Cannot parse " + parsedUnit + " with " + formatService.getClass().getName(), e);

} catch (Throwable t) {
LOGGER.warn("Cannot parse " + parsedUnit + " with " + formatService.getClass().getName(), t);
}
}

if (unit == null) {
throw new NormalisationException("Cannot parse " + parsedUnit.getRawName() + " using "
+ Arrays.toString(parsers.toArray()));
List<javax.measure.Unit> unitList = new ArrayList<>();
for (UnitBlock block : parsedUnit.getProductBlocks()) {

for (UnitFormat formatService : parsers.stream().filter(Objects::nonNull).collect(Collectors.toList())) {
try {
unitList.add(formatService.parse(UnitBlock.asString(block)));
break;
} catch (MeasurementParseException | IllegalArgumentException | UnsupportedOperationException e) {
LOGGER.warn("Cannot parse " + block.toString() + " with " + formatService.getClass().getName(), e);
} catch (Throwable t) {
LOGGER.warn("Cannot parse " + block.toString() + " with " + formatService.getClass().getName(), t);
}
}
}

if (CollectionUtils.isEmpty(unitList) || unitList.size() != parsedUnit.getProductBlocks().size()) {
throw new NormalisationException("Cannot parse " + parsedUnit.getRawName() + " using "
+ Arrays.toString(parsers.toArray()));
}

javax.measure.Unit result = null;
for(int i = 0; i < unitList.size(); i++) {
if(i == 0) {
result = unitList.get(i);
} else {
result = result.multiply(unitList.get(i));
}
}

unit = result;

}

return unit;
Expand Down Expand Up @@ -237,8 +261,8 @@ protected javax.measure.Unit tryParsing(Unit parsedUnit) throws NormalisationExc
// }


public void setUnitNormalizer(UnitNormalizer unitNormalizer) {
this.unitNormalizer = unitNormalizer;
public void setUnitNormaliser(UnitNormaliser unitNormaliser) {
this.unitNormaliser = unitNormaliser;
}

public Map<String, UnitFormat> getUnitFormats() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@
/**
* Created by lfoppiano on 23/03/16.
*/
public class UnitNormalizer {
public class UnitNormaliser {

private UnitParser unitParser;
private QuantityLexicon quantityLexicon;

public UnitNormalizer(UnitParser unitParser, QuantityLexicon quantityLexicon) {
public UnitNormaliser(UnitParser unitParser, QuantityLexicon quantityLexicon) {
this.unitParser = unitParser;
this.quantityLexicon = quantityLexicon;
}

public UnitNormalizer() {
public UnitNormaliser() {
unitParser = UnitParser.getInstance();
quantityLexicon = QuantityLexicon.getInstance();
}
Expand Down
6 changes: 3 additions & 3 deletions src/main/java/org/grobid/core/engines/QuantityParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import org.grobid.core.data.Unit;
import org.grobid.core.data.Value;
import org.grobid.core.data.normalization.NormalisationException;
import org.grobid.core.data.normalization.QuantityNormalizer;
import org.grobid.core.data.normalization.QuantityNormaliser;
import org.grobid.core.engines.label.QuantitiesTaggingLabels;
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.exceptions.GrobidException;
Expand Down Expand Up @@ -45,7 +45,7 @@ public class QuantityParser extends AbstractParser {
private static volatile QuantityParser instance;
private ValueParser valueParser = ValueParser.getInstance();
private QuantifiedObjectParser quantifiedObjectParser;
private QuantityNormalizer quantityNormalizer = new QuantityNormalizer();
private QuantityNormaliser quantityNormaliser = new QuantityNormaliser();
private EnglishTokenizer tokeniser;
private boolean disableSubstanceParser = false;

Expand Down Expand Up @@ -246,7 +246,7 @@ private void normalizeQuantity(Quantity quantity) {
if (quantity.isNormalized())
return;
try {
Quantity.Normalized quantity1 = quantityNormalizer.normalizeQuantity(quantity);
Quantity.Normalized quantity1 = quantityNormaliser.normalizeQuantity(quantity);
if (quantity1 != null) {
quantity.setNormalizedQuantity(quantity1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import org.grobid.core.data.Quantity;
import org.grobid.core.data.Unit;
import org.grobid.core.data.UnitDefinition;
import org.grobid.core.data.normalization.UnitNormalizer;
import org.grobid.core.data.normalization.UnitNormaliser;
import org.grobid.core.layout.LayoutToken;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -26,13 +26,13 @@
public class MeasurementOperations {
private static final Logger logger = LoggerFactory.getLogger(MeasurementOperations.class);

UnitNormalizer un;
UnitNormaliser un;

public MeasurementOperations() {
un = new UnitNormalizer();
un = new UnitNormaliser();
}

public MeasurementOperations(UnitNormalizer un) {
public MeasurementOperations(UnitNormaliser un) {
this.un = un;
}

Expand Down

0 comments on commit acf9ba4

Please sign in to comment.