Skip to content

Commit

Permalink
fix parsing and normalisation when spaces are in the way, added new u…
Browse files Browse the repository at this point in the history
…nits definitions
  • Loading branch information
lfoppiano committed Jan 13, 2023
1 parent d7da44a commit 298e4c0
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 11 deletions.
4 changes: 2 additions & 2 deletions src/main/java/org/grobid/core/data/UnitBlock.java
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,13 @@ public static String asString(List<UnitBlock> unitBlockList) {
firstNumerator = false;
}

numerator.append(ub.toString());
numerator.append(ub);
}
}
}

if (fraction) {
return numerator.append("/").append(denominator.toString()).toString();
return numerator.append("/").append(denominator).toString();
} else {
return numerator.toString();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.grobid.core.data.normalization;

import org.apache.commons.lang3.StringUtils;
import org.grobid.core.data.Unit;
import org.grobid.core.data.UnitBlock;
import org.grobid.core.data.UnitDefinition;
Expand Down Expand Up @@ -77,9 +78,16 @@ public Unit parseUnit(Unit rawUnit) {
parsedUnit.setUnitRightAttachment(rawUnit.hasUnitRightAttachment());

UnitDefinition def = quantityLexicon.getUnitByNotation(reformatted);
if (def == null) {
String reformattedWithoutSpaces = StringUtils.trim(reformatted).replace(" ", "");
def = quantityLexicon.getUnitByNotation(reformattedWithoutSpaces);
parsedUnit.setRawName(reformattedWithoutSpaces);
}

if (def == null) {
def = quantityLexicon.getUnitbyName(reformatted);
}

parsedUnit.setUnitDefinition(def);
return parsedUnit;
}
Expand Down
17 changes: 9 additions & 8 deletions src/main/java/org/grobid/core/engines/UnitParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public List<UnitBlock> resultExtraction(String result, List<LayoutToken> tokeniz
TaggingTokenClusteror clusteror = new TaggingTokenClusteror(QuantitiesModels.UNITS, result, tokenizations);
List<TaggingTokenCluster> clusters = clusteror.cluster();

int pos = 0; // position in term of characters for creating the offsets
int pos = 0; // position of characters for creating the offsets

boolean denominator = false;
int currentPow = 1;
Expand Down Expand Up @@ -178,21 +178,22 @@ public List<UnitBlock> resultExtraction(String result, List<LayoutToken> tokeniz
} else if (clusterLabel.equals(UNIT_VALUE_OTHER)) {
LOGGER.debug(clusterContent + "(O)");
} else if (clusterLabel.equals(UNIT_VALUE_POW)) {
if (clusterContent.equals("/")) {
String clusterWithoutSpaces = clusterContent.replace(" ", "");
if (clusterWithoutSpaces.equals("/")) {
denominator = true;
} else if (clusterContent.endsWith("/")) {
} else if (clusterWithoutSpaces.endsWith("/")) {
denominator = true;
unitBlock.setPow(clusterContent.replace("/", ""));
} else if (clusterContent.equals("*")) {
unitBlock.setPow(clusterWithoutSpaces.replace("/", ""));
} else if (clusterWithoutSpaces.equals("*")) {
//nothing to do
} else {
if (denominator) {
unitBlock.setPow("-" + clusterContent);
unitBlock.setPow("-" + clusterWithoutSpaces);
} else {
unitBlock.setPow(clusterContent);
unitBlock.setPow(clusterWithoutSpaces);
}
}
LOGGER.debug(clusterContent + "(P)");
LOGGER.debug(clusterWithoutSpaces + "(P)");
}
previousTag = clusterLabel;
}
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/grobid/core/lexicon/QuantityLexicon.java
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ private void processJsonNode(JsonNode node) {
}
if (unitDefinition.isSupportsPrefixes()) {
expandAndAdd(unitDefinition, rawNotation);
} else {
unitDefinition.addNotation(rawNotation);
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/org/grobid/core/utilities/UnitUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public enum Unit_Type {
THERMAL_CONDUCTIVITY("thermal conductivity"),
THERMAL_DIFFUSIVITY("thermal diffusivity"),
HEAT_CAPACITY("specific heat capacity"),
VOLUMETRIC_HEAT_CAPACITY("volumetric heat capacity"),
EMISSION_RATE("emission rate"),
CATALYTIC_ACTIVITY("catalytic activity"),
RADIANCE("radiance"),
Expand All @@ -98,7 +99,8 @@ public enum Unit_Type {
DIFFUSION_FLUX("diffusion flux"),
MAGNETIC_MOMENT("magnetic moment"),
ATOM_MASS_UNIT("atom mass unit"),
PACE("pace");
PACE("pace"),
MAXIMUM_ENERGY_PRODUCT("maximum energy product");

private String name;

Expand Down
49 changes: 49 additions & 0 deletions src/main/resources/lexicon/en/units.json
Original file line number Diff line number Diff line change
Expand Up @@ -2343,6 +2343,55 @@
"inflection": "electron-volts"
}
]
},
{
"notations": [
{
"raw": "J K^-1 m^-3"
},
{
"raw": "J/K*m-3"
},
{
"raw": "J/(K*m^3)"
},
{
"raw": "J/(K*m³)"
}
],
"type": "VOLUMETRIC_HEAT_CAPACITY",
"system": "SI_DERIVED",
"supportsPrefixes": false,
"names": [
{
"lemma": "volumetric heat capacity"
}
]
},
{
"notations": [
{
"raw": "J m^-3"
},
{
"raw": "J*m^-3"
},
{
"raw": "J/m^3"
},
{
"raw": "J/m³"
}
],
"type": "MAXIMUM_ENERGY_PRODUCT",
"system": "SI_DERIVED",
"supportsPrefixes": false,
"names": [
{
"lemma": "maximum energy product",
"inflection": "maximum energy products"
}
]
}
]
}
12 changes: 12 additions & 0 deletions src/test/java/org/grobid/core/data/UnitBlockTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,16 @@ public void testAsProductWithDenominator_2() {

assertThat(UnitBlock.asProduct(unitBlockList), is("mm^2·km^-2"));
}

/**
 * Verifies that {@code UnitBlock.asString} renders the pair of blocks
 * {@code ("k", "J", "")} and {@code ("", "m", "-3")} as {@code "kJ/m^3"}.
 *
 * NOTE(review): the constructor arguments look like (prefix, base, power) — confirm
 * against the UnitBlock constructor.
 */
@Test
public void testErrorCase() {
    final UnitBlock firstBlock = new UnitBlock("k", "J", "");
    final UnitBlock secondBlock = new UnitBlock("", "m", "-3");
    final List<UnitBlock> blocks = Arrays.asList(firstBlock, secondBlock);

    final String rendered = UnitBlock.asString(blocks);

    assertThat(rendered, is("kJ/m^3"));
}

}

0 comments on commit 298e4c0

Please sign in to comment.