diff --git a/resources/dataset/original/pdf/new2/inserm-00508113.pdf b/resources/dataset/original/pdf/new2/inserm-00508113.pdf deleted file mode 100644 index 010a0611..00000000 Binary files a/resources/dataset/original/pdf/new2/inserm-00508113.pdf and /dev/null differ diff --git a/src/test/java/org/grobid/core/engines/UnitParserIntegrationTest.java b/src/test/java/org/grobid/core/engines/UnitParserIntegrationTest.java index 9a02b567..99419796 100644 --- a/src/test/java/org/grobid/core/engines/UnitParserIntegrationTest.java +++ b/src/test/java/org/grobid/core/engines/UnitParserIntegrationTest.java @@ -153,7 +153,7 @@ public void testTagUnit8() throws Exception { @Test public void testTagUnit9() throws Exception { - String input = "Dbs"; + String input = "Db s"; List output = target.tagUnit(input, false); System.out.println(input + " -> " + output); @@ -283,6 +283,43 @@ public void resultExtraction_C_divided_hours() throws Exception { assertThat(blocks.get(1).getRawTaggedValue(), is("°C /h")); } + @Test + public void resultExtraction_multiple_divisionMarks() throws Exception { + String result = "k\t0\t0\t1\t1\tNOPUNCT\t0\tI-\n" + + "m\t0\t0\t1\t1\tNOPUNCT\t0\tI-\n" + + "/\t1\t0\t0\t0\tSLASH\t0\tI-\n" + + "h\t0\t0\t1\t1\tNOPUNCT\t0\tI-\n" + + "/\t1\t0\t0\t0\tSLASH\t0\tI-\n" + + "m\t0\t0\t1\t1\tNOPUNCT\t0\tI-\n" + + "l\t0\t0\t1\t0\tNOPUNCT\t0\tI-\n" + + "/\t1\t0\t0\t0\tSLASH\t0\tI-\n" + + "k\t0\t0\t1\t1\tNOPUNCT\t0\tI-\n" + + "c\t0\t0\t1\t1\tNOPUNCT\t0\tI-\n" + + "a\t0\t0\t1\t1\tNOPUNCT\t0\t\n" + + "l\t0\t0\t1\t0\tNOPUNCT\t0\t"; + +// target.tagUnit("km/h/ml/kcal"); + + List blocks = target.resultExtraction(result, generateTokenisation("km/h/ml/kcal")); + assertThat(blocks.size(), is(4)); + assertThat(blocks.get(0).getPrefix(), is("k")); + assertThat(blocks.get(0).getBase(), is("m")); + + assertThat(blocks.get(1).getBase(), is("h")); + assertThat(blocks.get(1).getPow(), is("-1")); + + assertThat(blocks.get(2).getPrefix(), is("m")); + assertThat(blocks.get(2).getBase(), is("l")); + assertThat(blocks.get(2).getPow(), is("-1")); + + assertThat(blocks.get(3).getPrefix(), is("k")); + assertThat(blocks.get(3).getBase(), is("cal")); + assertThat(blocks.get(3).getPow(), is("-1")); + + assertThat(blocks.get(0).getRawTaggedValue(), is("km/h/ml/kcal")); + + } + public static List generateTokenisation(String input) { List tokenisation = new ArrayList<>();