Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
186 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
grobid-core/src/test/java/org/grobid/core/sax/PDFALTOOutlineSaxHandlerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package org.grobid.core.sax; | ||
|
||
import org.grobid.core.document.Document; | ||
import org.grobid.core.document.DocumentSource; | ||
import org.grobid.core.document.DocumentNode; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
import javax.xml.parsers.SAXParser; | ||
import javax.xml.parsers.SAXParserFactory; | ||
|
||
import java.io.InputStream; | ||
|
||
import static org.easymock.EasyMock.createMock; | ||
import static org.hamcrest.CoreMatchers.is; | ||
import static org.hamcrest.CoreMatchers.nullValue; | ||
import static org.hamcrest.collection.IsCollectionWithSize.hasSize; | ||
import static org.junit.Assert.assertThat; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
public class PDFALTOOutlineSaxHandlerTest { | ||
SAXParserFactory spf = SAXParserFactory.newInstance(); | ||
|
||
PDFALTOOutlineSaxHandler target; | ||
DocumentSource mockDocumentSource; | ||
Document document; | ||
|
||
@Before | ||
public void setUp() throws Exception { | ||
|
||
mockDocumentSource = createMock(DocumentSource.class); | ||
|
||
document = Document.createFromText(""); | ||
target = new PDFALTOOutlineSaxHandler(document); | ||
} | ||
|
||
@Test | ||
public void testParsing_pdf2XMLOutline_ShouldWork() throws Exception { | ||
InputStream is = this.getClass().getResourceAsStream("pdfalto.xml_outline.xml"); | ||
|
||
SAXParser p = spf.newSAXParser(); | ||
p.parse(is, target); | ||
|
||
DocumentNode root = target.getRootNode(); | ||
assertTrue(root.getChildren().size() > 0); | ||
assertThat(root.getChildren(), hasSize(9)); | ||
} | ||
|
||
@Test | ||
public void testParsing_pdf2XMLOutline_errorcase_ShouldWork() throws Exception { | ||
InputStream is = this.getClass().getResourceAsStream("test_outline.xml"); | ||
|
||
SAXParser p = spf.newSAXParser(); | ||
p.parse(is, target); | ||
|
||
DocumentNode root = target.getRootNode(); | ||
assertThat(root.getChildren(), hasSize(5)); | ||
assertThat(root.getChildren().get(0).getLabel(), is("A Identification")); | ||
assertThat(root.getChildren().get(0).getChildren(), is(nullValue())); | ||
assertThat(root.getChildren().get(1).getLabel(), is("B Résumé consolidé public.")); | ||
assertThat(root.getChildren().get(1).getChildren(), hasSize(1)); | ||
assertThat(root.getChildren().get(2).getLabel(), is("C Mémoire scientifique en français")); | ||
assertThat(root.getChildren().get(2).getChildren(), hasSize(6)); | ||
assertThat(root.getChildren().get(2).getChildren().get(2).getLabel(), is("C.3 Approche scientifique et technique")); | ||
assertThat(root.getChildren().get(3).getLabel(), is("D Liste des livrables")); | ||
assertThat(root.getChildren().get(3).getChildren(), is(nullValue())); | ||
assertThat(root.getChildren().get(4).getLabel(), is("E Impact du projet")); | ||
assertThat(root.getChildren().get(4).getChildren(), hasSize(4)); | ||
assertThat(root.getChildren().get(4).getChildren().get(1).getLabel(), is("E.2 Liste des publications et communications")); | ||
assertThat(root.getChildren().get(4).getChildren().get(2).getLabel(), is("E.3 Liste des autres valorisations scientifiques")); | ||
|
||
} | ||
|
||
} |
47 changes: 0 additions & 47 deletions
47
grobid-core/src/test/java/org/grobid/core/sax/PDFALTOOutlineSaxParserTest.java
This file was deleted.
Oops, something went wrong.
75 changes: 75 additions & 0 deletions
75
grobid-core/src/test/resources/org/grobid/core/sax/test_outline.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
<?xml version="1.0"?> | ||
<!-- Outline fixture: five top-level entries (A to E). B, C and E each contain a nested level-1 list with 1, 6 and 4 entries respectively; A and D have no nested list. --> | ||
<TOCITEMS nbPages="4"> | ||
<TOCITEMLIST level="0"> | ||
<ITEM ID="0"> | ||
<STRING>A Identification</STRING> | ||
<LINK page="2" top="71.0000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="1"> | ||
<STRING>B Résumé consolidé public.</STRING> | ||
<LINK page="2" top="377.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
<TOCITEMLIST level="1" idItemParent="1"> | ||
<ITEM ID="2"> | ||
<STRING>B.1 Résumé consolidé public en français</STRING> | ||
<LINK page="2" top="412.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
</TOCITEMLIST> | ||
</ITEM> | ||
<ITEM ID="3"> | ||
<STRING>C Mémoire scientifique en français</STRING> | ||
<LINK page="1" top="71.0000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
<TOCITEMLIST level="1" idItemParent="3"> | ||
<ITEM ID="4"> | ||
<STRING>C.1 Résumé du mémoire</STRING> | ||
<LINK page="1" top="109.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="5"> | ||
<STRING>C.2 Enjeux et problématique, état de l’art</STRING> | ||
<LINK page="1" top="334.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="6"> | ||
<STRING>C.3 Approche scientifique et technique</STRING> | ||
<LINK page="1" top="235.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="7"> | ||
<STRING>C.4 Résultats obtenus</STRING> | ||
<LINK page="1" top="264.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="8"> | ||
<STRING>C.5 Discussion, conclusion</STRING> | ||
<LINK page="1" top="357.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="9"> | ||
<STRING>C.6 Références</STRING> | ||
<LINK page="1" top="582.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
</TOCITEMLIST> | ||
</ITEM> | ||
<ITEM ID="10"> | ||
<STRING>D Liste des livrables</STRING> | ||
<LINK page="1" top="71.0000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="11"> | ||
<STRING>E Impact du projet</STRING> | ||
<LINK page="1" top="649.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
<TOCITEMLIST level="1" idItemParent="11"> | ||
<ITEM ID="12"> | ||
<STRING>E.1 Indicateurs d’impact</STRING> | ||
<LINK page="1" top="368.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="13"> | ||
<STRING>E.2 Liste des publications et communications</STRING> | ||
<LINK page="1" top="317.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="14"> | ||
<STRING>E.3 Liste des autres valorisations scientifiques</STRING> | ||
<LINK page="1" top="170.000" bottom="0.0000" left="68.0000" right="0.0000"/> | ||
</ITEM> | ||
<ITEM ID="15"> | ||
<STRING>E.4 Bilan et suivi des personnels recrutés en CDD (hors stagiaires)</STRING> | ||
<LINK page="1" top="318.000" bottom="0.0000" left="40.0000" right="0.0000"/> | ||
</ITEM> | ||
</TOCITEMLIST> | ||
</ITEM> | ||
</TOCITEMLIST> | ||
</TOCITEMS> |