Skip to content

Commit

Permalink
fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Sep 24, 2022
1 parent 3cca788 commit 063f559
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 11 deletions.
Expand Up @@ -910,11 +910,7 @@ public StringBuilder toTEIBody(StringBuilder buffer,
}
buffer.append("\t\t<body>\n");

SortedSet<DocumentPiece> documentNoteParts = doc.getDocumentPart(SegmentationLabels.FOOTNOTE);
List<Note> notes = getTeiNotes(doc, documentNoteParts, Note.NoteType.FOOT);

documentNoteParts = doc.getDocumentPart(SegmentationLabels.MARGINNOTE);
notes.addAll(getTeiNotes(doc, documentNoteParts, Note.NoteType.MARGIN));
List<Note> notes = getTeiNotes(doc);

buffer = toTEITextPiece(buffer, result, biblio, bds, true,
layoutTokenization, figures, tables, equations, notes, markerTypes, doc, config);
Expand All @@ -927,6 +923,20 @@ public StringBuilder toTEIBody(StringBuilder buffer,
return buffer;
}

/**
 * Gathers the structured notes of a document: first the foot notes, then the margin notes.
 *
 * Only these two note types are currently supported as structured notes. Head notes are
 * considered to be presentation matter only and never referenced from the text body, so
 * they are not collected here.
 *
 * @param doc the document whose FOOTNOTE and MARGINNOTE segmentation parts are read
 * @return the foot notes followed by the margin notes, in a single list
 */
protected List<Note> getTeiNotes(Document doc) {
    // Foot notes come first; the list returned by the three-argument overload is reused
    // as the accumulator for the margin notes.
    List<Note> collectedNotes =
            getTeiNotes(doc, doc.getDocumentPart(SegmentationLabels.FOOTNOTE), Note.NoteType.FOOT);

    collectedNotes.addAll(
            getTeiNotes(doc, doc.getDocumentPart(SegmentationLabels.MARGINNOTE), Note.NoteType.MARGIN));

    return collectedNotes;
}

protected List<Note> getTeiNotes(Document doc, SortedSet<DocumentPiece> documentNoteParts, Note.NoteType noteType) {

List<Note> notes = new ArrayList<>();
Expand Down
@@ -1,6 +1,6 @@
package org.grobid.core.document;

import org.grobid.core.data.Footnote;
import org.grobid.core.data.Note;
import org.grobid.core.engines.EngineParsers;
import org.grobid.core.engines.config.GrobidAnalysisConfig;
import org.grobid.core.main.LibraryLoader;
Expand Down Expand Up @@ -29,11 +29,11 @@ public void testGetTeiNotes() throws Exception {
File input = new File(this.getClass().getResource("/footnotes/test.pdf").toURI());
Document doc = engine.getSegmentationParser().processing(DocumentSource.fromPdf(input), GrobidAnalysisConfig.defaultInstance());

List<Footnote> teiNotes = new TEIFormatter(null, null).getTeiNotes(doc);
List<Note> teiNotes = new TEIFormatter(null, null).getTeiNotes(doc);

assertThat(teiNotes, hasSize(1));
assertThat(teiNotes.get(0).getText(), is(" http://wikipedia.org"));
assertThat(teiNotes.get(0).getNumber(), is(1));
assertThat(teiNotes.get(0).getLabel(), is("1"));
assertThat(teiNotes.get(0).getPageNumber(), is(1));
}

Expand Down
@@ -1,7 +1,7 @@
package org.grobid.core.document;

import org.grobid.core.analyzers.GrobidAnalyzer;
import org.grobid.core.data.Footnote;
import org.grobid.core.data.Note;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.LayoutTokensUtil;
Expand All @@ -26,11 +26,11 @@ public void testMakeFootNote() throws Exception {
String text = "1 This is a footnote";
List<LayoutToken> tokens = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(text);

Footnote footnote = new TEIFormatter(null, null).makeFootNote(tokens, text);
Note footnote = new TEIFormatter(null, null).makeNote(tokens, text, Note.NoteType.FOOT);

assertThat(footnote.getText(), is(" This is a footnote"));
assertThat(LayoutTokensUtil.toText(footnote.getTokens()), is(" This is a footnote"));
assertThat(footnote.getNumber(), is(1));
assertThat(footnote.getLabel(), is("1"));
}


Expand Down

0 comments on commit 063f559

Please sign in to comment.