Skip to content

Commit

Permalink
better cover one additional citation callout pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Jun 7, 2022
1 parent 81b0da1 commit c83d224
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
4 changes: 2 additions & 2 deletions doc/Introduction.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ Many thanks to:

* Vyacheslav Zholudev (Sum&Substance, formerly at ResearchGate)
* Achraf Azhar (CCSD)
* Laurent Romary (Inria)
* Daniel Ecer (eLife)
* Laurent Romary (Inria)
* Vitalii Bezsheiko (PKP)
* Bryan Newbold (Internet Archive)
* Christopher Boumenot (Microsoft) in particular for the Windows support
Expand All @@ -82,4 +82,4 @@ Many thanks to:
* The JEP team for their great JVM CPython embedding solution
* Taku Kudo for CRF++ (not used anymore, but all the same, thanks!)
* Hervé Déjean and his colleagues from Xerox Research Centre Europe, for xml2pdf
* and the other contributors: Jakob Fix, Tanti Kristanti, Bryan Newbold, Dmitry Katsubo, Phil Gooch, Romain Loth, Maud Medves, Chris Mattmann, Sujen Shah, Joseph Boyd, Guillaume Muller, ...
* and the other contributors: @elonzh, Jakob Fix, Tanti Kristanti, Bryan Newbold, Dmitry Katsubo, Phil Gooch, Romain Loth, Maud Medves, Chris Mattmann, Sujen Shah, Joseph Boyd, Guillaume Muller, ...
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.util.Version;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.utilities.Pair;
Expand Down Expand Up @@ -278,7 +279,7 @@ private List<MatchResult> matchAuthorCitation(String text, List<LayoutToken> ref
List<BibDataSet> matches = authorMatcher.match(c);
if (matches.size() == 1) {
cntManager.i(ReferenceMarkerMatcherCounters.MATCHED_REF_MARKERS);
//System.out.println("MATCHED: " + text + "\n" + c + "\n" + matches.get(0).getRawBib());
//System.out.println("MATCHED: " + text + "\n" + c + "\n" + matches.get(0).getRawBib());
results.add(new MatchResult(c, splitItem, matches.get(0)));
} else {
if (matches.size() != 0) {
Expand Down Expand Up @@ -405,14 +406,33 @@ private List<BibDataSet> postFilterMatches(String c, List<BibDataSet> matches) {
String[] sp = c.trim().split(" ");
//callouts often include parentheses as seen in https://grobid.readthedocs.io/en/latest/training/fulltext/
final String author = sp[0].replaceAll("[\\(\\[]", "").toLowerCase();

ArrayList<BibDataSet> bibDataSets = Lists.newArrayList(Iterables.filter(matches, new Predicate<BibDataSet>() {
@Override
public boolean apply(BibDataSet bibDataSet) {
    // Normalize the raw reference string (trim + lower-case) so it can be
    // compared against the lower-cased author token taken from the callout.
    final String normalizedRawBib = bibDataSet.getRawBib().trim().toLowerCase();
    // Keep the entry only when its raw text begins with that author token.
    return normalizedRawBib.startsWith(author);
}
}));

if (bibDataSets.size() == 1) {
return bibDataSets;
}

bibDataSets = Lists.newArrayList(Iterables.filter(matches, new Predicate<BibDataSet>() {
@Override
public boolean apply(BibDataSet bibDataSet) {
    // Compare against the parsed bibliographic item instead of the raw string.
    final BiblioItem parsedItem = bibDataSet.getResBib();
    if (parsedItem == null) {
        // no parsed item available: nothing to compare, reject the entry
        return false;
    }
    final String surname = parsedItem.getFirstAuthorSurname();
    // Reject entries without a first-author surname; otherwise keep the entry
    // only when the lower-cased surname equals the callout author token exactly.
    return surname != null && surname.toLowerCase().equals(author);
}
}));

if (bibDataSets.size() <= 1) {
return bibDataSets;
}
Expand Down

0 comments on commit c83d224

Please sign in to comment.