Skip to content

Commit

Permalink
Merge de7abbe into 0b42a3c
Browse files Browse the repository at this point in the history
  • Loading branch information
lfoppiano committed Apr 22, 2021
2 parents 0b42a3c + de7abbe commit 1577c4c
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 223 deletions.
45 changes: 15 additions & 30 deletions grobid-core/src/main/java/org/grobid/core/data/BiblioItem.java
Expand Up @@ -14,15 +14,11 @@
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.tokenization.TaggingTokenCluster;
import org.grobid.core.tokenization.TaggingTokenClusteror;
import org.grobid.core.utilities.LayoutTokensUtil;
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.engines.label.TaggingLabels;
import org.grobid.core.engines.tagging.GenericTaggerUtils;
import org.grobid.core.lexicon.Lexicon;
import org.grobid.core.utilities.LanguageUtilities;
import org.grobid.core.utilities.TextUtilities;
import org.grobid.core.utilities.KeyGen;
import org.grobid.core.utilities.Pair;
import org.grobid.core.GrobidModels;

import java.net.URLEncoder;
Expand Down Expand Up @@ -53,9 +49,12 @@ public class BiblioItem {
// map of labels (e.g. <title> or <abstract>) to LayoutToken
private Map<String, List<LayoutToken>> labeledTokens;

private List<LayoutToken> titleLayoutTokens = new ArrayList<>();
private List<LayoutToken> authorsLayoutTokens = new ArrayList<>();
private List<LayoutToken> abstractLayoutTokens = new ArrayList<>();
/**
* This is an internal working structure that is not meant to be used outside this class. Its content may also be
* modified with respect to the other structures. To collect the layout tokens of the various bibliographical
* components, please refer to {@code getLayoutTokens(...)} instead.
*/
private List<LayoutToken> authorsLayoutTokensworkingCopy = new ArrayList<>();


@Override
public String toString() {
Expand Down Expand Up @@ -1178,12 +1177,12 @@ public void setAuthors(String aut) {
}

public BiblioItem addAuthorsToken(LayoutToken lt) {
authorsLayoutTokens.add(lt);
authorsLayoutTokensworkingCopy.add(lt);
return this;
}

public List<LayoutToken> getAuthorsTokens() {
return authorsLayoutTokens;
public void addAuthorsTokens(List<LayoutToken> layoutTokens) {
this.authorsLayoutTokensworkingCopy.addAll(layoutTokens);
}

public void addAuthor(String aut) {
Expand Down Expand Up @@ -4556,7 +4555,7 @@ public boolean rejectAsReference() {
// normally properties authors and authorList are null in the current Grobid version
if (!titleSet && !authorSet && (url == null) && (doi == null))
return true;
else
else
return false;
}

Expand Down Expand Up @@ -4606,7 +4605,7 @@ public void setLayoutTokensForLabel(List<LayoutToken> tokens, TaggingLabel heade
labeledTokens.put(headerLabel.getLabel(), tokens);
}

public void generalResultMapping(Document doc, String labeledResult, List<LayoutToken> tokenizations) {
public void generalResultMapping(String labeledResult, List<LayoutToken> tokenizations) {
if (labeledTokens == null)
labeledTokens = new TreeMap<>();

Expand All @@ -4624,27 +4623,13 @@ public void generalResultMapping(Document doc, String labeledResult, List<Layout
List<LayoutToken> clusterTokens = cluster.concatTokens();
List<LayoutToken> theList = labeledTokens.get(clusterLabel.getLabel());

if (theList == null)
theList = new ArrayList<>();
for (LayoutToken token : clusterTokens)
theList.add(token);
theList = theList == null ? new ArrayList<>() : theList;
theList.addAll(clusterTokens);
labeledTokens.put(clusterLabel.getLabel(), theList);
}
}

public void addTitleTokens(List<LayoutToken> layoutTokens) {
this.titleLayoutTokens.addAll(layoutTokens);
}

public void addAuthorsTokens(List<LayoutToken> layoutTokens) {
this.authorsLayoutTokens.addAll(layoutTokens);
}

public void addAbstractTokens(List<LayoutToken> layoutTokens) {
this.abstractLayoutTokens.addAll(layoutTokens);
}

public List<LayoutToken> getAbstractTokens() {
return this.abstractLayoutTokens;
public List<LayoutToken> getAuthorsWorkingCopyTokens() {
return authorsLayoutTokensworkingCopy;
}
}
Expand Up @@ -21,7 +21,6 @@
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.exceptions.GrobidExceptionStatus;
import org.grobid.core.features.FeatureFactory;
import org.grobid.core.features.FeaturesVectorHeader;
import org.grobid.core.layout.Block;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.Cluster;
Expand Down Expand Up @@ -71,7 +70,6 @@
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -829,7 +827,7 @@ public static List<LayoutToken> getTokenizationParts(SortedSet<DocumentPiece> do
if (documentParts == null)
return null;

List<LayoutToken> tokenizationParts = new ArrayList<LayoutToken>();
List<LayoutToken> tokenizationParts = new ArrayList<>();
for (DocumentPiece docPiece : documentParts) {
DocumentPointer dp1 = docPiece.getLeft();
DocumentPointer dp2 = docPiece.getRight();
Expand Down
Expand Up @@ -186,8 +186,7 @@ public Document processing(DocumentSource documentSource,

// structure the abstract using the fulltext model
if (isNotBlank(resHeader.getAbstract())) {
//List<LayoutToken> abstractTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_ABSTRACT);
List<LayoutToken> abstractTokens = resHeader.getAbstractTokens();
List<LayoutToken> abstractTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_ABSTRACT);
if (CollectionUtils.isNotEmpty(abstractTokens)) {
abstractTokens = BiblioItem.cleanAbstractLayoutTokens(abstractTokens);
Pair<String, List<LayoutToken>> abstractProcessed = processShort(abstractTokens, doc);
Expand Down

0 comments on commit 1577c4c

Please sign in to comment.