Skip to content

Commit

Permalink
Merge pull request #563 from kermitt2/keep_affiliations_after_consoli…
Browse files Browse the repository at this point in the history
…dation

keep the extracted affiliations if none found from consolidation.
  • Loading branch information
kermitt2 committed Aug 12, 2020
2 parents c7b4b20 + f1c3cf9 commit 9bb1380
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 45 deletions.
98 changes: 64 additions & 34 deletions grobid-core/src/main/java/org/grobid/core/data/BiblioItem.java
Original file line number Diff line number Diff line change
Expand Up @@ -4243,22 +4243,30 @@ public static void correct(BiblioItem bib, BiblioItem bibo) {

// authors present in fullAuthors list should be in the existing resources
// at least the corresponding author
if (bibo.getFullAuthors() != null) {
if ( (bib.getFullAuthors() == null) || (bib.getFullAuthors().size() == 0) )
if (!CollectionUtils.isEmpty(bibo.getFullAuthors())) {
if (CollectionUtils.isEmpty(bib.getFullAuthors()))
bib.setFullAuthors(bibo.getFullAuthors());
else if (bibo.getFullAuthors().size() == 1) {
// we have the corresponding author
// we have the corresponding author
// check if the author exists in the obtained list
Person auto = (Person) bibo.getFullAuthors().get(0);
List<Person> auts = bib.getFullAuthors();
if (auts != null) {
for (Person aut : auts) {
if (aut.getLastName() != null) {
if (aut.getLastName().equals(auto.getLastName())) {
aut.setCorresp(true);
if (StringUtils.isNotBlank(auto.getEmail()))
aut.setEmail(auto.getEmail());
// should we also check the country ? affiliation?
if (StringUtils.isNotBlank(aut.getLastName()) && StringUtils.isNotBlank(auto.getLastName())) {
if (aut.getLastName().toLowerCase().equals(auto.getLastName().toLowerCase())) {
if (StringUtils.isBlank(aut.getFirstName()) ||
(auto.getFirstName() != null &&
aut.getFirstName().length() <= auto.getFirstName().length() &&
auto.getFirstName().toLowerCase().startsWith(aut.getFirstName().toLowerCase()))) {
aut.setFirstName(auto.getFirstName());
aut.setCorresp(true);
if (StringUtils.isNotBlank(auto.getEmail()))
aut.setEmail(auto.getEmail());
// should we also check the country ? affiliation?
if (StringUtils.isNotBlank(auto.getMiddleName()) && (StringUtils.isBlank(aut.getMiddleName())))
aut.setMiddleName(auto.getMiddleName());
}
}
}
}
Expand All @@ -4270,31 +4278,54 @@ else if (bibo.getFullAuthors().size() == 1) {
for (Person aut : bibo.getFullAuthors()) {
// try to find the author in the first item (we know it's not empty)
for (Person aut2 : bib.getFullAuthors()) {


if (StringUtils.isNotBlank(aut2.getLastName())) {
if (StringUtils.isNotBlank(aut.getLastName()) && aut.getLastName().equals(aut2.getLastName())) {
// check also first name if present - at least for the initial
if ( StringUtils.isNotBlank(aut2.getFirstName()) && StringUtils.isNotBlank(aut.getFirstName()) ) {
// we have a match (full first name)
if (StringUtils.isBlank(aut.getMiddleName()))
aut.setMiddleName(aut2.getMiddleName());
if (StringUtils.isBlank(aut.getTitle()))
aut.setTitle(aut2.getTitle());
if (StringUtils.isBlank(aut.getSuffix()))
aut.setSuffix(aut2.getSuffix());
break;
} else if ( StringUtils.isNotBlank(aut.getFirstName()) &&
StringUtils.isNotBlank(aut2.getFirstName()) &&
(aut.getFirstName().length() == 1) &&
(aut.getFirstName().equals(aut2.getFirstName().substring(0,1))) ) {
// we have a match (initial)
aut.setFirstName(aut2.getFirstName());
if (StringUtils.isBlank(aut.getMiddleName()))
aut.setMiddleName(aut2.getMiddleName());
if (StringUtils.isBlank(aut.getTitle()))
aut.setTitle(aut2.getTitle());
if (StringUtils.isBlank(aut.getSuffix()))
aut.setSuffix(aut2.getSuffix());
break;
String aut2_lastname = aut2.getLastName().toLowerCase();

if (StringUtils.isNotBlank(aut.getLastName())) {
String aut_lastname = aut.getLastName().toLowerCase();

if (aut_lastname.equals(aut2_lastname)) {
// check also first name if present - at least for the initial
if ( StringUtils.isBlank(aut2.getFirstName()) ||
(StringUtils.isNotBlank(aut2.getFirstName()) && StringUtils.isNotBlank(aut.getFirstName())) ) {
// we have no first name or a match (full first name)

if ( StringUtils.isBlank(aut2.getFirstName())
||
aut.getFirstName().equals(aut2.getFirstName())
||
( aut.getFirstName().length() == 1 &&
aut.getFirstName().equals(aut2.getFirstName().substring(0,1)) )
) {
// we have a match (full or initial)
if (StringUtils.isNotBlank(aut2.getFirstName()) &&
aut2.getFirstName().length() > aut.getFirstName().length())
aut.setFirstName(aut2.getFirstName());
if (StringUtils.isBlank(aut.getMiddleName()))
aut.setMiddleName(aut2.getMiddleName());
if (StringUtils.isBlank(aut.getTitle()))
aut.setTitle(aut2.getTitle());
if (StringUtils.isBlank(aut.getSuffix()))
aut.setSuffix(aut2.getSuffix());
if (StringUtils.isBlank(aut.getORCID()))
aut.setORCID(aut2.getORCID());
if (StringUtils.isBlank(aut.getEmail()))
aut.setEmail(aut2.getEmail());
if(!CollectionUtils.isEmpty(aut2.getAffiliations()))
aut.setAffiliations(aut2.getAffiliations());
if (!CollectionUtils.isEmpty(aut2.getAffiliationBlocks()))
aut.setAffiliationBlocks(aut2.getAffiliationBlocks());
if (!CollectionUtils.isEmpty(aut2.getAffiliationMarkers()))
aut.setAffiliationMarkers(aut2.getAffiliationMarkers());
if (!CollectionUtils.isEmpty(aut2.getMarkers()))
aut.setMarkers(aut2.getMarkers());
if (!CollectionUtils.isEmpty(aut2.getLayoutTokens()))
aut.setLayoutTokens(aut2.getLayoutTokens());
break;
}
}
}
}
}
Expand All @@ -4303,7 +4334,6 @@ else if (bibo.getFullAuthors().size() == 1) {
bib.setFullAuthors(bibo.getFullAuthors());
}
}
//System.out.println("result: \n" + bib.toTEI(0));
}

/**
Expand Down
79 changes: 68 additions & 11 deletions grobid-core/src/main/java/org/grobid/core/data/Person.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ public class Person {
private String rawName = null; // raw full name if relevant/available, e.g. name exactly as displayed
private String orcid = null;
private boolean corresp = false;
private List<LayoutToken> layoutTokens = new ArrayList<>();

private List<LayoutToken> layoutTokens = new ArrayList<>();
private List<String> affiliationBlocks = null;
private List<Affiliation> affiliations = null;
private List<String> affiliationMarkers = null;
Expand Down Expand Up @@ -116,6 +116,10 @@ public List<String> getAffiliationBlocks() {
return affiliationBlocks;
}

/**
 * Replaces the affiliation blocks attached to this person.
 * The given list is stored by reference; no copy is made.
 *
 * @param blocks the new list of affiliation blocks
 */
public void setAffiliationBlocks(List<String> blocks) {
    affiliationBlocks = blocks;
}

public void addAffiliationBlocks(String f) {
if (affiliationBlocks == null)
affiliationBlocks = new ArrayList<String>();
Expand All @@ -128,14 +132,18 @@ public List<org.grobid.core.data.Affiliation> getAffiliations() {

public void addAffiliation(org.grobid.core.data.Affiliation f) {
if (affiliations == null)
affiliations = new ArrayList<org.grobid.core.data.Affiliation>();
affiliations = new ArrayList<>();
affiliations.add(f);
}

/**
 * Gives access to the affiliation markers attached to this person.
 *
 * @return the internal list of affiliation markers (not a copy); the field
 *         is initialised to {@code null}, so this may return {@code null}
 */
public List<String> getAffiliationMarkers() {
    return this.affiliationMarkers;
}

/**
 * Replaces the affiliation markers attached to this person.
 * The given list is stored by reference; no copy is made.
 *
 * @param affiliationMarkers the new list of affiliation markers
 */
public void setAffiliationMarkers(List<String> affiliationMarkers) {
    // the parameter shadows the field, hence the explicit "this."
    this.affiliationMarkers = affiliationMarkers;
}

public void addAffiliationMarker(String s) {
if (affiliationMarkers == null)
affiliationMarkers = new ArrayList<String>();
Expand All @@ -150,6 +158,10 @@ public List<String> getMarkers() {
return markers;
}

/**
 * Replaces the markers attached to this person.
 * The given list is stored by reference; no copy is made.
 *
 * @param markers the new list of markers
 */
public void setMarkers(List<String> markers) {
    // the parameter shadows the field, hence the explicit "this."
    this.markers = markers;
}

public void addMarker(String f) {
if (markers == null)
markers = new ArrayList<String>();
Expand All @@ -176,6 +188,35 @@ public boolean notNull() {
return true;
}

/**
 * Builds a new {@code Person} from the current instance (shallow copy).
 * <p>
 * Scalar attributes are copied as-is. Each non-null list attribute is
 * copied into a fresh {@code ArrayList} holding the same element
 * references, so adding to or removing from the copy's lists leaves the
 * lists of this instance untouched, while the elements themselves stay
 * shared. A list that is {@code null} here is not copied: the new instance
 * keeps whatever value {@code new Person()} gave that field.
 *
 * @return the newly created copy
 */
public Person clonePerson() {
    final Person copy = new Person();

    // name parts
    copy.title = title;
    copy.firstName = firstName;
    copy.middleName = middleName;
    copy.lastName = lastName;
    copy.suffix = suffix;
    copy.rawName = rawName;

    // identifiers and contact information
    copy.orcid = orcid;
    copy.email = email;
    copy.corresp = corresp;

    // list attributes: new container, same elements
    if (markers != null) {
        copy.markers = new ArrayList<>(markers);
    }
    if (affiliationMarkers != null) {
        copy.affiliationMarkers = new ArrayList<>(affiliationMarkers);
    }
    if (affiliations != null) {
        copy.affiliations = new ArrayList<>(affiliations);
    }
    if (affiliationBlocks != null) {
        copy.affiliationBlocks = new ArrayList<>(affiliationBlocks);
    }
    if (layoutTokens != null) {
        copy.layoutTokens = new ArrayList<>(layoutTokens);
    }

    return copy;
}

public String toString() {
String res = "";
if (title != null)
Expand Down Expand Up @@ -206,6 +247,10 @@ public List<LayoutToken> getLayoutTokens() {
return layoutTokens;
}

/**
 * Replaces the layout tokens attached to this person.
 * The given list is stored by reference; no copy is made.
 *
 * @param tokens the new list of layout tokens
 */
public void setLayoutTokens(List<LayoutToken> tokens) {
    layoutTokens = tokens;
}

/**
* TEI serialization via xom.
*/
Expand All @@ -228,23 +273,23 @@ public String toTEI(boolean withCoordinates) {
XmlBuilderUtils.addCoords(persElement, LayoutTokensUtil.getCoordsString(getLayoutTokens()));
}
if (title != null) {
persElement.appendChild(XmlBuilderUtils.teiElement("roleName", TextUtilities.HTMLEncode(title)));
persElement.appendChild(XmlBuilderUtils.teiElement("roleName", title));
}
if (firstName != null) {
Element forename = XmlBuilderUtils.teiElement("forename", TextUtilities.HTMLEncode(firstName));
Element forename = XmlBuilderUtils.teiElement("forename", firstName);
forename.addAttribute(new Attribute("type", "first"));
persElement.appendChild(forename);
}
if (middleName != null) {
Element mn = XmlBuilderUtils.teiElement("forename", TextUtilities.HTMLEncode(middleName));
Element mn = XmlBuilderUtils.teiElement("forename", middleName);
mn.addAttribute(new Attribute("type", "middle"));
persElement.appendChild(mn);
}
if (lastName != null) {
persElement.appendChild(XmlBuilderUtils.teiElement("surname", TextUtilities.HTMLEncode(lastName)));
persElement.appendChild(XmlBuilderUtils.teiElement("surname", lastName));
}
if (suffix != null) {
persElement.appendChild(XmlBuilderUtils.teiElement("genName", TextUtilities.HTMLEncode(suffix)));
persElement.appendChild(XmlBuilderUtils.teiElement("genName", suffix));
}

return XmlBuilderUtils.toXml(persElement);
Expand Down Expand Up @@ -531,23 +576,31 @@ public static List<Person> deduplicate(List<Person> persons) {
for(int j=0; j < localPersons.size(); j++) {
Person localPerson = localPersons.get(j);
String localFirstName = localPerson.getFirstName();
if (localFirstName != null)
if (localFirstName != null) {
localFirstName = localFirstName.toLowerCase();
localFirstName = localFirstName.replaceAll("[\\-\\.]", "");
}
String localMiddleName = localPerson.getMiddleName();
if (localMiddleName != null)
if (localMiddleName != null) {
localMiddleName = localMiddleName.toLowerCase();
localMiddleName = localMiddleName.replaceAll("[\\-\\.]", "");
}
int nbClash = 0;
for(int k=0; k < localPersons.size(); k++) {
boolean clash = false;
if (k == j)
continue;
Person otherPerson = localPersons.get(k);
String otherFirstName = otherPerson.getFirstName();
if (otherFirstName != null)
if (otherFirstName != null) {
otherFirstName = otherFirstName.toLowerCase();
otherFirstName = otherFirstName.replaceAll("[\\-\\.]", "");
}
String otherMiddleName = otherPerson.getMiddleName();
if (otherMiddleName != null)
if (otherMiddleName != null) {
otherMiddleName = otherMiddleName.toLowerCase();
otherMiddleName = otherMiddleName.replaceAll("[\\-\\.]", "");
}

// test first name clash
if (localFirstName != null && otherFirstName != null) {
Expand Down Expand Up @@ -657,6 +710,10 @@ public static List<Person> deduplicate(List<Person> persons) {
localSuffix = localPerson.getSuffix().toLowerCase();
}

String otherOrcid = otherPerson.getORCID();
if (otherOrcid != null)
localPerson.setORCID(otherOrcid);

if (otherPerson.getAffiliations() != null) {
for(Affiliation affOther : otherPerson.getAffiliations()) {
localPerson.addAffiliation(affOther);
Expand Down
Loading

0 comments on commit 9bb1380

Please sign in to comment.