Skip to content

Commit

Permalink
Merge branch 'feature/work_100' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
rjyounes committed Sep 27, 2017
2 parents 70e1560 + 5ac57b1 commit 5f5113e
Show file tree
Hide file tree
Showing 43 changed files with 703 additions and 746 deletions.
6 changes: 6 additions & 0 deletions src/main/java/org/ld4l/bib2lod/entity/Entity.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.ld4l.bib2lod.datatypes.Datatype;
import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.BibDatatype;
import org.ld4l.bib2lod.ontology.DatatypeProp;
import org.ld4l.bib2lod.ontology.NamedIndividual;
import org.ld4l.bib2lod.ontology.ObjectProp;
Expand Down Expand Up @@ -285,6 +286,11 @@ public void addAttribute(DatatypeProp prop, String string, Datatype type) {
addAttribute(prop, new Attribute(string, type));
}

/**
 * Adds a string value for the given property, typed with the
 * {@code BibDatatype.LEGACY_SOURCE_DATA} datatype. Convenience wrapper
 * around the three-argument {@code addAttribute}.
 *
 * @param prop the datatype property the value is attached to
 * @param string the literal value to store
 */
public void addLegacySourceDataAttribute(DatatypeProp prop, String string) {
    this.addAttribute(prop, string, BibDatatype.LEGACY_SOURCE_DATA);
}

/**
 * Wraps the integer in an {@code Attribute} and adds it for the given
 * property.
 *
 * @param prop the datatype property the value is attached to
 * @param i the integer value to store
 */
public void addAttribute(DatatypeProp prop, int i) {
    Attribute intAttribute = new Attribute(i);
    addAttribute(prop, intAttribute);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,6 @@

public class MarcxmlEntityBuilder extends BaseEntityBuilder {

/*
* TODO
* Record as a whole - done
* Field as a whole
* Field with one or more specific subfields
* Field iterating through each subfield (e.g., each subfield generates a distinct entity)
* Field iterating through all subfields (or is that just field as a whole?)
*/

@SuppressWarnings("unused")
private static final Logger LOGGER = LogManager.getLogger();

Expand Down Expand Up @@ -194,8 +185,5 @@ protected Entity buildFromString(

return entity;
}





}
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,9 @@ public Entity build(BuildParams params) throws EntityBuilderException {
buildChildFromControlField(Ld4lIdentifierType.defaultType(),
adminMetadata, record, "001");

convert_040();
convert040();

convert_005();

convert005();

if (adminMetadata.isEmpty()) {
return null;
Expand Down Expand Up @@ -78,7 +77,7 @@ private void parseBuildParams(BuildParams params)
}
}

private void convert_040() throws EntityBuilderException {
private void convert040() throws EntityBuilderException {

MarcxmlDataField field = record.getDataField("040");

Expand Down Expand Up @@ -174,15 +173,15 @@ private void addDescriptionConventions(MarcxmlDataField field)
}
}

private void convert_005() throws EntityBuilderException {
private void convert005() throws EntityBuilderException {

MarcxmlControlField field_005 = record.getControlField("005");
MarcxmlControlField field = record.getControlField("005");

if (field_005 == null) {
if (field == null) {
return;
}

String value = field_005.getTextValue();
String value = field.getTextValue();
// Convert format 20130330145647.0 to 2013-03-30T14:56:47
if (! PATTERN_005.matcher(value).matches()) {
throw new EntityBuilderException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,8 @@ public class AgentBuilder extends MarcxmlEntityBuilder {
private static ObjectProp DEFAULT_RELATIONSHIP =
Ld4lObjectProp.HAS_AGENT;

private Entity agent;
private MarcxmlDataField field;
private Entity grandparent;
private String name;
private Entity parent;
private ObjectProp relationship;
private MarcxmlSubfield subfield;
Expand All @@ -34,17 +32,17 @@ public Entity build(BuildParams params) throws EntityBuilderException {

parseBuildParams(params);

buildAgent();
Entity agent = buildAgent();

parent.addRelationship(relationship, agent);
if (agent != null) {
parent.addRelationship(relationship, agent);
}

return agent;
}

private void reset() {
this.agent = null;
this.field = null;
this.name = null;
this.parent = null;
this.relationship = null;
this.subfield = null;
Expand All @@ -63,13 +61,12 @@ private void parseBuildParams(BuildParams params)
if (type != null && ! (type instanceof Ld4lAgentType)) {
throw new EntityBuilderException("Invalid agent type");
}

this.name = params.getValue();

this.subfield = (MarcxmlSubfield) params.getSubfield();
this.field = (MarcxmlDataField) params.getField();
if (name == null && subfield == null && field == null) {
throw new EntityBuilderException("A name value, subfield, or " +
"field is required to build an agent.");
if (subfield == null && field == null) {
throw new EntityBuilderException("A subfield or field is " +
"required to build an agent.");
}

this.relationship = params.getRelationship();
Expand All @@ -80,18 +77,75 @@ private void parseBuildParams(BuildParams params)
this.grandparent = params.getGrandparent();
}


/**
 * Builds the agent from the current build params.
 *
 * With no field, the agent gets the default agent type and takes its
 * name from the subfield text. With a field, the conversion is chosen
 * by the field tag.
 *
 * @return the agent, or null when the field tag has no agent conversion
 */
private Entity buildAgent() {

    // Subfield only: its trimmed text is the agent name.
    if (field == null) {
        Entity subfieldAgent = new Entity(Ld4lAgentType.defaultType());
        String agentName = subfield.getTrimmedTextValue();
        subfieldAgent.addAttribute(Ld4lDatatypeProp.NAME, agentName);
        return subfieldAgent;
    }

    // Field present: dispatch on the MARC tag.
    switch (field.getTag()) {
        case "100":
            return convert100();
        case "260":
            return convert260();
        default:
            // No agent conversion for this tag.
            return null;
    }
}

/**
 * Builds an agent from field 100. The agent type is Family when the
 * first indicator is 3 and Person otherwise; the name is the trimmed
 * text of subfield $a.
 *
 * @return the new agent; it has no name attribute if the field has no
 *         subfield $a
 */
private Entity convert100() {

    // Person or Family type
    Type type = field.getFirstIndicator() == 3 ?
            Ld4lAgentType.FAMILY : Ld4lAgentType.PERSON;
    Entity agent = new Entity(type);

    // Name. Guard the lookup so that a 100 field lacking $a does not
    // cause a NullPointerException (assumes getSubfield returns null
    // when the code is absent - TODO confirm).
    if (field.getSubfield('a') != null) {
        agent.addAttribute(Ld4lDatatypeProp.NAME,
                field.getSubfield('a').getTrimmedTextValue());
    }

    return agent;
}

/**
 * Builds an agent of the default agent type from field 260, taking the
 * name from the current subfield, and then dedupes it against agents
 * already attached to the same bib resource.
 *
 * @return the agent to use: an existing agent with the same name if
 *         dedupeAgent finds one, otherwise the newly built agent
 */
private Entity convert260() {

    Entity agent = new Entity(Ld4lAgentType.defaultType());

    // Deduping compares agent name strings. Without a subfield the agent
    // has no name, and dedupeAgent would call equals() on the missing
    // name (presumably null - TODO confirm Entity.getValue), so return
    // the nameless agent as is.
    if (subfield == null) {
        return agent;
    }

    agent.addAttribute(Ld4lDatatypeProp.NAME,
            subfield.getTrimmedTextValue());

    return dedupeAgent(agent);
}

/**
* If this agent duplicates an agent of another activity of the same
* type for the same bib resource, use that agent rather than creating a
* new one. Current deduping is based only on the agent name strings,
* since that is what is available in, e.g., MARC 260$b.
*/
private void dedupeAgent() {
private Entity dedupeAgent(Entity agent) {

if (grandparent == null) {
return;
return agent;
}

String name = agent.getValue(Ld4lDatatypeProp.NAME);

List<Entity> activities = grandparent.getChildren(
Ld4lObjectProp.HAS_ACTIVITY, parent.getType());
for (Entity activity : activities) {
Expand All @@ -101,77 +155,12 @@ private void dedupeAgent() {
String agentName =
existingAgent.getValue(Ld4lDatatypeProp.NAME);
if (name.equals(agentName)) {
agent = existingAgent;
return existingAgent;
}
}
}
}

private void buildAgent() {

// Use type specified in build params, if any.
if (type == null) {
// Otherwise determine type from input data.
type = getType();
}

this.agent = new Entity(type);

addAgentName();
}

dedupeAgent();
}

private void addAgentName() {

if (name == null) {
if (subfield != null) {
this.name = subfield.getTrimmedTextValue();
} else {
this.name = buildNameFromDataField(agent);
}
}

if (name != null) {
agent.addAttribute(Ld4lDatatypeProp.NAME, name);
}
}

/**
* Determines type from input data. Defaults to Ld4lAgentType default
* type. Never returns null.
*/
private Type getType() {

if (field != null && field.getTag().equals("100")) {
if (field.getFirstIndicator() == 3) {
return Ld4lAgentType.FAMILY;
} else {
return Ld4lAgentType.PERSON;
}
}

return Ld4lAgentType.defaultType();
}

private String buildNameFromDataField(Entity agent) {

/*
* Note that in field 100, ind1 value 0 means given name first,
* value 1 means family name first, but since for now we are not
* parsing the individual name parts we just set the generic name
* property.
*
* TODO: if first indicator == 1, split on comma, assign first part
* to family name and last part to given name.
*
* TODO Add other agent attributes from other subfields.
*/
if (field != null && field.getTag().equals("100")) {
return field.getSubfield('a').getTrimmedTextValue();
}

return null;
}
return agent;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ public void build() throws EntityBuilderException {

switch (field.getTag()) {
case "100":
convert_100();
convert100();
break;
default:
break;
}
}


private void convert_100() throws EntityBuilderException {
private void convert100() throws EntityBuilderException {

this.activity = new Entity(TYPE);
EntityBuilder builder = getBuilder(Ld4lAgentType.defaultType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ private Entity build(MarcxmlDataField field, BuildParams params)
(MarcxmlSubfield) params.getSubfields().get(0);

if (field.getTag().equals("035")) {
identifier = convert_035(subfield);
identifier = convert035(subfield);
}

return identifier;
Expand All @@ -172,7 +172,7 @@ private Entity build(MarcxmlDataField field, BuildParams params)
* Builds an identifier from field 035. Returns null if the identifier
* value is already attached to the resource's AdminMetadata object.
*/
private Entity convert_035(MarcxmlSubfield subfield)
private Entity convert035(MarcxmlSubfield subfield)
throws EntityBuilderException {

Entity identifier;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ private void parseBuildParams(BuildParams params)
}

private void buildIdentifiers() throws EntityBuilderException {
convert_035();
convert035();
}

private void convert_035() throws EntityBuilderException {
private void convert035() throws EntityBuilderException {

// 035 is a repeating field
List<MarcxmlDataField> fields = record.getDataFields("035");
Expand Down Expand Up @@ -119,7 +119,6 @@ private void buildTitles() throws EntityBuilderException {

private void buildPhysicalDescriptions() throws EntityBuilderException {

// TODO Not sure yet if there are others.
buildExtent();

}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
package org.ld4l.bib2lod.entitybuilders.marcxml.ld4l;

import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.BibDatatype;
import org.ld4l.bib2lod.entity.Entity;
import org.ld4l.bib2lod.entitybuilders.BuildParams;
import org.ld4l.bib2lod.entitybuilders.marcxml.MarcxmlEntityBuilder;
import org.ld4l.bib2lod.ontology.Type;
import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp;

/**
* Builds an entity used to store unparsed, unnormalized legacy data. May or
* may not have a specific type assigned.
* Builds an entity used to store unparsed, unnormalized legacy data. Use
* when no specific type is assigned; if there is a specific type, use the
* builder for that type and add the datatype to the appropriate literal
* value.
*/
public class LegacySourceDataEntityBuilder extends MarcxmlEntityBuilder {

Expand All @@ -24,13 +24,7 @@ public Entity build(BuildParams params) throws EntityBuilderException {

Entity entity = new Entity();

entity.addAttribute(Ld4lDatatypeProp.LABEL, label,
BibDatatype.LEGACY_SOURCE_DATA);

Type type = params.getType();
if (type != null) {
entity.addType(type);
}
entity.addLegacySourceDataAttribute(Ld4lDatatypeProp.LABEL, label);

return entity;
}
Expand Down

0 comments on commit 5f5113e

Please sign in to comment.