diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilder.java index a1cb07386..9b6ae59c7 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilder.java @@ -3,7 +3,7 @@ import java.util.List; import java.util.regex.Pattern; -import org.ld4l.bib2lod.datatypes.XsdDatatype; +import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.BibDatatype; import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entitybuilders.BuildParams; import org.ld4l.bib2lod.entitybuilders.EntityBuilder; @@ -193,7 +193,7 @@ private void convert005() throws EntityBuilderException { value.substring(8, 10) + ":" + value.substring(10,12) + ":" + value.substring(12, 14); adminMetadata.addAttribute(Ld4lDatatypeProp.CHANGE_DATE, - datetime, XsdDatatype.DATETIME); + datetime, BibDatatype.EDTF); } } diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilder.java index 083f52409..69ef5c03b 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilder.java @@ -1,12 +1,13 @@ package org.ld4l.bib2lod.entitybuilders.marcxml.ld4l; +import java.util.Arrays; import java.util.List; +import org.apache.commons.lang3.StringUtils; import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entitybuilders.BuildParams; import org.ld4l.bib2lod.entitybuilders.marcxml.MarcxmlEntityBuilder; import org.ld4l.bib2lod.ontology.ObjectProp; -import org.ld4l.bib2lod.ontology.Type; import org.ld4l.bib2lod.ontology.ld4l.Ld4lAgentType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; import org.ld4l.bib2lod.ontology.ld4l.Ld4lObjectProp; @@ -23,7 +24,6 @@ public class AgentBuilder extends MarcxmlEntityBuilder { private Entity parent; private ObjectProp relationship; private MarcxmlSubfield subfield; - private Type type; @Override public Entity build(BuildParams params) throws EntityBuilderException { @@ -56,12 +56,7 @@ private void parseBuildParams(BuildParams params) throw new EntityBuilderException( "A parent entity is required to build an agent."); } - - this.type = params.getType(); - if (type != null && ! (type instanceof Ld4lAgentType)) { - throw new EntityBuilderException("Invalid agent type"); - } - + this.subfield = (MarcxmlSubfield) params.getSubfield(); this.field = (MarcxmlDataField) params.getField(); if (subfield == null && field == null) { @@ -85,6 +80,7 @@ private Entity buildAgent() { // Subfield only if (field == null) { agent = new Entity(Ld4lAgentType.defaultType()); + // TODO Add legacySourceData datatype? agent.addAttribute(Ld4lDatatypeProp.NAME, subfield.getTrimmedTextValue()); @@ -106,15 +102,48 @@ private Entity buildAgent() { private Entity convert100() { - // Person or Family type - Type type = field.getFirstIndicator() == 3 ? - Ld4lAgentType.FAMILY : Ld4lAgentType.PERSON; - Entity agent = new Entity(type); - - // Name - agent.addAttribute(Ld4lDatatypeProp.NAME, - field.getSubfield('a').getTrimmedTextValue()); + Entity agent = new Entity(); + MarcxmlSubfield subfield$a = field.getSubfield('a'); + // Family + if (field.getFirstIndicator() == 3) { + agent.addType(Ld4lAgentType.FAMILY); + + // Name + if (subfield$a != null) { + agent.addLegacySourceDataAttribute(Ld4lDatatypeProp.NAME, + subfield$a.getTrimmedTextValue()); + } + + // Person + } else { + agent.addType(Ld4lAgentType.PERSON); + + // Person name: concatenate $a (name) $b (numeration) $c (titles + // and other words associated with the name) + if (subfield$a != null) { + + String name = field.concatenateSubfieldValues( + Arrays.asList('a', 'b', 'c', 'q')); + if (name.endsWith(",")) { + name = StringUtils.chop(name); + } + agent.addLegacySourceDataAttribute(Ld4lDatatypeProp.NAME, + name); + } + + // Person birth and death dates: variable values, no attempt to + // parse at this time, so use dcterms:date instead of + // schema:birthDate, schema:deathDate. + // Examples: "1775-1817", "d. 1683", "282-133 B.C." + // "dd. ca. 1558", "d1240 or 41-ca. 1316" + MarcxmlSubfield subfield$d = field.getSubfield('d'); + if (subfield$d != null) { + agent.addLegacySourceDataAttribute(Ld4lDatatypeProp.DATE, + subfield$d.getTextValue()); + } + } + return agent; } @@ -163,4 +192,6 @@ private Entity dedupeAgent(Entity agent) { return agent; } + + } diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/InstanceBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/InstanceBuilder.java index 1f82974d6..e0e3d3af0 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/InstanceBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/InstanceBuilder.java @@ -2,11 +2,9 @@ package org.ld4l.bib2lod.entitybuilders.marcxml.ld4l; -import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.ld4l.bib2lod.entity.Entity; @@ -35,6 +33,11 @@ public class InstanceBuilder extends MarcxmlEntityBuilder { @SuppressWarnings("unused") private static final Logger LOGGER = LogManager.getLogger(); + + public static List _260_PUBLISHER_CODES = + Arrays.asList('a', 'b', 'c'); + public static List _260_MANUFACTURER_CODES = + Arrays.asList('e', 'f', 'g'); private InstanceEntity instance; private MarcxmlRecord record; @@ -50,15 +53,15 @@ public Entity build(BuildParams params) throws EntityBuilderException { // Admin metadata is built from multiple fields buildChildFromRecord( Ld4lAdminMetadataType.defaultType(), instance, record); - + + buildWorks(); + buildItem(); buildIdentifiers(); buildTitles(); buildActivities(); buildProvisionActivityStatements(); buildResponsiblityStatement(); buildPhysicalDescriptions(); - buildWorks(); - buildItem(); return instance; } @@ -173,7 +176,6 @@ private void buildItem() throws EntityBuilderException { private void buildActivities() throws EntityBuilderException { buildPublisherActivities(); buildManufacturerActivities(); - buildProviderActivities(); } private void buildPublisherActivities() throws EntityBuilderException { @@ -190,11 +192,10 @@ private void buildPublisherActivities() throws EntityBuilderException { // 260 fields: build additional publisher activities and add data to // current publisher activity from 008. - List publisherCodes = Arrays.asList('a', 'b', 'c'); for (MarcxmlDataField field : record.getDataFields("260")) { params.setField(field); List> subfieldLists = ProviderActivityBuilder. - getActivitySubfields(field, publisherCodes); + getActivitySubfields(field, _260_PUBLISHER_CODES); for (List subfields : subfieldLists) { params.setSubfields(subfields); @@ -217,20 +218,13 @@ private void buildProvisionActivityStatements() { private void buildProvisionActivityStatements( List tags, List codes) { - for (MarcxmlDataField field : record.getDataFields(tags)) { - - List textValues = new ArrayList<>(); - - for (MarcxmlSubfield subfield : field.getSubfields(codes)) { - textValues.add(subfield.getTextValue()); - } - - if (textValues.size() > 0) { - String statement = StringUtils.join(textValues, " "); + for (MarcxmlDataField field : record.getDataFields(tags)) { + String statement = field.concatenateSubfieldValues(codes); + if (statement != null) { instance.addAttribute (Ld4lDatatypeProp.PROVISION_ACTIVITY_STATEMENT, - statement); - } + statement); + } } } @@ -244,12 +238,11 @@ private void buildManufacturerActivities() throws EntityBuilderException { .setRecord(record); // Build manufacturer activities from 260$e$f$g - List manufacturerCodes = Arrays.asList('e', 'f', 'g'); for (MarcxmlDataField field : record.getDataFields("260")) { params.setField(field); List> subfieldLists = ProviderActivityBuilder.getActivitySubfields( - field, manufacturerCodes); + field, _260_MANUFACTURER_CODES); for (List subfields : subfieldLists) { params.setField(field) @@ -259,12 +252,8 @@ private void buildManufacturerActivities() throws EntityBuilderException { } } - private void buildProviderActivities() throws EntityBuilderException { - - } - /** - * Add responsibility statement to instance from 245$c. + * Adds responsibility statement 245$c. */ private void buildResponsiblityStatement() { diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/LocationBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/LocationBuilder.java index eab679fa5..91302f332 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/LocationBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/LocationBuilder.java @@ -5,7 +5,6 @@ import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entitybuilders.BuildParams; import org.ld4l.bib2lod.entitybuilders.marcxml.MarcxmlEntityBuilder; -import org.ld4l.bib2lod.ontology.Type; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; import org.ld4l.bib2lod.ontology.ld4l.Ld4lLocationType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lObjectProp; @@ -18,7 +17,6 @@ public class LocationBuilder extends MarcxmlEntityBuilder { private String name; private Entity parent; private MarcxmlSubfield subfield; - private Type type; @Override public Entity build(BuildParams params) throws EntityBuilderException { @@ -49,7 +47,6 @@ private void reset() { this.name = null; this.parent = null; this.subfield = null; - this.type = null; } private void parseBuildParams(BuildParams params) diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/TitleElementBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/TitleElementBuilder.java index 98f46a81f..e999a62f6 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/TitleElementBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/TitleElementBuilder.java @@ -28,7 +28,7 @@ public Entity build(BuildParams params) throws EntityBuilderException { * correctly reconstruct the title: E.g., French "L'" vs. "Le ". */ if (! type.equals(Ld4lTitleElementType.NON_SORT_ELEMENT)) { - value = XmlTextElement.removeFinalPunctAndWhitespace( + value = XmlTextElement.trimFinalPunctAndWhitespace( value).trim(); } diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilder.java index c9caf4e8e..16837306d 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilder.java @@ -9,6 +9,8 @@ import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entitybuilders.BuildParams; import org.ld4l.bib2lod.entitybuilders.marcxml.MarcxmlEntityBuilder; +import org.ld4l.bib2lod.ontology.DatatypeProp; +import org.ld4l.bib2lod.ontology.Type; import org.ld4l.bib2lod.ontology.ld4l.Ld4lActivityType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; import org.ld4l.bib2lod.ontology.ld4l.Ld4lObjectProp; @@ -21,13 +23,18 @@ public class ActivityBuilder extends MarcxmlEntityBuilder { @SuppressWarnings("unused") private static final Logger LOGGER = LogManager.getLogger(); + + private static final Ld4lActivityType DEFAULT_TYPE = + (Ld4lActivityType) Ld4lActivityType.defaultType(); protected Entity activity; - protected Entity parent; protected MarcxmlTaggedField field; - protected List subfields; + protected Entity parent; + protected DatatypeProp property; protected MarcxmlRecord record; + protected List subfields; protected Ld4lActivityType type; + protected String value; @Override public Entity build(BuildParams params) throws EntityBuilderException { @@ -52,9 +59,11 @@ private void reset() { this.activity = null; this.parent = null; this.field = null; + this.property = null; this.subfields = new ArrayList<>(); this.record = null; this.type = null; + this.value = null; } private void parseBuildParams(BuildParams params) @@ -70,16 +79,35 @@ private void parseBuildParams(BuildParams params) this.record = (MarcxmlRecord) params.getRecord(); RecordField field = params.getField(); - if (field == null) { + if (field != null) { + if (! (field instanceof MarcxmlTaggedField)) { + throw new EntityBuilderException("A data field or control " + + "field is required to build an activity"); + } + this.field = (MarcxmlTaggedField) field; + } + + this.property = params.getProperty(); + this.value = params.getValue(); + + if (field == null && property == null) { throw new EntityBuilderException( - "A field is required to build an activity."); + "A field or property and value is required to build an activity."); + } + if (field == null && value == null) { + throw new EntityBuilderException("A field or property and " + + "value is required to build an activity."); } - if (! (field instanceof MarcxmlTaggedField)) { - throw new EntityBuilderException("A data field or control " + - "field is required to build an activity"); - } - this.field = (MarcxmlTaggedField) field; + Type type = params.getType(); + if (type != null) { + if (! (type instanceof Ld4lActivityType)) { + throw new EntityBuilderException("Invalid type."); + } + this.type = (Ld4lActivityType) type; + } else { + this.type = DEFAULT_TYPE; + } /* * This needs to be a list of MarcxmlSubfields in order @@ -92,7 +120,13 @@ private void parseBuildParams(BuildParams params) } protected void build() throws EntityBuilderException { - // If never used, make this an abstract class. + + if (property == null || value == null) { + throw new EntityBuilderException( + "A property and value are needed to build a generic Activity."); + } + this.activity = new Entity(type); + activity.addAttribute(property, value); } } diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilder.java index 7afe195ad..c64e83caa 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilder.java @@ -34,7 +34,7 @@ private void convert260() buildLocation(datafield.getSubfield('e')); buildAgent(datafield.getSubfield('f')); - buildDate(datafield.getSubfield('g')); + buildUntypedDate(datafield.getSubfield('g')); } } diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ProviderActivityBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ProviderActivityBuilder.java index 13e772451..67143af8c 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ProviderActivityBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ProviderActivityBuilder.java @@ -7,7 +7,6 @@ import org.apache.logging.log4j.Logger; import org.ld4l.bib2lod.entitybuilders.BuildParams; import org.ld4l.bib2lod.entitybuilders.EntityBuilder; -import org.ld4l.bib2lod.ontology.ld4l.Ld4lActivityType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lAgentType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; import org.ld4l.bib2lod.ontology.ld4l.Ld4lLocationType; @@ -15,17 +14,19 @@ import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlDataField; import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlSubfield; +// TODO Might be abstract - do we ever build a generic provider activity? public class ProviderActivityBuilder extends ActivityBuilder { @SuppressWarnings("unused") private static final Logger LOGGER = LogManager.getLogger(); - @SuppressWarnings("unused") - private static final Ld4lActivityType TYPE = - Ld4lActivityType.PROVIDER_ACTIVITY; - + + /** + * Builds an untyped date attribute from a subfield with uncontrolled + * values (e.g., 260$c as opposed to the controlled 008 values). + */ // TODO Move up to ActivityBuilder if it works for other activities - protected void buildDate(MarcxmlSubfield subfield) + protected void buildUntypedDate(MarcxmlSubfield subfield) throws EntityBuilderException { if (subfield == null) { @@ -33,8 +34,6 @@ protected void buildDate(MarcxmlSubfield subfield) } String date = subfield.getTrimmedTextValue(); - // Unlike the controlled 008 date, the 260$c date value is an - // untyped literal. activity.addAttribute(Ld4lDatatypeProp.DATE, date); } diff --git a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilder.java b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilder.java index bd956f57c..4828889de 100644 --- a/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilder.java +++ b/src/main/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilder.java @@ -5,11 +5,14 @@ import org.apache.logging.log4j.Logger; import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.BibDatatype; import org.ld4l.bib2lod.entity.Entity; +import org.ld4l.bib2lod.entitybuilders.BuildParams; +import org.ld4l.bib2lod.entitybuilders.EntityBuilder; import org.ld4l.bib2lod.ontology.ld4l.Ld4lActivityType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; import org.ld4l.bib2lod.ontology.ld4l.Ld4lNamedIndividual; import org.ld4l.bib2lod.ontology.ld4l.Ld4lNamespace; import org.ld4l.bib2lod.ontology.ld4l.Ld4lObjectProp; +import org.ld4l.bib2lod.records.xml.XmlTextElement; import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlControlField; import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlDataField; import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlSubfield; @@ -51,11 +54,16 @@ private void convert008() { activity.addExternalRelationship(Ld4lObjectProp.HAS_STATUS, Ld4lNamedIndividual.CURRENT); - // Publication date - String year = controlfield.getTextSubstring(7, 11); - if (! StringUtils.isBlank(year)) { + // Publication dates + String year1 = controlfield.getTextSubstring(7, 11); + if (! StringUtils.isBlank(year1)) { activity.addAttribute( - Ld4lDatatypeProp.DATE, year, BibDatatype.EDTF); + Ld4lDatatypeProp.DATE, year1, BibDatatype.EDTF); + } + String year2 = controlfield.getTextSubstring(11, 15); + if (! StringUtils.isBlank(year2)) { + activity.addAttribute( + Ld4lDatatypeProp.DATE, year2, BibDatatype.EDTF); } // Publication location @@ -87,10 +95,36 @@ private void convert260() buildLocation(MarcxmlSubfield.getSubfield(subfields, 'a')); buildAgent(MarcxmlSubfield.getSubfield(subfields, 'b')); - buildDate(MarcxmlSubfield.getSubfield(subfields, 'c')); + buildUntypedDate(MarcxmlSubfield.getSubfield(subfields, 'c')); // TODO 264 with indicator for publisher - otherwise a different type, // but otherwise the same (mostly?) } + @Override + protected void buildUntypedDate(MarcxmlSubfield subfield) + throws EntityBuilderException { + + if (subfield == null) { + return; + } + + String date = subfield.getTextValue(); + String[] dates = date.split(" ©"); + + activity.addAttribute(Ld4lDatatypeProp.DATE, + XmlTextElement.trimFinalPunctAndWhitespace(dates[0])); + + if (dates.length > 1) { + String copyright = dates[1].trim(); + EntityBuilder builder = getBuilder(Ld4lActivityType.defaultType()); + BuildParams params = new BuildParams() + .setParent(parent) + .setValue(copyright) + .setProperty(Ld4lDatatypeProp.DATE) + .setType(Ld4lActivityType.COPYRIGHT_HOLDER_ACTIVITY); + builder.build(params); + } + } + } diff --git a/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lActivityType.java b/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lActivityType.java index 6ffbf7ed6..98e041630 100644 --- a/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lActivityType.java +++ b/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lActivityType.java @@ -11,6 +11,8 @@ public enum Ld4lActivityType implements Type { ACTIVITY(Ld4lNamespace.BIBLIOTEKO, "Activity", "Activity"), AUTHOR_ACTIVITY( Ld4lNamespace.BIBLIOTEKO, "AuthorActivity", "Author"), + COPYRIGHT_HOLDER_ACTIVITY( + Ld4lNamespace.BIBLIOTEKO, "CopyrightHolderActivity", "C"), DISTRIBUTOR_ACTIVITY( Ld4lNamespace.BIBLIOTEKO, "DistributorActivity", "Distributor"), MANUFACTURER_ACTIVITY( diff --git a/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lDatatypeProp.java b/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lDatatypeProp.java index 8720c79a5..dd8fec21b 100644 --- a/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lDatatypeProp.java +++ b/src/main/java/org/ld4l/bib2lod/ontology/ld4l/Ld4lDatatypeProp.java @@ -11,10 +11,12 @@ public enum Ld4lDatatypeProp implements DatatypeProp { /* List in alpha order */ + BIRTH_DATE(Ld4lNamespace.SCHEMA, "birthDate"), CHANGE_DATE(Ld4lNamespace.BIBFRAME, "changeDate"), CODE(Ld4lNamespace.BIBFRAME, "code"), COMMENT(Ld4lNamespace.RDFS, "comment"), DATE(Ld4lNamespace.DCTERMS, "date"), + DEATH_DATE(Ld4lNamespace.SCHEMA, "deathDate"), EDITION_STATEMENT(Ld4lNamespace.BIBFRAME, "editionStatement"), EDITORIAL_NOTE(Ld4lNamespace.SKOS, "editorialNote"), FAMILY_NAME(Ld4lNamespace.FOAF, "familyName"), diff --git a/src/main/java/org/ld4l/bib2lod/records/xml/XmlTextElement.java b/src/main/java/org/ld4l/bib2lod/records/xml/XmlTextElement.java index 8a6a3be7e..cf2519e09 100644 --- a/src/main/java/org/ld4l/bib2lod/records/xml/XmlTextElement.java +++ b/src/main/java/org/ld4l/bib2lod/records/xml/XmlTextElement.java @@ -2,10 +2,8 @@ package org.ld4l.bib2lod.records.xml; -import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.apache.commons.lang3.StringUtils; import org.ld4l.bib2lod.records.RecordField; import org.w3c.dom.CDATASection; import org.w3c.dom.Element; @@ -80,16 +78,7 @@ default char getCharAt(int pos) throws IndexOutOfBoundsException { static final String PATTERN_FINAL_PUNCT_AND_WHITESPACE = "\\s*[.,;:]?\\s*$"; - public static boolean endsWithPunct(String s) { - Matcher m = PATTERN_FINAL_PUNCT.matcher(s); - return m.matches(); - } - - public static String removeFinalPunct(String s) { - return endsWithPunct(s) ? StringUtils.chop(s) : s; - } - - public static String removeFinalPunctAndWhitespace(String s) { + public static String trimFinalPunctAndWhitespace(String s) { return s.replaceAll(PATTERN_FINAL_PUNCT_AND_WHITESPACE, ""); } @@ -100,7 +89,7 @@ public static String removeFinalPunctAndWhitespace(String s) { * titles, etc. and are not part of the actual text value. */ public static String trim(String s) { - return removeFinalPunctAndWhitespace(s.trim()); + return trimFinalPunctAndWhitespace(s.trim()); } } diff --git a/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlControlField.java b/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlControlField.java index 62fe3b7a7..0e43f983a 100644 --- a/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlControlField.java +++ b/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlControlField.java @@ -16,7 +16,6 @@ public class MarcxmlControlField extends BaseMarcxmlField @SuppressWarnings("unused") private static final Logger LOGGER = LogManager.getLogger(); - private static final String CONTROL_NUMBER_ATTRIBUTE_NAME = "tag"; private String tag; private String textValue; @@ -27,15 +26,11 @@ public class MarcxmlControlField extends BaseMarcxmlField */ public MarcxmlControlField(Element element) throws RecordFieldException { super(element); - tag = element.getAttribute(CONTROL_NUMBER_ATTRIBUTE_NAME); + tag = element.getAttribute("tag"); textValue = retrieveTextValue(this.element); isValid(); } - static String getControlNumberAttributeName() { - return CONTROL_NUMBER_ATTRIBUTE_NAME; - } - /** * Alias of getTag(). */ diff --git a/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataField.java b/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataField.java index fb4a175d7..ab7c93059 100644 --- a/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataField.java +++ b/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataField.java @@ -4,15 +4,11 @@ import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.List; -import java.util.Set; -import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.ld4l.bib2lod.records.xml.XmlTextElement; import org.w3c.dom.Element; import org.w3c.dom.NodeList; @@ -21,11 +17,7 @@ */ public class MarcxmlDataField extends BaseMarcxmlField implements MarcxmlTaggedField { - - private static List NON_REPEATING_FIELDS = Arrays.asList( - - ); - + @SuppressWarnings("unused") private static final Logger LOGGER = LogManager.getLogger(); @@ -56,61 +48,18 @@ public MarcxmlDataField(Element element) throws RecordFieldException { isValid(); } - - - /** - * Returns a list of values of subfields of this data field with the - * specified code. If no subfields, returns an empty list. Never returns - * null. - * @param code - the code of the subfields to get - * @param trim - if true, trim and remove final punct and whitespace - */ - public List getSubfieldValues(char code, boolean trim) { - List values = new ArrayList<>(); - for (MarcxmlSubfield subfield : subfields) { - if (!subfield.hasCode(code)) { - continue; - } - String value = subfield.getTextValue(); - values.add(trim ? XmlTextElement.trim(value) : value); - } - return values; - } - - public List getTrimmedSubfieldValues(char code) { - return getSubfieldValues(code, true); - } - - /** - * Returns a list of values of subfields of this data field with the - * specified code. If no subfields, returns an empty list. Never returns - * null. - * @param code - the code of the subfields to get - * @param clean - if true, trim and remove final punct and whitespace - */ - public Set getUniqueSubfieldValues(char code, boolean trim) { - return new HashSet(getSubfieldValues(code, trim)); - } - - public Set getUniqueTrimmedSubfieldValues(char code) { - return getUniqueSubfieldValues(code, true); + + @Override + public String getTag() { + return tag; } - /** - * Returns true iff this data field has a subfield with the specified code - * with the specified value. - * @param code - the code of the subfields to check - * @param value - the value to look for - * @param clean - if true, trim and remove final punct and whitespace - */ - public boolean hasSubfieldValue(char code, String value, boolean clean) { - - List values = getSubfieldValues(code, clean); - return values.contains(value); + public Integer getFirstIndicator() { + return ind1; } - public boolean hasCleanSubfieldValue(char code, String value) { - return hasSubfieldValue(code, value, true); + public Integer getSecondIndicator() { + return ind2; } private Integer getIndicatorValue(String ind, Element element) { @@ -120,22 +69,39 @@ private Integer getIndicatorValue(String ind, Element element) { } return Integer.parseInt(value); } - - @Override - public String getTag() { - return tag; + + public List getSubfields() { + return subfields; } - public Integer getFirstIndicator() { - return ind1; + /** + * Returns a list of values of subfields of this data field with the + * one of the specified codes, in order of occurrence. If no subfields, + * returns an empty list. Never returns null. + */ + public List listSubfieldValues(List codes) { + List values = new ArrayList<>(); + for (MarcxmlSubfield subfield : subfields) { + if (! codes.contains(subfield.getCode())) { + continue; + } + values.add(subfield.getTextValue()); + } + return values; } - public Integer getSecondIndicator() { - return ind2; - } - - public List getSubfields() { - return subfields; + /** + * Concatenates the string values of the specified subfields of this + * data field, in order of occurrence. If no subfields, + * returns null. + */ + public String concatenateSubfieldValues(List codes) { + String value = null; + List values = listSubfieldValues(codes); + if (values.size() > 0) { + value = StringUtils.join(values, " "); + } + return value; } /** @@ -196,61 +162,6 @@ public boolean hasSubfield(char code) { return getSubfield(code) != null; } - /** - * Returns a list of subfield codes in the datafield. Note that a valid - * datafield must have at least one subfield, so this method never - * returns an empty list. - */ - public List getSubfieldCodes() { - - List codes = new ArrayList<>(); - for (MarcxmlSubfield subfield : subfields) { - codes.add(subfield.getCode()); - } - return codes; - } - - public Set getUniqueSubfieldCodes() { - - Set codes = new HashSet<>(); - for (MarcxmlSubfield subfield : subfields) { - codes.add(subfield.getCode()); - } - return codes; - } - - /** - * Returns true iff this datafield contains at least one subfield - * in the specified list of character codes. - */ - public boolean containsAnySubfield(List codes) { - return CollectionUtils.containsAny(getSubfieldCodes(), codes); - } - - /** - * Returns true iff this datafield contains at least one subfield - * in the specified array of character codes. - */ - public boolean containsAnySubfield(Character[] codes) { - return containsAnySubfield(Arrays.asList(codes)); - } - - - /** - * Returns the datafield in the specified list with the specified tag value. - * Returns the first if multiple are found. Returns null if none are found. - */ - public static MarcxmlDataField get( - List fields, String tag) { - - for (MarcxmlDataField field: fields) { - if (field.getTag().equals(tag)) { - return field; - } - } - return null; - } - private void isValid() throws RecordFieldException { if (StringUtils.isBlank(tag)) { @@ -276,13 +187,5 @@ private void isValid() throws RecordFieldException { } } } - - public static boolean isNonRepeating(int tag) { - return NON_REPEATING_FIELDS.contains(tag); - } - - public static boolean isRepeating(int tag) { - return ! NON_REPEATING_FIELDS.contains(tag); - } - + } diff --git a/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlRecord.java b/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlRecord.java index 38032e560..62203c42b 100644 --- a/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlRecord.java +++ b/src/main/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlRecord.java @@ -86,8 +86,7 @@ private final List buildControlFields(Element record) Element field = (Element) controlFieldNodes.item(i); // There should be only one control field per control number; ignore // others. - String controlNumber = field.getAttribute( - MarcxmlControlField.getControlNumberAttributeName()); + String controlNumber = field.getAttribute("tag"); if (! controlNumbers.contains(controlNumber)) { controlFields.add(new MarcxmlControlField(field)); controlNumbers.add(controlNumber); diff --git a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilderTest.java b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilderTest.java index a84a39f97..cb6de7162 100644 --- a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilderTest.java +++ b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AdminMetadataBuilderTest.java @@ -6,7 +6,7 @@ import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import org.ld4l.bib2lod.datatypes.XsdDatatype; +import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.BibDatatype; import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entity.InstanceEntity; import org.ld4l.bib2lod.entitybuilders.BuildParams; @@ -189,7 +189,7 @@ public void testDateTimeValue_005() throws Exception { @Test public void testDateTimeDatatype_005() throws Exception { Entity adminMetadata = buildAdminMetadata(TEST_RECORD); - Assert.assertSame(XsdDatatype.DATETIME, adminMetadata.getAttribute( + Assert.assertSame(BibDatatype.EDTF, adminMetadata.getAttribute( Ld4lDatatypeProp.CHANGE_DATE).getDatatype()); } diff --git a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilderTest.java b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilderTest.java index dc1797a55..6593dbf58 100644 --- a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilderTest.java +++ b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/AgentBuilderTest.java @@ -1,6 +1,5 @@ package org.ld4l.bib2lod.entitybuilders.marcxml.ld4l; -import static org.junit.Assert.fail; import static org.ld4l.bib2lod.testing.xml.testrecord.MockMarcxml.MINIMAL_RECORD; import java.util.List; @@ -8,14 +7,14 @@ import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; -import org.junit.Ignore; import org.junit.Test; +import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.BibDatatype; import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entitybuilders.BuildParams; import org.ld4l.bib2lod.entitybuilders.EntityBuilder.EntityBuilderException; import org.ld4l.bib2lod.entitybuilders.EntityBuilderFactory; +import org.ld4l.bib2lod.ontology.ld4l.Ld4lAgentType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; -import org.ld4l.bib2lod.ontology.ld4l.Ld4lInstanceType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lObjectProp; import org.ld4l.bib2lod.records.RecordField.RecordFieldException; import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlSubfield; @@ -29,7 +28,7 @@ */ public class AgentBuilderTest extends AbstractTestClass { - public static final MockMarcxml DUPLICATE_AGENTS = MINIMAL_RECORD.openCopy() + public static final MockMarcxml _260_DUPLICATE_AGENTS = MINIMAL_RECORD.openCopy() .addDatafield("260", "3", " ") .addSubfield("a", "Lugduni Batavorum :") .addSubfield("b", "E.J. Brill") @@ -38,31 +37,40 @@ public class AgentBuilderTest extends AbstractTestClass { .addSubfield("b", "E.J. Brill") .lock(); - public static final MockMarcxml DIFFERENT_AGENTS = DUPLICATE_AGENTS.openCopy() + public static final MockMarcxml _260_DIFFERENT_AGENTS = _260_DUPLICATE_AGENTS.openCopy() .findDatafield("260", 1).replaceSubfield("b", "Random House") .lock(); private static final String NAME_SUBFIELD = "E.J. Brill"; - public static final MockMarcxml AUTHOR_FULL_NAME = MINIMAL_RECORD.openCopy() + public static final MockMarcxml _100_AUTHOR_PERSON = MINIMAL_RECORD.openCopy() .addDatafield("100", "1", "") - //.findDatafield("100") .addSubfield("a", "Austen, Jane") .addSubfield("d", "1775-1817") .lock(); + + public static final MockMarcxml _100_AUTHOR_FAMILY = MINIMAL_RECORD.openCopy() + .addDatafield("100", "3", "") + .addSubfield("a", "Clark family") + .lock(); - public static final MockMarcxml AUTHOR_SURNAME = MINIMAL_RECORD.openCopy() + public static final MockMarcxml _100_AUTHOR_COMPLEX_NAME = MINIMAL_RECORD.openCopy() .addDatafield("100", "1", "") - .addSubfield("a", "Watson,") - .addSubfield("c", "Rev.") - .addSubfield("d", "1775-1817") + .addSubfield("a", "Gustaf") + .addSubfield("b", "V,") + .addSubfield("c", "King of Sweden,") + .lock(); + + public static final MockMarcxml _100_AUTHOR_INITIALS = MINIMAL_RECORD.openCopy() + .addDatafield("100", "1", "") + .addSubfield("a", "Curien, P.-L.") .lock(); - public static final MockMarcxml AUTHOR_FORENAME = MINIMAL_RECORD.openCopy() + public static final MockMarcxml _100_AUTHOR_FULLER_NAME_FORM = MINIMAL_RECORD.openCopy() .addDatafield("100", "1", "") - .addSubfield("a", "John") - .addSubfield("c", "the Baptist, Saint.") + .addSubfield("a", "Claudius") + .addSubfield("q", "(Claudius Ceccon)") .lock(); private static BaseMockBib2LodObjectFactory factory; @@ -102,17 +110,6 @@ public void noNameOrSubfield_ThrowsException() throws Exception { agentBuilder.build(params); } - @Test - public void invalidType_ThrowsException() throws Exception { - expectException(EntityBuilderException.class, - "Invalid agent type"); - BuildParams params = new BuildParams() - .setType(Ld4lInstanceType.INSTANCE) - .addSubfield(buildSubfieldFromString(NAME_SUBFIELD)) - .setParent(new Entity()); - agentBuilder.build(params); - } - @Test public void testNameFromSubfield() throws Exception { BuildParams params = new BuildParams() @@ -124,9 +121,9 @@ public void testNameFromSubfield() throws Exception { } @Test - public void testReuseExistingAgent() throws Exception { + public void testReuseExistingAgent_260() throws Exception { BuildParams params = new BuildParams() - .setRecord(DUPLICATE_AGENTS.toRecord()); + .setRecord(_260_DUPLICATE_AGENTS.toRecord()); Entity instance = instanceBuilder.build(params); List activities = instance.getChildren(Ld4lObjectProp.HAS_ACTIVITY); @@ -137,9 +134,9 @@ public void testReuseExistingAgent() throws Exception { } @Test - public void testBuildNewAgent() throws Exception { + public void testBuildNewAgent_260() throws Exception { BuildParams params = new BuildParams() - .setRecord(DIFFERENT_AGENTS.toRecord()); + .setRecord(_260_DIFFERENT_AGENTS.toRecord()); Entity instance = instanceBuilder.build(params); List activities = instance.getChildren(Ld4lObjectProp.HAS_ACTIVITY); @@ -150,21 +147,78 @@ public void testBuildNewAgent() throws Exception { } @Test - @Ignore - public void testAuthorFullName() throws Exception { - fail("testAuthorName not yet implemented."); + public void testAuthorIsPerson_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_PERSON, "100"); + Assert.assertTrue(author.hasType(Ld4lAgentType.PERSON)); + } + + @Test + public void testAuthorIsFamily_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_FAMILY, "100"); + Assert.assertTrue(author.hasType(Ld4lAgentType.FAMILY)); + } + + @Test + public void testAuthorPersonName_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_PERSON, "100"); + Assert.assertEquals("Austen, Jane", + author.getValue(Ld4lDatatypeProp.NAME)); + } + + @Test + public void testAuthorPersonNameDatatype_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_PERSON, "100"); + Assert.assertEquals(BibDatatype.LEGACY_SOURCE_DATA, + author.getAttribute(Ld4lDatatypeProp.NAME).getDatatype()); + } + + @Test + public void testAuthorComplexPersonName_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_COMPLEX_NAME, "100"); + Assert.assertEquals("Gustaf V, King of Sweden", + author.getValue(Ld4lDatatypeProp.NAME)); + } + + @Test + public void testAuthorInitials_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_INITIALS, "100"); + Assert.assertEquals("Curien, P.-L.", + author.getValue(Ld4lDatatypeProp.NAME)); + } + + @Test + public void testAuthorFullerPersonName_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_FULLER_NAME_FORM, "100"); + Assert.assertEquals("Claudius (Claudius Ceccon)", + author.getValue(Ld4lDatatypeProp.NAME)); + } + + @Test + public void testAuthorFamilyName_100() throws Exception { + Entity author = buildAgent(_100_AUTHOR_FAMILY, "100"); + Assert.assertEquals("Clark family", + author.getValue(Ld4lDatatypeProp.NAME)); + } + + @Test + public void testAuthorFamilyNameDatatype_100$a() throws Exception { + Entity author = buildAgent(_100_AUTHOR_FAMILY, "100"); + Assert.assertEquals(BibDatatype.LEGACY_SOURCE_DATA, + author.getAttribute(Ld4lDatatypeProp.NAME).getDatatype()); } @Test - @Ignore - public void testAuthorDates() throws Exception { - fail("testAuthorDates not yet implemented."); + public void testAuthorDateValue() throws Exception { + Entity author = buildAgent(_100_AUTHOR_PERSON, "100"); + Assert.assertEquals("1775-1817", + author.getValue(Ld4lDatatypeProp.DATE)); } @Test - @Ignore - public void testAuthorBirthdate() throws Exception { - fail("testAuthorBirthdate not yet implemented."); + public void testAuthorDateDatatype() throws Exception { + Entity author = buildAgent(_100_AUTHOR_PERSON, "100"); + Assert.assertEquals(BibDatatype.LEGACY_SOURCE_DATA, + author.getAttribute(Ld4lDatatypeProp.DATE).getDatatype()); } @@ -172,9 +226,18 @@ public void testAuthorBirthdate() throws Exception { // Helper methods // --------------------------------------------------------------------- + // TODO Integrate into MockMarcxml framework private MarcxmlSubfield buildSubfieldFromString( String element) throws RecordFieldException { return new MarcxmlSubfield( XmlTestUtils.buildElementFromString(element)); } + + private Entity buildAgent(MockMarcxml marcxml, String tag) + throws Exception { + BuildParams params = new BuildParams() + .setParent(new Entity()) + .setField(marcxml.toRecord().getTaggedField(tag)); + return agentBuilder.build(params); + } } diff --git a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilderTest.java b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilderTest.java index 10adfab81..04ead9d39 100644 --- a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilderTest.java +++ b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ActivityBuilderTest.java @@ -8,9 +8,12 @@ import org.junit.Test; import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entitybuilders.BuildParams; +import org.ld4l.bib2lod.entitybuilders.EntityBuilder; import org.ld4l.bib2lod.entitybuilders.EntityBuilder.EntityBuilderException; import org.ld4l.bib2lod.entitybuilders.EntityBuilderFactory; import org.ld4l.bib2lod.entitybuilders.marcxml.ld4l.MarcxmlToLd4lEntityBuilderFactory; +import org.ld4l.bib2lod.ontology.DatatypeProp; +import org.ld4l.bib2lod.ontology.Type; import org.ld4l.bib2lod.ontology.ld4l.Ld4lActivityType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; import org.ld4l.bib2lod.ontology.ld4l.Ld4lObjectProp; @@ -26,17 +29,15 @@ public class ActivityBuilderTest extends AbstractTestClass { public static final MockMarcxml _260_PUBLISHER = MINIMAL_RECORD.openCopy() .addControlfield("001", "102063") - .findDatafield("245").findSubfield("a").setValue("full title") .addDatafield("260", " ", " ").addSubfield("b", "Grune & Stratton,") .lock(); public static final MockMarcxml _100_AUTHOR = MINIMAL_RECORD.openCopy() - .findDatafield("245").findSubfield("a").setValue("full title") .addDatafield("100", "0", " ").addSubfield("a", "Manya K'Omalowete a Djonga,") .lock(); private static BaseMockBib2LodObjectFactory factory; - private PublisherActivityBuilder builder; + private PublisherActivityBuilder publisherActivityBuilder; @BeforeClass public static void setUpOnce() throws Exception { @@ -47,7 +48,7 @@ public static void setUpOnce() throws Exception { @Before public void setUp() { - this.builder = new PublisherActivityBuilder(); + this.publisherActivityBuilder = new PublisherActivityBuilder(); } // --------------------------------------------------------------------- @@ -60,18 +61,29 @@ public void nullParent() throws Exception { "A parent entity is required"); BuildParams params = new BuildParams() .setParent(null); - builder.build(params); + publisherActivityBuilder.build(params); } @Test - public void nullField_ThrowsException() throws Exception { + public void nullFieldAndProperty_ThrowsException() throws Exception { expectException(EntityBuilderException.class, - "A field is required"); + "A field or property and value"); BuildParams params = new BuildParams() .setParent(new Entity()) .setRecord(null) - .setField(null); - builder.build(params); + .setProperty(Ld4lDatatypeProp.NAME); + publisherActivityBuilder.build(params); + } + + @Test + public void nullFieldAndValue_ThrowsException() throws Exception { + expectException(EntityBuilderException.class, + "A field or property and value"); + BuildParams params = new BuildParams() + .setParent(new Entity()) + .setRecord(null) + .setValue("value"); + publisherActivityBuilder.build(params); } @Test @@ -84,7 +96,7 @@ public void invalidFieldType_ThrowsException() throws Exception { .setField(new MarcxmlSubfield( XmlTestUtils.buildElementFromString( "test"))); - builder.build(params); + publisherActivityBuilder.build(params); } @Test @@ -100,6 +112,13 @@ public void testRelationshipToResource() throws Exception { Entity activity = buildActivity(instance, _260_PUBLISHER, "260"); Assert.assertTrue(instance.hasChild(Ld4lObjectProp.HAS_ACTIVITY, activity)); } + + @Test + public void testCopyrightHolderActivity() throws Exception { + Entity activity = buildActivity(Ld4lActivityType.COPYRIGHT_HOLDER_ACTIVITY, + Ld4lDatatypeProp.DATE, "1957"); + Assert.assertEquals("1957", activity.getValue(Ld4lDatatypeProp.DATE)); + } // --------------------------------------------------------------------- // Helper methods @@ -117,6 +136,17 @@ private Entity buildActivity(Entity parent, MockMarcxml input, String tag) .setParent(parent) .setRecord(record) .setField(record.getDataField(tag)); + return publisherActivityBuilder.build(params); + } + + private Entity buildActivity(Type type, DatatypeProp property, String value) + throws Exception { + EntityBuilder builder = new ActivityBuilder(); + BuildParams params = new BuildParams() + .setParent(new Entity()) + .setProperty(property) + .setType(type) + .setValue(value); return builder.build(params); } diff --git a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilderTest.java b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilderTest.java index 34c79e634..c983a82ef 100644 --- a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilderTest.java +++ b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/ManufacturerActivityBuilderTest.java @@ -55,7 +55,7 @@ public void setUp() throws RecordFieldException { @Test public void testManufacturer_260() throws Exception { Entity activity = buildActivity(_260_MANUFACTURER, "260", - Arrays.asList('e', 'f', 'g')); + Arrays.asList('e')); Assert.assertEquals(Ld4lActivityType.MANUFACTURER_ACTIVITY, activity.getType()); } diff --git a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilderTest.java b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilderTest.java index 306401b2a..e6df3f33b 100644 --- a/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilderTest.java +++ b/src/test/java/org/ld4l/bib2lod/entitybuilders/marcxml/ld4l/activities/PublisherActivityBuilderTest.java @@ -3,18 +3,24 @@ import static org.ld4l.bib2lod.testing.xml.testrecord.MockMarcxml.MINIMAL_RECORD; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.jena.rdf.model.Literal; import org.apache.jena.rdf.model.ResourceFactory; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Test; import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.BibDatatype; import org.ld4l.bib2lod.datatypes.Ld4lCustomDatatypes.EdtfType; import org.ld4l.bib2lod.entity.Attribute; import org.ld4l.bib2lod.entity.Entity; import org.ld4l.bib2lod.entitybuilders.BuildParams; +import org.ld4l.bib2lod.entitybuilders.EntityBuilderFactory; +import org.ld4l.bib2lod.entitybuilders.marcxml.ld4l.InstanceBuilder; +import org.ld4l.bib2lod.entitybuilders.marcxml.ld4l.MarcxmlToLd4lEntityBuilderFactory; +import org.ld4l.bib2lod.ontology.ld4l.Ld4lActivityType; import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp; import org.ld4l.bib2lod.ontology.ld4l.Ld4lNamedIndividual; import org.ld4l.bib2lod.ontology.ld4l.Ld4lNamespace; @@ -24,6 +30,7 @@ import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlRecord; import org.ld4l.bib2lod.records.xml.marcxml.MarcxmlTaggedField; import org.ld4l.bib2lod.testing.AbstractTestClass; +import org.ld4l.bib2lod.testing.BaseMockBib2LodObjectFactory; import org.ld4l.bib2lod.testing.xml.testrecord.MockMarcxml; @@ -39,6 +46,10 @@ public class PublisherActivityBuilderTest extends AbstractTestClass { private static final MockMarcxml _008_NO_DATE = MINIMAL_RECORD.openCopy() .replaceControlfield("008", "860506s nyua b 000 0 eng ") .lock(); + + private static final MockMarcxml _008_TWO_DATES = MINIMAL_RECORD.openCopy() + .replaceControlfield("008", "860506s19571960 a b 000 0 eng ") + .lock(); public static final MockMarcxml _008_TWO_CHAR_PUB_LOCATION = MINIMAL_RECORD.openCopy() .findControlfield("008").setValue("750226c18529999ne bx p 0 b0eng ") @@ -70,6 +81,11 @@ public class PublisherActivityBuilderTest extends AbstractTestClass { .addSubfield("a", "Leiden :") .addSubfield("b", "E.J. Brill") .lock(); + + public static final MockMarcxml _260_COPYRIGHT_DATE = MINIMAL_RECORD.openCopy() + .addControlfield("001", "102063") + .addDatafield("260", " ", " ").addSubfield("c", "1957 ©1957") + .lock(); public static final String _001 = "102063"; @@ -77,12 +93,21 @@ public class PublisherActivityBuilderTest extends AbstractTestClass { public static final String _245 = "text"; - - private PublisherActivityBuilder builder; + private static BaseMockBib2LodObjectFactory factory; + private InstanceBuilder instanceBuilder; + private PublisherActivityBuilder publisherActivityBuilder; + + @BeforeClass + public static void setUpOnce() throws Exception { + factory = new BaseMockBib2LodObjectFactory(); + factory.addInstance(EntityBuilderFactory.class, + new MarcxmlToLd4lEntityBuilderFactory()); + } @Before - public void setUp() { - this.builder = new PublisherActivityBuilder(); + public void setUp() { + this.instanceBuilder = new InstanceBuilder(); + this.publisherActivityBuilder = new PublisherActivityBuilder(); } // --------------------------------------------------------------------- @@ -134,7 +159,7 @@ public void noLocation_008_Succeeds() throws Exception { } @Test - public void testActivityDate_008() throws Exception { + public void testActivityDate1_008() throws Exception { Entity activity = buildActivity("008"); Attribute attribute = activity.getAttribute(Ld4lDatatypeProp.DATE); Literal literal = ResourceFactory.createTypedLiteral( @@ -142,6 +167,22 @@ public void testActivityDate_008() throws Exception { Assert.assertEquals(literal, attribute.toLiteral()); } + @Test + public void testActivityTwoDates_008() throws Exception { + Entity activity = buildActivity(_008_TWO_DATES, "008"); + Assert.assertEquals(2, + activity.getAttributes(Ld4lDatatypeProp.DATE).size()); + } + + @Test + public void testActivityDate2_008() throws Exception { + Entity activity = buildActivity(_008_TWO_DATES, "008"); + Attribute attribute = activity.getAttributes(Ld4lDatatypeProp.DATE).get(1); + Literal literal = ResourceFactory.createTypedLiteral( + "1960", BibDatatype.EDTF.rdfType()); + Assert.assertEquals(literal, attribute.toLiteral()); + } + @Test public void blankDate_008_Succeeds() throws Exception { buildActivity(_008_NO_DATE, "008"); @@ -169,31 +210,63 @@ public void testCurrentPublisherStatus_ind1ValueEmpty() activity.getExternal(Ld4lObjectProp.HAS_STATUS)); } + @Test + public void testDate() throws Exception { + Entity activity = buildActivity(_260_PUBLISHER, "260", Arrays.asList('c')); + Assert.assertEquals("1957", activity.getValue(Ld4lDatatypeProp.DATE)); + } + + @Test + public void testCopyrightHolderActivity() throws Exception { + Entity instance = buildInstance(_260_COPYRIGHT_DATE); + Entity activity = instance.getChild(Ld4lObjectProp.HAS_ACTIVITY, + Ld4lActivityType.COPYRIGHT_HOLDER_ACTIVITY); + Assert.assertNotNull(activity); + } + + @Test + public void testCopyrightDate() throws Exception { + Entity instance = buildInstance(_260_COPYRIGHT_DATE); + Entity activity = instance.getChild(Ld4lObjectProp.HAS_ACTIVITY, + Ld4lActivityType.COPYRIGHT_HOLDER_ACTIVITY); + Assert.assertEquals("1957", activity.getValue(Ld4lDatatypeProp.DATE)); + } + // --------------------------------------------------------------------- // Helper methods // --------------------------------------------------------------------- + + private Entity buildInstance(MockMarcxml input) throws Exception { + return instanceBuilder.build( + new BuildParams().setRecord(input.toRecord())); + } private Entity buildActivity(String tag) throws Exception { - return buildActivity(MINIMAL_RECORD, tag, null); + return buildActivity(new Entity(), MINIMAL_RECORD, tag, null); } private Entity buildActivity(MockMarcxml input, String tag) throws Exception { - return buildActivity(input, tag, null); + return buildActivity(new Entity(), input, tag, null); } private Entity buildActivity(MockMarcxml input, String tag, List codes) throws Exception { + return buildActivity(new Entity(), input, tag, codes); + } + + private Entity buildActivity(Entity parent, MockMarcxml input, + String tag, List codes) throws Exception { MarcxmlRecord record = input.toRecord(); MarcxmlTaggedField field = record.getTaggedField(tag); BuildParams params = new BuildParams() - .setParent(new Entity()) + .setParent(parent) .setRecord(record) .setField(field); - + if (field instanceof MarcxmlDataField && codes != null) { List subfields = new ArrayList<>(); for (char code : codes) { @@ -202,7 +275,7 @@ private Entity buildActivity(MockMarcxml input, String tag, params.setSubfields(subfields); } - return builder.build(params); + return publisherActivityBuilder.build(params); } diff --git a/src/test/java/org/ld4l/bib2lod/records/xml/XmlTextElementTest.java b/src/test/java/org/ld4l/bib2lod/records/xml/XmlTextElementTest.java index 4e2e4bb15..1817e7dff 100644 --- a/src/test/java/org/ld4l/bib2lod/records/xml/XmlTextElementTest.java +++ b/src/test/java/org/ld4l/bib2lod/records/xml/XmlTextElementTest.java @@ -78,26 +78,10 @@ public void testGetCharAt() throws Exception { Assert.assertEquals('h', element.getCharAt(4)); } - @Test - public void testPunctFinal() { - Assert.assertTrue(XmlTextElement.endsWithPunct("test.")); - } - - @Test - public void testNonPunctFinal() { - Assert.assertFalse(XmlTextElement.endsWithPunct("test")); - } - - @Test - public void testRemoveFinalPunct() { - Assert.assertEquals( - "test", XmlTextElement.removeFinalPunct("test.")); - } - @Test public void testRemoveFinalPunctAndWhitespace() { Assert.assertEquals("test", - XmlTextElement.removeFinalPunctAndWhitespace("test : ")); + XmlTextElement.trimFinalPunctAndWhitespace("test : ")); } @Test diff --git a/src/test/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataFieldTest.java b/src/test/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataFieldTest.java index e82498bf6..e9cd9b3dd 100644 --- a/src/test/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataFieldTest.java +++ b/src/test/java/org/ld4l/bib2lod/records/xml/marcxml/MarcxmlDataFieldTest.java @@ -3,9 +3,7 @@ package org.ld4l.bib2lod.records.xml.marcxml; import java.util.Arrays; -import java.util.HashSet; import java.util.List; -import java.util.Set; import org.junit.Assert; import org.junit.Test; @@ -16,6 +14,7 @@ import org.w3c.dom.Element; + /** * Tests class MarcxmlDataField. */ @@ -65,29 +64,15 @@ public class MarcxmlDataFieldTest extends AbstractTestClass { "Clinical cardiopulmonary physiology." + ""; - private static final String REPEATED_SUBFIELDS = - "" + - "One " + - " Two" + - "Three : " + - "Three : " + - "Three ; " + - "B1" + - "B2" + - "C" + - ""; - private static final String MULTIPLE_SUBFIELDS = - "" + - "A1" + - "A2" + - "A3" + - "B1" + - "B2" + - "C1" + - ""; + "" + + "subfield a," + + "subfield b," + + "subfield b again," + + "subfield c," + + "subfield d," + + ""; - private MarcxmlDataField datafield; // --------------------------------------------------------------------- // The tests @@ -147,67 +132,41 @@ public void invalidField245_ThrowsException() throws Exception { public void validDataField_Valid() throws Exception { // No exception buildFromString(VALID_DATAFIELD); - } + } @Test - public void testGetSubfieldValues() throws Exception { - datafield = buildFromString(REPEATED_SUBFIELDS); - List values = datafield.getSubfieldValues('a', false); - Assert.assertEquals(5, values.size()); + public void testListSubfieldValues() throws Exception { + MarcxmlDataField field = buildFromString(MULTIPLE_SUBFIELDS); + List list = field.listSubfieldValues( + Arrays.asList('a', 'b', 'c', 'e')); + Assert.assertEquals(4, list.size()); } @Test - public void testGetTrimmedSubfieldValues() throws Exception { - datafield = buildFromString(REPEATED_SUBFIELDS); - List values = datafield.getTrimmedSubfieldValues('a'); - Assert.assertEquals("Three", values.get(3)); + public void testListSubfieldValuesNoMatch() throws Exception { + MarcxmlDataField field = buildFromString(MULTIPLE_SUBFIELDS); + List list = field.listSubfieldValues( + Arrays.asList('x', 'y', 'z')); + Assert.assertEquals(0, list.size()); } - - @Test - public void testGetUniqueSubfieldValues() throws Exception { - datafield = buildFromString(REPEATED_SUBFIELDS); - Set values = datafield.getUniqueSubfieldValues('a', false); - Assert.assertEquals(4, values.size()); - } - - @Test - public void testGetUniqueTrimmedSubfieldValues() throws Exception { - datafield = buildFromString(REPEATED_SUBFIELDS); - Set values = datafield.getUniqueTrimmedSubfieldValues('a'); - Assert.assertEquals(3, values.size()); - } - - @Test - public void testGetSubfieldCodes() throws Exception { - MarcxmlDataField field = buildFromString(MULTIPLE_SUBFIELDS); - List expected = Arrays.asList( - 'a', 'a', 'a', 'b', 'b', 'c'); - Assert.assertEquals(expected, field.getSubfieldCodes()); - - } - - @Test - public void testGetUniqueSubfieldCodes() throws Exception { - MarcxmlDataField field = buildFromString(MULTIPLE_SUBFIELDS); - Set expected = new HashSet<>(Arrays.asList('a', 'b', 'c')); - Assert.assertEquals(expected, field.getUniqueSubfieldCodes()); - } - + @Test - public void testContainsSomeSubfield() throws Exception { - MarcxmlDataField field = buildFromString(VALID_DATAFIELD); - - Character[] codes = {'a', 'e', 'g'}; - Assert.assertTrue(field.containsAnySubfield(codes)); + public void testConcatenateSubfieldValues() throws Exception { + MarcxmlDataField field = buildFromString(MULTIPLE_SUBFIELDS); + String values = field.concatenateSubfieldValues( + Arrays.asList('a', 'b', 'c', 'e')); + Assert.assertEquals("subfield a, subfield b, subfield b again, " + + "subfield c,", values); } @Test - public void testDoesNotContainSomeSubfield() throws Exception { - MarcxmlDataField field = buildFromString(VALID_DATAFIELD); - Character[] codes = {'e', 'f', 'g'}; - Assert.assertFalse(field.containsAnySubfield(codes)); + public void testConcatenateSubfieldValuesNoMatch() throws Exception { + MarcxmlDataField field = buildFromString(MULTIPLE_SUBFIELDS); + String values = field.concatenateSubfieldValues( + Arrays.asList('x', 'y', 'z')); + Assert.assertNull(values); } - + // --------------------------------------------------------------------- // Helper methods