Skip to content

Commit

Permalink
Refactor field tag as String rather than int
Browse files Browse the repository at this point in the history
Local tags may be alphanumeric (e.g., Harvard “H08”)
  • Loading branch information
rjyounes committed Sep 5, 2017
1 parent da192ee commit 36e0b55
Show file tree
Hide file tree
Showing 18 changed files with 114 additions and 115 deletions.
Expand Up @@ -126,7 +126,7 @@ protected Entity buildChildFromRecord(Type type, Entity parent,
}

protected Entity buildChildFromControlField(Type type, Entity parent,
MarcxmlRecord record, int tag) throws EntityBuilderException {
MarcxmlRecord record, String tag) throws EntityBuilderException {

MarcxmlControlField field = record.getControlField(tag);
if (field == null) {
Expand All @@ -143,7 +143,7 @@ protected Entity buildChildFromControlField(Type type, Entity parent,
}

protected Entity buildChildFromDataField(Type type, Entity parent,
MarcxmlRecord record, int tag) throws EntityBuilderException {
MarcxmlRecord record, String tag) throws EntityBuilderException {

MarcxmlDataField field = record.getDataField(tag);
if (field == null) {
Expand Down
Expand Up @@ -39,7 +39,7 @@ public Entity build(BuildParams params) throws EntityBuilderException {

// Control field 001: local identifier
buildChildFromControlField(
Ld4lIdentifierType.superClass(), adminMetadata, record, 1);
Ld4lIdentifierType.superClass(), adminMetadata, record, "001");

convert_040();

Expand Down Expand Up @@ -80,7 +80,7 @@ private void parseBuildParams(BuildParams params)

private void convert_040() throws EntityBuilderException {

MarcxmlDataField field = record.getDataField(40);
MarcxmlDataField field = record.getDataField("040");

if (field == null) {
return;
Expand Down Expand Up @@ -176,7 +176,7 @@ private void addDescriptionConventions(MarcxmlDataField field)

private void convert_005() throws EntityBuilderException {

MarcxmlControlField field_005 = record.getControlField(5);
MarcxmlControlField field_005 = record.getControlField("005");

if (field_005 == null) {
return;
Expand Down
Expand Up @@ -7,7 +7,6 @@
import org.ld4l.bib2lod.entitybuilders.BuildParams;
import org.ld4l.bib2lod.ontology.ObjectProp;
import org.ld4l.bib2lod.ontology.Type;
import org.ld4l.bib2lod.ontology.ld4l.Ld4lActivityType;
import org.ld4l.bib2lod.ontology.ld4l.Ld4lAgentType;
import org.ld4l.bib2lod.ontology.ld4l.Ld4lDatatypeProp;
import org.ld4l.bib2lod.ontology.ld4l.Ld4lObjectProp;
Expand Down
Expand Up @@ -141,7 +141,7 @@ private Entity build(MarcxmlControlField field) {

Entity identifier = null;

if (field.getTag() == 1) {
if (field.getTag().equals("001")) {
identifier = buildFromTextField(Ld4lIdentifierType.LOCAL,
Ld4lDatatypeProp.VALUE, field);
}
Expand All @@ -161,7 +161,7 @@ private Entity build(MarcxmlDataField field, BuildParams params)
MarcxmlSubfield subfield =
(MarcxmlSubfield) params.getSubfields().get(0);

if (field.getTag() == 35) {
if (field.getTag().equals("035")) {
identifier = convert_035(subfield);
}

Expand Down
Expand Up @@ -88,7 +88,7 @@ private void buildIdentifiers() throws EntityBuilderException {
private void convert_035() throws EntityBuilderException {

// 035 is a repeating field
List<MarcxmlDataField> fields = record.getDataFields(35);
List<MarcxmlDataField> fields = record.getDataFields("035");
if (fields.isEmpty()) {
return;
}
Expand Down Expand Up @@ -127,7 +127,7 @@ private void buildPhysicalDescriptions() throws EntityBuilderException {
private void buildExtent() throws EntityBuilderException {

// 300
List<MarcxmlDataField> fields = record.getDataFields(300);
List<MarcxmlDataField> fields = record.getDataFields("300");

if (fields.size() == 0) {
return;
Expand Down Expand Up @@ -187,12 +187,12 @@ private void buildPublisherActivities() throws EntityBuilderException {
.setRecord(record);

// First build current publisher activity from mandatory 008.
builder.build(params.setField(record.getControlField(8)));
builder.build(params.setField(record.getControlField("008")));

// 260 fields: build additional publisher activities and add data to
// current publisher activity from 008.
List<Character> publisherCodes = Arrays.asList('a', 'b', 'c');
for (MarcxmlDataField field : record.getDataFields(260)) {
for (MarcxmlDataField field : record.getDataFields("260")) {
params.setField(field);
List<List<RecordField>> subfieldLists = ProviderActivityBuilder.
getActivitySubfields(field, publisherCodes);
Expand All @@ -208,15 +208,15 @@ private void buildProvisionActivityStatements() {

// Each 260 and 264 yields one statement from all $a$b$c concatenated.
buildProvisionActivityStatements(
Arrays.asList(260, 264), Arrays.asList('a', 'b', 'c'));
Arrays.asList("260", "264"), Arrays.asList('a', 'b', 'c'));

// Each 260 yields one statement from all $e$f$g concatenated.
buildProvisionActivityStatements(
Arrays.asList(260), Arrays.asList('e', 'f', 'g'));
Arrays.asList("260"), Arrays.asList('e', 'f', 'g'));
}

private void buildProvisionActivityStatements(
List<Integer> tags, List<Character> codes) {
List<String> tags, List<Character> codes) {

for (MarcxmlDataField field : record.getDataFields(tags)) {

Expand Down Expand Up @@ -246,7 +246,7 @@ private void buildManufacturerActivities() throws EntityBuilderException {

// Build manufacturer activities from 260$e$f$g
List<Character> manufacturerCodes = Arrays.asList('e', 'f', 'g');
for (MarcxmlDataField field : record.getDataFields(260)) {
for (MarcxmlDataField field : record.getDataFields("260")) {
params.setField(field);
List<List<RecordField>> subfieldLists =
ProviderActivityBuilder.getActivitySubfields(
Expand All @@ -269,7 +269,7 @@ private void buildProviderActivities() throws EntityBuilderException {
*/
private void buildResponsiblityStatement() {

MarcxmlDataField field = record.getDataField(245);
MarcxmlDataField field = record.getDataField("245");
if (field == null) {
return;
}
Expand Down
Expand Up @@ -52,7 +52,7 @@ private void parseBuildParams(BuildParams params)
private Entity convertByField() {

switch (field.getTag()) {
case 300:
case "300":
return convert_300();
default:
return null;
Expand Down
Expand Up @@ -98,17 +98,17 @@ private void buildTitleElements() throws EntityBuilderException {
titleElements = new ArrayList<>();

// Note that every record must have a 245
MarcxmlDataField field245 = record.getDataField(245);
MarcxmlDataField field_245 = record.getDataField("245");

for (MarcxmlSubfield subfield : field245.getSubfields()) {
for (MarcxmlSubfield subfield : field_245.getSubfields()) {

char code = subfield.getCode();

switch (code) {
// 245$a always stores the full title. If 130 and/or 240 are
// present, the $a fields should be the same.
case 'a':
addNonSortAndMainTitleElements(field245, subfield);
addNonSortAndMainTitleElements(field_245, subfield);
break;
case 'b':
addSubtitleElements(subfield);
Expand Down
Expand Up @@ -109,8 +109,8 @@ private void addLanguages() {

/* TODO Codes not the same between lexvo and lc. Just use lc URIs for now. */
// Language from 008
MarcxmlControlField field008 = record.getControlField(8);
String code = field008.getTextSubstring(35,38);
MarcxmlControlField field_008 = record.getControlField("008");
String code = field_008.getTextSubstring(35,38);
if (code != null && code.length() > 0) {
// Lexvo iso639-3 codes are not completely identical with LC
work.addExternalRelationship(Ld4lObjectProp.HAS_LANGUAGE,
Expand Down
Expand Up @@ -20,7 +20,7 @@ public void build() throws EntityBuilderException {
this.type = TYPE;

// Not sure if any other tags apply
if (field.getTag() == 260) {
if (field.getTag().equals("260")) {
convert_260();
}
}
Expand Down
Expand Up @@ -28,9 +28,9 @@ public void build() throws EntityBuilderException {

this.type = TYPE;

if (field.getTag() == 8) {
if (field.getTag().equals("008")) {
convert_008();
} else if (field.getTag() == 260) {
} else if (field.getTag().equals("260")) {
convert_260();
}
}
Expand Down Expand Up @@ -74,7 +74,7 @@ private void convert_260()
// First indicator == 3
if ( (ind1 != null && ind1 == 3) ||
// This is the only 260
record.getDataFields(260).size() == 1) {
record.getDataFields("260").size() == 1) {
activity.addExternalRelationship(Ld4lObjectProp.HAS_STATUS,
Ld4lNamedIndividual.CURRENT);
}
Expand Down
Expand Up @@ -2,6 +2,7 @@

package org.ld4l.bib2lod.records.xml.marcxml;

import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.ld4l.bib2lod.records.xml.XmlTextElement;
Expand All @@ -17,7 +18,7 @@ public class MarcxmlControlField extends BaseMarcxmlField
private static final Logger LOGGER = LogManager.getLogger();
private static final String CONTROL_NUMBER_ATTRIBUTE_NAME = "tag";

private Integer tag;
private String tag;
private String textValue;


Expand All @@ -26,14 +27,9 @@ public class MarcxmlControlField extends BaseMarcxmlField
*/
public MarcxmlControlField(Element element) throws RecordFieldException {
super(element);
try {
tag = Integer.parseInt(
element.getAttribute(CONTROL_NUMBER_ATTRIBUTE_NAME));
textValue = retrieveTextValue(this.element);
isValid();
} catch (NumberFormatException e) {
throw new RecordFieldException("Control number is not an integer.");
}
tag = element.getAttribute(CONTROL_NUMBER_ATTRIBUTE_NAME);
textValue = retrieveTextValue(this.element);
isValid();
}

static String getControlNumberAttributeName() {
Expand All @@ -43,12 +39,12 @@ static String getControlNumberAttributeName() {
/**
* Alias of getTag().
*/
public int getControlNumber() {
public String getControlNumber() {
return tag;
}

@Override
public int getTag() {
public String getTag() {
return tag;
}

Expand All @@ -59,22 +55,29 @@ public String getTextValue() {

private void isValid() throws RecordFieldException {

if (tag == null) {
throw new RecordFieldException("Control number is null.");
}
if (! (tag > 0 && tag < 10)) {
throw new RecordFieldException(
"Control number is not between 1 and 9.");
}
if (textValue == null) {
throw new RecordFieldException("Text value is null.");
if (StringUtils.isBlank(tag)) {
throw new RecordFieldException("Control number must be non-empty.");
}
if (textValue.isEmpty()) {
throw new RecordFieldException("Text value is empty.");
}
if (tag == 8 && textValue.length() != 40) {
throw new RecordFieldException(
"Control field 008 does not contain exactly 40 characters.");

try {
int tagValue = Integer.parseInt(tag);

if (! (tagValue > 0 && tagValue < 10)) {
throw new RecordFieldException(
"Control number is not between 1 and 9.");
}
if (textValue == null) {
throw new RecordFieldException("Text value is null.");
}
if (textValue.isEmpty()) {
throw new RecordFieldException("Text value is empty.");
}
if (tagValue == 8 && textValue.length() != 40) {
throw new RecordFieldException("Control field 008 does not "
+ "contain exactly 40 characters.");
}
} catch (NumberFormatException e) {
throw new RecordFieldException("Control number must be numeric");
}
}

Expand Down
Expand Up @@ -29,7 +29,7 @@ public class MarcxmlDataField extends BaseMarcxmlField
@SuppressWarnings("unused")
private static final Logger LOGGER = LogManager.getLogger();

private Integer tag;
private String tag;
private Integer ind1;
private Integer ind2;
private List<MarcxmlSubfield> subfields;
Expand All @@ -41,13 +41,8 @@ public class MarcxmlDataField extends BaseMarcxmlField
public MarcxmlDataField(Element element) throws RecordFieldException {

super(element);

try {
tag = Integer.parseInt(element.getAttribute("tag"));
} catch (NumberFormatException e) {
throw new RecordFieldException("Tag value is not an integer.");
}

this.tag = element.getAttribute("tag");
this.ind1 = getIndicatorValue("ind1", element);
this.ind2 = getIndicatorValue("ind2", element);

Expand Down Expand Up @@ -127,7 +122,7 @@ private Integer getIndicatorValue(String ind, Element element) {
}

@Override
public int getTag() {
public String getTag() {
return tag;
}

Expand Down Expand Up @@ -246,10 +241,10 @@ public boolean containsAnySubfield(Character[] codes) {
* Returns the first if multiple are found. Returns null if none are found.
*/
public static MarcxmlDataField get(
List<MarcxmlDataField> fields, int tag) {
List<MarcxmlDataField> fields, String tag) {

for (MarcxmlDataField field: fields) {
if (field.getTag() == tag) {
if (field.getTag().equals(tag)) {
return field;
}
}
Expand All @@ -258,12 +253,12 @@ public static MarcxmlDataField get(

private void isValid() throws RecordFieldException {

if (tag == null) {
throw new RecordFieldException("Tag is null.");
if (StringUtils.isBlank(tag)) {
throw new RecordFieldException("Tag must be non-empty.");
}
if (! (tag > 0 && tag < 1000) ) {
if (tag.length() != 3) {
throw new RecordFieldException(
"Tag value is not between 1 and 999.");
"Tag must be exactly 3 characters long.");
}
/*
* Bad test: when pretty-printed there is whitespace inside the element.
Expand All @@ -274,7 +269,7 @@ private void isValid() throws RecordFieldException {
if (subfields.isEmpty()) {
throw new RecordFieldException("field has no subfields");
}
if (tag == 245) {
if (tag.equals("245")) {
if (! ( hasSubfield('a') || hasSubfield('k') ) ) {
throw new RecordFieldException(
"Subfield $a or $k required for field 245.");
Expand Down

0 comments on commit 36e0b55

Please sign in to comment.