Uniqueness of PICA field ranges reported wrongly #247
pkiraly committed May 5, 2023
1 parent 3f69b1b commit 78e25ce
Showing 9 changed files with 271 additions and 44 deletions.
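
The gist of the change: the validator used to count field repetitions per DataFieldDefinition, so PICA occurrences that resolve to the same field definition (e.g. 041A, 041A/01, 041A/02) were pooled into a single counter, and a non-repeatable field could be reported as repeated even though each occurrence appeared only once. The new RepetitionDao keys the counter by the extended tag (tag plus occurrence) together with the definition. Below is a minimal, self-contained sketch of the idea, not the project's classes: plain String keys stand in for RepetitionDao/DataFieldDefinition, and the tags are taken from the tests added in this commit.

import java.util.HashMap;
import java.util.Map;

// Hypothetical, simplified illustration of the counting change.
// Counting by the bare tag pools every occurrence of 041A into one entry,
// while counting by the tag-with-occurrence keeps 041A, 041A/01 and 041A/02 apart.
public class RepetitionCountSketch {
  public static void main(String[] args) {
    String[] fields = {"041A", "041A/01", "041A/02"};

    Map<String, Integer> byBareTag = new HashMap<>();
    Map<String, Integer> byExtendedTag = new HashMap<>();
    for (String tagWithOccurrence : fields) {
      String bareTag = tagWithOccurrence.split("/")[0];
      byBareTag.merge(bareTag, 1, Integer::sum);               // old behaviour: everything lands on "041A"
      byExtendedTag.merge(tagWithOccurrence, 1, Integer::sum); // fixed behaviour: one entry per occurrence
    }

    System.out.println(byBareTag);            // {041A=3} -> looks like a repeated non-repeatable field
    System.out.println(byExtendedTag.size()); // 3 entries, each counted once -> no false report
  }
}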
@@ -0,0 +1,41 @@
package de.gwdg.metadataqa.marc.analysis.validator;

import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;

import java.util.Objects;

public class RepetitionDao {
  private String extendedTag;
  private DataFieldDefinition fieldDefinition;

  public RepetitionDao(String extendedTag, DataFieldDefinition fieldDefinition) {
    this.extendedTag = extendedTag;
    this.fieldDefinition = fieldDefinition;
  }

  public String getExtendedTag() {
    return extendedTag;
  }

  public DataFieldDefinition getFieldDefinition() {
    return fieldDefinition;
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) return true;
    if (o == null || getClass() != o.getClass()) return false;

    RepetitionDao that = (RepetitionDao) o;

    if (!Objects.equals(extendedTag, that.extendedTag)) return false;
    return Objects.equals(fieldDefinition, that.fieldDefinition);
  }

  @Override
  public int hashCode() {
    int result = extendedTag != null ? extendedTag.hashCode() : 0;
    result = 31 * result + (fieldDefinition != null ? fieldDefinition.hashCode() : 0);
    return result;
  }
}
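
Because equals() and hashCode() are both derived from extendedTag and fieldDefinition, the class works as a composite HashMap key: repeated fields with the same tag and occurrence collapse onto one entry, while different occurrences get their own. A hypothetical usage sketch follows (it assumes the class above is on the classpath; null is passed for the definition purely to keep the example self-contained).

import java.util.HashMap;
import java.util.Map;

// Hypothetical usage sketch of RepetitionDao as a map key.
public class RepetitionDaoKeySketch {
  public static void main(String[] args) {
    Map<RepetitionDao, Integer> counter = new HashMap<>();
    counter.merge(new RepetitionDao("041A/01", null), 1, Integer::sum);
    counter.merge(new RepetitionDao("041A/01", null), 1, Integer::sum); // same key -> count becomes 2
    counter.merge(new RepetitionDao("041A/02", null), 1, Integer::sum); // different occurrence -> separate entry

    System.out.println(counter.size()); // 2
  }
}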
@@ -113,30 +113,42 @@ private boolean validateControlfields() {
     return isValidComponent;
   }

-  private boolean validateDatafields() {
-    boolean isValidRecord = true;
+  private void validateDatafields() {
     DataFieldValidator validator = new DataFieldValidator(configuration);
-    ValidatorResponse validatorResponse;
-    Map<DataFieldDefinition, Integer> repetitionCounter = new HashMap<>();
-    for (DataField field : marcRecord.getDatafields()) {
-      if (field.getDefinition() != null && !marcRecord.isIgnorableField(field.getTag(), configuration.getIgnorableFields())) {
-        count(field.getDefinition(), repetitionCounter);
-        if (!validator.validate(field)) {
-          isValidRecord = false;
-          validationErrors.addAll(filterErrors(validator.getValidationErrors()));
-        }
+    Map<RepetitionDao, Integer> repetitionCounter = new HashMap<>();
+    for (DataField field : marcRecord.getDatafields())
+      validateDatafield(validator, repetitionCounter, field);

-        validatorResponse = ClassificationReferenceValidator.validate(field);
-        if (!validatorResponse.isValid()) {
-          validationErrors.addAll(filterErrors(validatorResponse.getValidationErrors()));
-          isValidRecord = false;
-        }
-      }
+    validateRepeatability(repetitionCounter);
+  }
+
+  /**
+   *
+   * @param validator
+   * @param repetitionCounter
+   * @param field
+   */
+  private void validateDatafield(DataFieldValidator validator,
+                                 Map<RepetitionDao, Integer> repetitionCounter,
+                                 DataField field) {
+    ValidatorResponse validatorResponse;
+    if (field.getDefinition() != null && !marcRecord.isIgnorableField(field.getTag(), configuration.getIgnorableFields())) {
+      RepetitionDao dao = new RepetitionDao(field.getTagWithOccurrence(), field.getDefinition());
+      count(dao, repetitionCounter);
+      if (!validator.validate(field))
+        validationErrors.addAll(filterErrors(validator.getValidationErrors()));
+
+      validatorResponse = ClassificationReferenceValidator.validate(field);
+      if (!validatorResponse.isValid())
+        validationErrors.addAll(filterErrors(validatorResponse.getValidationErrors()));
     }
+  }

+  private void validateRepeatability(Map<RepetitionDao, Integer> repetitionCounter) {
     if (!isIgnorableType(ValidationErrorType.FIELD_NONREPEATABLE)) {
-      for (Map.Entry<DataFieldDefinition, Integer> entry : repetitionCounter.entrySet()) {
-        DataFieldDefinition fieldDefinition = entry.getKey();
+      for (Map.Entry<RepetitionDao, Integer> entry : repetitionCounter.entrySet()) {
+        RepetitionDao dao = entry.getKey();
+        DataFieldDefinition fieldDefinition = dao.getFieldDefinition();
         Integer count = entry.getValue();
         if (count > 1
             && fieldDefinition.getCardinality().equals(Cardinality.Nonrepeatable)) {
@@ -145,10 +157,8 @@ private boolean validateDatafields() {
             String.format("there are %d instances", count),
             fieldDefinition.getDescriptionUrl()
           ));
-          isValidRecord = false;
         }
       }
     }
-    return isValidRecord;
   }
 }
@@ -6,6 +6,7 @@
 import de.gwdg.metadataqa.marc.analysis.ThompsonTraillFields;
 import de.gwdg.metadataqa.marc.dao.DataField;
 import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
+import de.gwdg.metadataqa.marc.utils.pica.PicaFieldDefinition;
 import de.gwdg.metadataqa.marc.utils.pica.crosswalk.Crosswalk;
 import de.gwdg.metadataqa.marc.utils.pica.crosswalk.PicaMarcCrosswalkReader;
@@ -0,0 +1,34 @@
package de.gwdg.metadataqa.marc.utils.pica;

import de.gwdg.metadataqa.marc.dao.DataField;
import org.apache.commons.lang3.StringUtils;

import java.io.File;
import java.nio.file.Paths;

public class PicaDatafieldFactory {
  private static PicaSchemaManager picaSchemaManager;

  private static void initialize() {
    initialize(null);
  }

  private static void initialize(String fileName) {
    if (picaSchemaManager == null) {
      String schemaFile = fileName != null && StringUtils.isNotEmpty(fileName) && new File(fileName).exists()
        ? fileName
        : Paths.get("src/main/resources/pica/avram-k10plus.json").toAbsolutePath().toString();
      picaSchemaManager = PicaSchemaReader.createSchema(schemaFile);
    }
  }

  public static DataField create(String tagWithOccurrence, String... subfields) {
    initialize();
    DataField dataField = new DataField(picaSchemaManager.lookup(tagWithOccurrence), null, null, subfields);
    String[] parts = tagWithOccurrence.split("/");
    if (parts.length == 2)
      dataField.setOccurrence(parts[1]);

    return dataField;
  }
}
@@ -86,4 +86,16 @@ public PicaFieldDefinition copyWithChangesId() {

     return other;
   }
+
+  @Override
+  public String toString() {
+    return "PicaFieldDefinition{" +
+      super.toString().replace("DataFieldDefinition{", "").replaceFirst(".$", ", ") +
+      "modified='" + modified + '\'' +
+      ", pica3='" + pica3 + '\'' +
+      ", occurrence='" + occurrence + '\'' +
+      ", range=" + range +
+      ", id='" + id + '\'' +
+      '}';
+  }
 }
@@ -0,0 +1,52 @@
package de.gwdg.metadataqa.marc.analysis.validator;

import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.utils.pica.PicaDatafieldFactory;
import org.junit.Test;

import static org.junit.Assert.*;

public class RepetitionDaoTest {

  @Test
  public void getExtendedTag() {
    DataField df = PicaDatafieldFactory.create("041A/01", "9", "104589787");

    RepetitionDao dao = new RepetitionDao(df.getTagWithOccurrence(), df.getDefinition());

    assertEquals("041A/01", dao.getExtendedTag());
  }

  @Test
  public void getFieldDefinition() {
    DataField df = PicaDatafieldFactory.create("041A/01", "9", "104589787");

    RepetitionDao dao = new RepetitionDao(df.getTagWithOccurrence(), df.getDefinition());

    assertEquals("041A", dao.getFieldDefinition().getTag());
  }

  @Test
  public void equals_no() {
    DataField df1 = PicaDatafieldFactory.create("041A/01", "9", "104589787");
    RepetitionDao dao1 = new RepetitionDao(df1.getTagWithOccurrence(), df1.getDefinition());

    DataField df2 = PicaDatafieldFactory.create("041A/02", "9", "104589787");
    RepetitionDao dao2 = new RepetitionDao(df2.getTagWithOccurrence(), df2.getDefinition());

    assertNotEquals(dao1, dao2);
    assertFalse(dao1.equals(dao2));
  }

  @Test
  public void equals_yes() {
    DataField df1 = PicaDatafieldFactory.create("041A/01", "9", "104589787");
    RepetitionDao dao1 = new RepetitionDao(df1.getTagWithOccurrence(), df1.getDefinition());

    DataField df2 = PicaDatafieldFactory.create("041A/01", "8", "104589787");
    RepetitionDao dao2 = new RepetitionDao(df2.getTagWithOccurrence(), df2.getDefinition());

    assertEquals(dao1, dao2);
    assertTrue(dao1.equals(dao2));
  }
}
@@ -3,10 +3,12 @@
 import de.gwdg.metadataqa.marc.MarcFactory;
 import de.gwdg.metadataqa.marc.cli.CliTestUtils;
 import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
+import de.gwdg.metadataqa.marc.dao.record.PicaRecord;
 import de.gwdg.metadataqa.marc.definition.MarcFormat;
 import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
 import de.gwdg.metadataqa.marc.model.validation.ValidationErrorType;
 import de.gwdg.metadataqa.marc.utils.QAMarcReaderFactory;
+import de.gwdg.metadataqa.marc.utils.pica.PicaDatafieldFactory;
 import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaManager;
 import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaReader;
 import org.junit.Test;
@@ -17,11 +19,12 @@

 public class ValidatorTest {

+  PicaSchemaManager schema = PicaSchemaReader.createSchema(CliTestUtils.getTestResource("pica/k10plus.json"));
+  Validator validator = new Validator(new ValidatorConfiguration().withSchemaType(SchemaType.PICA));
   MarcReader reader;

   @Test
   public void validate() {
-    PicaSchemaManager schema = PicaSchemaReader.createSchema(CliTestUtils.getTestResource("pica/k10plus.json"));
     try {
       reader = QAMarcReaderFactory.getFileReader(MarcFormat.PICA_NORMALIZED, CliTestUtils.getTestResource("pica/pica-with-holdings-info.dat"), null);
     } catch (Exception e) {
@@ -36,7 +39,6 @@ public void validate() {
     BibliographicRecord marcRecord = MarcFactory.createPicaFromMarc4j(record, schema);
     assertNotNull(marcRecord);

-    Validator validator = new Validator(new ValidatorConfiguration().withSchemaType(SchemaType.PICA));
     boolean valid = validator.validate(marcRecord);
     assertFalse(valid);
     assertEquals(26, validator.getValidationErrors().size());
@@ -53,4 +55,36 @@
     assertEquals("013D", validator.getValidationErrors().get(3).getMarcPath());
     assertEquals("V", validator.getValidationErrors().get(3).getMessage());
   }
+
+  @Test
+  public void validate_valid() {
+    // arrange
+    PicaRecord record = new PicaRecord("u2407796");
+    record.addDataField(PicaDatafieldFactory.create("041A", "9", "106076612"));
+    record.addDataField(PicaDatafieldFactory.create("041A/01", "9", "104589787"));
+    record.addDataField(PicaDatafieldFactory.create("041A/02", "z", "Geschichte <1968-1975>"));
+
+    // act
+    boolean isValid = validator.validate(record);
+
+    // assert
+    assertTrue(isValid);
+    assertTrue(validator.getValidationErrors().isEmpty());
+  }
+
+  @Test
+  public void validate_invalid() {
+    // arrange
+    PicaRecord record = new PicaRecord("u2407796");
+    record.addDataField(PicaDatafieldFactory.create("041A", "9", "106076612"));
+    record.addDataField(PicaDatafieldFactory.create("041A/01", "9", "104589787"));
+    record.addDataField(PicaDatafieldFactory.create("041A/01", "z", "Geschichte <1968-1975>"));
+
+    // act
+    boolean isValid = validator.validate(record);
+
+    // assert
+    assertFalse(isValid);
+    assertFalse(validator.getValidationErrors().isEmpty());
+  }
 }