Skip to content

Commit

Permalink
issue #187: add parameter to exclude issue types
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Nov 6, 2022
1 parent 26276f1 commit 40be8e8
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 21 deletions.
25 changes: 23 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,6 @@ The `./metadata-qa.sh` script has the following options:

## Detailed instructions


We will use the same jar file in every command, so we save its path into a variable.

```bash
Expand Down Expand Up @@ -415,7 +414,29 @@ options:
will be segmented, which should be handled after the process.
* `-t`, `--collectAllErrors`: collect all errors (useful only for validating a small number of records). Default is
turned off.

* `-i`, `--ignorableIssueTypes`: comma-separated list of issue types not to collect. The valid values are:
* `undetectableType`: undetectable type
* `invalidLinkage`: invalid linkage
* `ambiguousLinkage`: ambiguous linkage
* `obsoleteControlPosition`: obsolete code
* `controlValueContainsInvalidCode`: invalid code
* `hasInvalidValue`: invalid value (control field)
* `missingSubfield`: missing reference subfield (880$6)
* `nonrepeatableField`: repetition of non-repeatable field
* `undefinedField`: undefined field
* `obsoleteIndicator`: obsolete value
* `nonEmptyIndicator`: non-empty indicator
* `hasInvalidValue`: invalid value (indicator)
* `undefinedSubfield`: undefined subfield
* `invalidLength`: invalid length
* `invalidReference`: invalid classification reference
* `patternMismatch`: content does not match any patterns
* `nonrepeatableSubfield`: repetition of non-repeatable subfield
* `invalidISBN`: invalid ISBN
* `invalidISSN`: invalid ISSN
* `unparsableContent`: content is not well-formatted
* `nullCode`: null subfield code
* `hasInvalidValue`: invalid value (subfield)

Outputs:
* `issue-by-category.csv`: the counts of issues by categories
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,35 @@ public enum ValidationErrorType {

// record
RECORD_UNDETECTABLE_TYPE(1, "undetectableType", "undetectable type", ValidationErrorCategory.RECORD),
RECORD_INVALID_LINKAGE(2, "invalidLinkage", "invalid linkage", ValidationErrorCategory.RECORD),
RECORD_AMBIGUOUS_LINKAGE(3, "ambiguousLinkage", "ambiguous linkage", ValidationErrorCategory.RECORD),
RECORD_INVALID_LINKAGE(2, "invalidLinkage", "invalid linkage", ValidationErrorCategory.RECORD),
RECORD_AMBIGUOUS_LINKAGE(3, "ambiguousLinkage", "ambiguous linkage", ValidationErrorCategory.RECORD),

// control subfield
CONTROL_POSITION_OBSOLETE_CODE(4, "obsoleteControlPosition", "obsolete code", ValidationErrorCategory.CONTROLFIELD),
CONTROL_POSITION_INVALID_CODE(5, "controlValueContainsInvalidCode", "invalid code", ValidationErrorCategory.CONTROLFIELD),
CONTROL_POSITION_INVALID_VALUE(6, "hasInvalidValue", "invalid value", ValidationErrorCategory.CONTROLFIELD),
CONTROL_POSITION_INVALID_CODE(5, "controlValueContainsInvalidCode", "invalid code", ValidationErrorCategory.CONTROLFIELD),
CONTROL_POSITION_INVALID_VALUE(6, "hasInvalidValue", "invalid value", ValidationErrorCategory.CONTROLFIELD),

// field
FIELD_MISSING_REFERENCE_SUBFIELD(7, "missingSubfield", "missing reference subfield (880$6)", ValidationErrorCategory.DATAFIELD),
FIELD_NONREPEATABLE(8, "nonrepeatableField", "repetition of non-repeatable field", ValidationErrorCategory.DATAFIELD),
FIELD_UNDEFINED(9, "undefinedField", "undefined field", ValidationErrorCategory.DATAFIELD),
FIELD_MISSING_REFERENCE_SUBFIELD(7, "missingSubfield", "missing reference subfield (880$6)", ValidationErrorCategory.DATAFIELD),
FIELD_NONREPEATABLE(8, "nonrepeatableField", "repetition of non-repeatable field", ValidationErrorCategory.DATAFIELD),
FIELD_UNDEFINED(9, "undefinedField", "undefined field", ValidationErrorCategory.DATAFIELD),

// indicator
INDICATOR_OBSOLETE(10, "obsoleteIndicator", "obsolete value", ValidationErrorCategory.INDICATOR),
INDICATOR_NON_EMPTY(11, "nonEmptyIndicator", "non-empty indicator", ValidationErrorCategory.INDICATOR),
INDICATOR_INVALID_VALUE(12, "hasInvalidValue", "invalid value", ValidationErrorCategory.INDICATOR),
INDICATOR_OBSOLETE(10, "obsoleteIndicator", "obsolete value", ValidationErrorCategory.INDICATOR),
INDICATOR_NON_EMPTY(11, "nonEmptyIndicator", "non-empty indicator", ValidationErrorCategory.INDICATOR),
INDICATOR_INVALID_VALUE(12, "hasInvalidValue", "invalid value", ValidationErrorCategory.INDICATOR),

// subfield
SUBFIELD_UNDEFINED(13, "undefinedSubfield", "undefined subfield", ValidationErrorCategory.SUBFIELD),
SUBFIELD_INVALID_LENGTH(14, "invalidLength", "invalid length", ValidationErrorCategory.SUBFIELD),
SUBFIELD_INVALID_CLASSIFICATION_REFERENCE(15, "invalidReference", "invalid classification reference", ValidationErrorCategory.SUBFIELD),
SUBFIELD_PATTERN_MISMATCH(16, "patternMismatch", "content does not match any patterns", ValidationErrorCategory.SUBFIELD),
SUBFIELD_NONREPEATABLE(17, "nonrepeatableSubfield", "repetition of non-repeatable subfield", ValidationErrorCategory.SUBFIELD),
SUBFIELD_ISBN(18, "invalidISBN", "invalid ISBN", ValidationErrorCategory.SUBFIELD),
SUBFIELD_ISSN(19, "invalidISSN", "invalid ISSN", ValidationErrorCategory.SUBFIELD),
SUBFIELD_UNPARSABLE_CONTENT(20, "unparsableContent", "content is not well-formatted", ValidationErrorCategory.SUBFIELD),
SUBFIELD_NULL_CODE(21, "nullCode", "null subfield code", ValidationErrorCategory.SUBFIELD),
SUBFIELD_INVALID_VALUE(22, "hasInvalidValue", "invalid value", ValidationErrorCategory.SUBFIELD),
SUBFIELD_INVALID_LENGTH(14, "invalidLength", "invalid length", ValidationErrorCategory.SUBFIELD),
SUBFIELD_INVALID_CLASSIFICATION_REFERENCE(15, "invalidReference", "invalid classification reference", ValidationErrorCategory.SUBFIELD),
SUBFIELD_PATTERN_MISMATCH(16, "patternMismatch", "content does not match any patterns", ValidationErrorCategory.SUBFIELD),
SUBFIELD_NONREPEATABLE(17, "nonrepeatableSubfield", "repetition of non-repeatable subfield", ValidationErrorCategory.SUBFIELD),
SUBFIELD_ISBN(18, "invalidISBN", "invalid ISBN", ValidationErrorCategory.SUBFIELD),
SUBFIELD_ISSN(19, "invalidISSN", "invalid ISSN", ValidationErrorCategory.SUBFIELD),
SUBFIELD_UNPARSABLE_CONTENT(20, "unparsableContent", "content is not well-formatted", ValidationErrorCategory.SUBFIELD),
SUBFIELD_NULL_CODE(21, "nullCode", "null subfield code", ValidationErrorCategory.SUBFIELD),
SUBFIELD_INVALID_VALUE(22, "hasInvalidValue", "invalid value", ValidationErrorCategory.SUBFIELD),
;

private final int id;
Expand Down

0 comments on commit 40be8e8

Please sign in to comment.