Skip to content

Commit

Permalink
issue #199: Group results in completeness
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Dec 20, 2022
1 parent 920f765 commit 669a762
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 15 deletions.
16 changes: 8 additions & 8 deletions src/main/java/de/gwdg/metadataqa/marc/MarcFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,19 +134,19 @@ public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
* @param marc4jRecord The Marc4j record
* @param defaultType The default document type
* @param marcVersion The MARC version
* @param replecementInControlFields A ^ or # character which sould be replaced with space in control fields
* @param replacementInControlFields A ^ or # character which should be replaced with space in control fields
* @return
*/
public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType,
MarcVersion marcVersion,
String replecementInControlFields) {
String replacementInControlFields) {
var marcRecord = new Marc21Record();

if (marc4jRecord.getLeader() != null) {
String data = marc4jRecord.getLeader().marshal();
if (replecementInControlFields != null)
data = data.replace(replecementInControlFields, " ");
if (replacementInControlFields != null)
data = data.replace(replacementInControlFields, " ");
marcRecord.setLeader(new Leader(data, defaultType));

if (marcRecord.getType() == null) {
Expand All @@ -159,7 +159,7 @@ public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
}
}

importMarc4jControlFields(marc4jRecord, marcRecord, replecementInControlFields);
importMarc4jControlFields(marc4jRecord, marcRecord, replacementInControlFields);

importMarc4jDataFields(marc4jRecord, marcRecord, marcVersion);

Expand All @@ -179,11 +179,11 @@ public static BibliographicRecord createPicaFromMarc4j(Record marc4jRecord, Pica

private static void importMarc4jControlFields(Record marc4jRecord,
BibliographicRecord marcRecord,
String replecementInControlFields) {
String replacementInControlFields) {
for (ControlField controlField : marc4jRecord.getControlFields()) {
String data = controlField.getData();
if (replecementInControlFields != null && isFixable(controlField.getTag()))
data = data.replace(replecementInControlFields, " ");
if (replacementInControlFields != null && isFixable(controlField.getTag()))
data = data.replace(replacementInControlFields, " ");
switch (controlField.getTag()) {
case "001":
marcRecord.setControl001(new Control001(data)); break;
Expand Down
23 changes: 21 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package de.gwdg.metadataqa.marc.cli;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ser.FilterProvider;
import com.fasterxml.jackson.databind.ser.impl.SimpleBeanPropertyFilter;
import com.fasterxml.jackson.databind.ser.impl.SimpleFilterProvider;
import de.gwdg.metadataqa.marc.EncodedValue;
import de.gwdg.metadataqa.marc.analysis.completeness.CompletenessDAO;
import de.gwdg.metadataqa.marc.analysis.completeness.RecordCompleteness;
import de.gwdg.metadataqa.marc.definition.general.codelist.OrganizationCodes;
import de.gwdg.metadataqa.marc.utils.BibiographicPath;
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;
import de.gwdg.metadataqa.marc.cli.plugin.CompletenessFactory;
Expand All @@ -18,7 +22,6 @@
import de.gwdg.metadataqa.marc.model.validation.ValidationErrorFormat;
import de.gwdg.metadataqa.marc.utils.BasicStatistics;
import de.gwdg.metadataqa.marc.utils.TagHierarchy;
import de.gwdg.metadataqa.marc.utils.pica.path.PicaPathParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
Expand Down Expand Up @@ -146,6 +149,22 @@ private <T extends Object> void count(T key, Map<T, Integer> counter) {
public void beforeIteration() {
  // Log the effective parameters and prepare the DAO before any record is read.
  logger.info(parameters.formatParameters());
  completenessDAO.initialize();

  // Serialize the parameters (registering a filter that excludes "options")
  // and store them as completeness.params.json in the output directory.
  ObjectMapper jsonMapper = new ObjectMapper();
  try {
    FilterProvider filterProvider = new SimpleFilterProvider()
        .addFilter("myFilter", SimpleBeanPropertyFilter.serializeAllExcept("options"));
    String serialized = jsonMapper.writer(filterProvider).writeValueAsString(parameters);

    Path target = Paths.get(parameters.getOutputDir(), "completeness.params.json");
    try (var out = Files.newBufferedWriter(target)) {
      out.write(serialized);
    }
  } catch (IOException e) {
    // JsonProcessingException is a subtype of IOException, so this single
    // handler covers both the serialization and the file-writing failure.
    throw new RuntimeException(e);
  }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.gwdg.metadataqa.marc.cli.parameters;

import com.fasterxml.jackson.annotation.JsonIgnore;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnorator;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordIgnoratorFactory;
import de.gwdg.metadataqa.marc.cli.utils.ignorablerecords.RecordFilter;
Expand Down Expand Up @@ -48,6 +49,7 @@ public class CommonParameters implements Serializable {
protected InputStream stream = null;
protected String defaultEncoding = null;

@JsonIgnore
protected Options options = new Options();
protected static final CommandLineParser parser = new DefaultParser();
protected CommandLine cmd;
Expand Down Expand Up @@ -389,7 +391,7 @@ public void setFixKbr(boolean fixKbr) {
this.fixKbr = fixKbr;
}

public String getReplecementInControlFields() {
public String getReplacementInControlFields() {
if (fixAlephseq())
return "^";
else if (fixAlma() || fixKbr())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public static void main(String[] args) throws ParseException {
MarcReader reader = QAMarcReaderFactory.getStringReader(MarcFormat.ISO, content);
Record marc4jRecord = reader.next();
BibliographicRecord marcRecord = MarcFactory.createFromMarc4j(
marc4jRecord, params.getDefaultRecordType(), params.getMarcVersion(), params.getReplecementInControlFields());
marc4jRecord, params.getDefaultRecordType(), params.getMarcVersion(), params.getReplacementInControlFields());
validatorCli.processRecord(marcRecord, 1);
Validator analyzer = new Validator(validatorConfiguration);
analyzer.validate(marcRecord);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public class RecordIterator {
private int i = 0;
private String lastKnownId = "";
private CommonParameters parameters;
private String replecementInControlFields;
private String replacementInControlFields;
private MarcVersion marcVersion;
private Leader.Type defaultRecordType;
private DecimalFormat decimalFormat;
Expand All @@ -61,7 +61,7 @@ public void start() {

marcVersion = parameters.getMarcVersion();
defaultRecordType = parameters.getDefaultRecordType();
replecementInControlFields = parameters.getReplecementInControlFields();
replacementInControlFields = parameters.getReplacementInControlFields();
decimalFormat = new DecimalFormat();
if (parameters.isPica()) {
String schemaFile = StringUtils.isNotEmpty(parameters.getPicaSchemaFile())
Expand Down Expand Up @@ -189,7 +189,7 @@ private void processContent(MarcReader reader, String fileName) {

private BibliographicRecord transformMarcRecord(Record marc4jRecord) {
if (parameters.getSchemaType().equals(SchemaType.MARC21))
return MarcFactory.createFromMarc4j(marc4jRecord, defaultRecordType, marcVersion, replecementInControlFields);
return MarcFactory.createFromMarc4j(marc4jRecord, defaultRecordType, marcVersion, replacementInControlFields);
else
return MarcFactory.createPicaFromMarc4j(marc4jRecord, picaSchema);
}
Expand Down

0 comments on commit 669a762

Please sign in to comment.