Skip to content

Commit

Permalink
issue #142: calculating PICA completeness
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Jun 16, 2022
1 parent bfdf840 commit 49b9232
Show file tree
Hide file tree
Showing 7 changed files with 20,861 additions and 8 deletions.
12 changes: 5 additions & 7 deletions src/main/java/de/gwdg/metadataqa/marc/cli/Completeness.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import de.gwdg.metadataqa.marc.*;
import de.gwdg.metadataqa.marc.cli.parameters.CommonParameters;
import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;
import de.gwdg.metadataqa.marc.cli.plugin.CompletenessFactory;
import de.gwdg.metadataqa.marc.cli.plugin.CompletenessPlugin;
import de.gwdg.metadataqa.marc.cli.processor.BibliographicInputProcessor;
import de.gwdg.metadataqa.marc.cli.utils.RecordIterator;
import de.gwdg.metadataqa.marc.dao.DataField;
Expand Down Expand Up @@ -56,11 +58,13 @@ public class Completeness implements BibliographicInputProcessor, Serializable {
// private Map<String, Integer> fieldMap = new HashMap<>();
private Map<String, Map<Integer, Integer>> fieldHistogram = new HashMap<>();
private boolean readyToProcess;
private CompletenessPlugin plugin;

/**
 * Builds the processor from the raw command line arguments.
 *
 * <p>The arguments are wrapped into {@link CompletenessParameters}, the
 * option set is taken from those parameters, and the schema specific
 * {@link CompletenessPlugin} is obtained from {@link CompletenessFactory}.
 *
 * @param args the command line arguments handed over to
 *   {@link CompletenessParameters}
 * @throws ParseException declared by this constructor; presumably raised
 *   while the arguments are parsed (confirm against CompletenessParameters)
 */
public Completeness(String[] args) throws ParseException {
  parameters = new CompletenessParameters(args);
  options = parameters.getOptions();
  plugin = CompletenessFactory.create(parameters);
  // Flag is raised only after all collaborators have been set up.
  readyToProcess = true;
}

public static void main(String[] args) {
Expand Down Expand Up @@ -103,13 +107,7 @@ public void processRecord(MarcRecord marcRecord, int recordNumber) throws IOExce
Map<String, Integer> recordPackageCounter = new TreeMap<>();
// private Map<String, Map<String, Integer>>

String documentType = "dummy";
if (parameters.isMarc21())
documentType = marcRecord.getType().getValue();
else if (parameters.isPica()) {
String[] parts = parameters.getPicaRecordTypeField().split(Pattern.quote(parameters.getPicaSubfieldSeparator()));
documentType = marcRecord.getDatafield(parts[0]).get(0).getSubfield(parts[1]).get(0).getValue();
}
String documentType = plugin.getDocumentType(marcRecord);
elementCardinality.computeIfAbsent(documentType, s -> new TreeMap<>());
elementFrequency.computeIfAbsent(documentType, s -> new TreeMap<>());

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package de.gwdg.metadataqa.marc.cli.plugin;

import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;

/**
 * Factory that selects the {@link CompletenessPlugin} implementation
 * matching the schema reported by the analysis parameters.
 */
public class CompletenessFactory {

  /**
   * Creates the completeness plugin for the configured schema.
   *
   * @param parameters the parameters of the completeness analysis; queried
   *   via {@code isMarc21()} and {@code isPica()}
   * @return a {@link Marc21CompletenessPlugin} when the parameters report
   *   MARC21, a {@link PicaCompletenessPlugin} when they report PICA, and
   *   otherwise a fallback plugin that reports the constant document type
   *   {@code "dummy"}; never {@code null}
   */
  public static CompletenessPlugin create(CompletenessParameters parameters) {
    if (parameters.isMarc21()) {
      return new Marc21CompletenessPlugin(parameters);
    } else if (parameters.isPica()) {
      return new PicaCompletenessPlugin(parameters);
    }
    // Returning null here would only surface later, as a NullPointerException
    // on the first getDocumentType() call. Hand back a placeholder plugin so
    // records of other schemas are still counted under a single type.
    return marcRecord -> "dummy";
  }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package de.gwdg.metadataqa.marc.cli.plugin;

import de.gwdg.metadataqa.marc.dao.MarcRecord;

/**
 * Schema specific part of the completeness analysis. Each implementation
 * knows how to read the document type out of a record of its schema.
 */
public interface CompletenessPlugin {

  /**
   * Extracts the document type of a record.
   *
   * @param marcRecord the record to inspect
   * @return the document type of the record
   */
  String getDocumentType(MarcRecord marcRecord);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package de.gwdg.metadataqa.marc.cli.plugin;

import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;
import de.gwdg.metadataqa.marc.dao.MarcRecord;

import java.util.regex.Pattern;

/**
 * {@link CompletenessPlugin} implementation for MARC21 records.
 */
public class Marc21CompletenessPlugin implements CompletenessPlugin {

  /** Parameters received at construction; stored but not read in this class. */
  private final CompletenessParameters parameters;

  /**
   * Creates the plugin.
   *
   * @param parameters the parameters of the completeness analysis
   */
  public Marc21CompletenessPlugin(CompletenessParameters parameters) {
    this.parameters = parameters;
  }

  /**
   * Returns the value of the type object attached to the record.
   *
   * @param marcRecord the record to inspect
   * @return the result of {@code marcRecord.getType().getValue()}
   */
  @Override
  public String getDocumentType(MarcRecord marcRecord) {
    return marcRecord.getType().getValue();
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package de.gwdg.metadataqa.marc.cli.plugin;

import de.gwdg.metadataqa.marc.cli.parameters.CompletenessParameters;
import de.gwdg.metadataqa.marc.dao.MarcRecord;

import java.util.regex.Pattern;

/**
 * {@link CompletenessPlugin} implementation for PICA records.
 *
 * <p>The location of the document type is taken from the parameters: the
 * record type field specification is split at the subfield separator into a
 * field tag and a subfield code.
 */
public class PicaCompletenessPlugin implements CompletenessPlugin {

  /** Parameters received at construction; only read inside the constructor. */
  private final CompletenessParameters parameters;

  /** Tag of the data field holding the document type. */
  private final String field;

  /** Code of the subfield (within {@link #field}) holding the document type. */
  private final String subfield;

  /**
   * Creates the plugin and resolves the field/subfield pair that holds the
   * document type.
   *
   * @param parameters the parameters of the completeness analysis; supplies
   *   the record type field specification and the subfield separator
   * @throws IllegalArgumentException if the specification or the separator is
   *   missing, or if the specification does not consist of a field and a
   *   subfield joined by the separator
   */
  public PicaCompletenessPlugin(CompletenessParameters parameters) {
    this.parameters = parameters;

    String recordTypeField = parameters.getPicaRecordTypeField();
    String separator = parameters.getPicaSubfieldSeparator();
    if (recordTypeField == null || separator == null) {
      throw new IllegalArgumentException(
        "The PICA record type field and the PICA subfield separator must both be set");
    }

    // The separator is quoted because it is used as a literal, not as a regex.
    String[] parts = recordTypeField.split(Pattern.quote(separator));
    if (parts.length < 2) {
      // Without this check a malformed specification fails with a bare
      // ArrayIndexOutOfBoundsException that does not name the culprit.
      throw new IllegalArgumentException(String.format(
        "The PICA record type field '%s' must contain a field and a subfield separated by '%s'",
        recordTypeField, separator));
    }
    field = parts[0];
    subfield = parts[1];
  }

  /**
   * Returns the value of the first instance of the configured subfield in the
   * first instance of the configured field.
   *
   * <p>NOTE(review): the lookup is not guarded, so this assumes every record
   * contains the configured field and subfield - confirm against the data.
   *
   * @param marcRecord the record to inspect
   * @return the value found at the configured field and subfield
   */
  @Override
  public String getDocumentType(MarcRecord marcRecord) {
    return marcRecord.getDatafield(field).get(0).getSubfield(subfield).get(0).getValue();
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void start() {
if (parameters.getSchemaType().equals(SchemaType.PICA)) {
String schemaFile = StringUtils.isNotEmpty(parameters.getPicaSchemaFile())
? parameters.getPicaSchemaFile()
: Paths.get("src/main/resources/pica/k10plus.json").toAbsolutePath().toString();
: Paths.get("src/main/resources/pica/avram-k10plus.json").toAbsolutePath().toString();
picaSchema = PicaSchemaReader.create(schemaFile);
}

Expand Down
Loading

0 comments on commit 49b9232

Please sign in to comment.