Skip to content

Commit

Permalink
issue #163: PICA: general changes
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Aug 31, 2022
1 parent b43a7d9 commit 4f89f64
Show file tree
Hide file tree
Showing 94 changed files with 460 additions and 338 deletions.
63 changes: 32 additions & 31 deletions src/main/java/de/gwdg/metadataqa/marc/MarcFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
import de.gwdg.metadataqa.marc.dao.Control008;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.Leader;
import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.Marc21Record;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.dao.record.PicaRecord;
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.MarcVersion;
Expand All @@ -28,7 +30,6 @@
import de.gwdg.metadataqa.marc.utils.pica.PicaFieldDefinition;
import de.gwdg.metadataqa.marc.utils.pica.PicaLine;
import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaManager;
import de.gwdg.metadataqa.marc.utils.pica.PicaSchemaReader;
import de.gwdg.metadataqa.marc.utils.pica.PicaSubfield;
import net.minidev.json.JSONArray;
import org.marc4j.marc.ControlField;
Expand Down Expand Up @@ -57,12 +58,12 @@ private MarcFactory() {
throw new IllegalStateException("This is a utility class, can not be instantiated");
}

public static MarcRecord create(JsonPathCache cache) {
public static BibliographicRecord create(JsonPathCache cache) {
return create(cache, MarcVersion.MARC21);
}

public static MarcRecord create(JsonPathCache cache, MarcVersion version) {
var marcRecord = new MarcRecord();
public static BibliographicRecord create(JsonPathCache cache, MarcVersion version) {
var marcRecord = new Marc21Record();
for (JsonBranch branch : schema.getPaths()) {
if (branch.getParent() != null)
continue;
Expand Down Expand Up @@ -109,23 +110,23 @@ public static MarcRecord create(JsonPathCache cache, MarcVersion version) {
return marcRecord;
}

public static MarcRecord createFromMarc4j(Record marc4jRecord) {
public static BibliographicRecord createFromMarc4j(Record marc4jRecord) {
return createFromMarc4j(marc4jRecord, null, MarcVersion.MARC21);
}

public static MarcRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType) {
public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType) {
return createFromMarc4j(marc4jRecord, defaultType, MarcVersion.MARC21);
}

public static MarcRecord createFromMarc4j(Record marc4jRecord,
MarcVersion marcVersion) {
public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
MarcVersion marcVersion) {
return createFromMarc4j(marc4jRecord, null, marcVersion);
}

public static MarcRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType,
MarcVersion marcVersion) {
public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType,
MarcVersion marcVersion) {
return createFromMarc4j(marc4jRecord, defaultType, marcVersion, null);
}

Expand All @@ -137,11 +138,11 @@ public static MarcRecord createFromMarc4j(Record marc4jRecord,
* @param replecementInControlFields A ^ or # character which should be replaced with a space in control fields
* @return the BibliographicRecord built from the given marc4j record
*/
public static MarcRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType,
MarcVersion marcVersion,
String replecementInControlFields) {
var marcRecord = new MarcRecord();
public static BibliographicRecord createFromMarc4j(Record marc4jRecord,
Leader.Type defaultType,
MarcVersion marcVersion,
String replecementInControlFields) {
var marcRecord = new Marc21Record();

if (marc4jRecord.getLeader() != null) {
String data = marc4jRecord.getLeader().marshal();
Expand All @@ -166,8 +167,8 @@ public static MarcRecord createFromMarc4j(Record marc4jRecord,
return marcRecord;
}

public static MarcRecord createPicaFromMarc4j(Record marc4jRecord, PicaSchemaManager picaSchemaManager) {
var marcRecord = new MarcRecord();
public static BibliographicRecord createPicaFromMarc4j(Record marc4jRecord, PicaSchemaManager picaSchemaManager) {
var marcRecord = new PicaRecord();
marcRecord.setSchemaType(SchemaType.PICA);

importMarc4jControlFields(marc4jRecord, marcRecord, null);
Expand All @@ -178,7 +179,7 @@ public static MarcRecord createPicaFromMarc4j(Record marc4jRecord, PicaSchemaMan
}

private static void importMarc4jControlFields(Record marc4jRecord,
MarcRecord marcRecord,
BibliographicRecord marcRecord,
String replecementInControlFields) {
for (ControlField controlField : marc4jRecord.getControlFields()) {
String data = controlField.getData();
Expand Down Expand Up @@ -208,7 +209,7 @@ private static boolean isFixable(String tag) {
}

private static void importMarc4jDataFields(Record marc4jRecord,
MarcRecord marcRecord,
BibliographicRecord marcRecord,
MarcVersion marcVersion) {
for (org.marc4j.marc.DataField dataField : marc4jRecord.getDataFields()) {
var definition = getDataFieldDefinition(dataField, marcVersion);
Expand All @@ -221,7 +222,7 @@ private static void importMarc4jDataFields(Record marc4jRecord,
}

private static void importMarc4jDataFields(Record marc4jRecord,
MarcRecord marcRecord,
BibliographicRecord marcRecord,
PicaSchemaManager schema) {
for (org.marc4j.marc.DataField dataField : marc4jRecord.getDataFields()) {
var definition = schema.lookup(dataField.getTag());
Expand Down Expand Up @@ -325,23 +326,23 @@ private static String extractFirst(JsonPathCache cache, JsonBranch branch) {
return null;
}

public static MarcRecord createFromFormattedText(String marcRecordAsText) {
public static BibliographicRecord createFromFormattedText(String marcRecordAsText) {
return createFromFormattedText(Arrays.asList(marcRecordAsText.split("\n")));
}

public static MarcRecord createFromFormattedText(String marcRecordAsText, MarcVersion marcVersion) {
public static BibliographicRecord createFromFormattedText(String marcRecordAsText, MarcVersion marcVersion) {
return createFromFormattedText(Arrays.asList(marcRecordAsText.split("\n")), marcVersion);
}

public static MarcRecord createFromFormattedText(List<String> lines) {
public static BibliographicRecord createFromFormattedText(List<String> lines) {
return createFromFormattedText(lines, MarcVersion.MARC21);
}

public static MarcRecord createFromFormattedText(List<String> lines, MarcVersion marcVersion) {
public static BibliographicRecord createFromFormattedText(List<String> lines, MarcVersion marcVersion) {
if (marcVersion == null)
marcVersion = MarcVersion.MARC21;

var marcRecord = new MarcRecord();
var marcRecord = new Marc21Record();
for (String line : lines) {
if (line.startsWith("LEADER ")) {
marcRecord.setLeader(line.replace("LEADER ", ""), marcVersion);
Expand All @@ -354,12 +355,12 @@ public static MarcRecord createFromFormattedText(List<String> lines, MarcVersion
return marcRecord;
}

public static MarcRecord createFromAlephseq(List<AlephseqLine> lines,
MarcVersion marcVersion) {
public static BibliographicRecord createFromAlephseq(List<AlephseqLine> lines,
MarcVersion marcVersion) {
if (marcVersion == null)
marcVersion = MarcVersion.MARC21;

var marcRecord = new MarcRecord();
var marcRecord = new Marc21Record();
for (AlephseqLine line : lines) {
if (line.isLeader()) {
marcRecord.setLeader(line.getContent());
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/de/gwdg/metadataqa/marc/MarcSubfield.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package de.gwdg.metadataqa.marc;

import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.definition.*;
import de.gwdg.metadataqa.marc.definition.general.Linkage;
import de.gwdg.metadataqa.marc.definition.general.parser.ParserException;
Expand All @@ -23,7 +23,7 @@ public class MarcSubfield implements Validatable, Serializable {

private static final Logger logger = Logger.getLogger(MarcSubfield.class.getCanonicalName());

private MarcRecord marcRecord;
private BibliographicRecord marcRecord;
private DataField field;
private SubfieldDefinition definition;
private final String code;
Expand Down Expand Up @@ -93,11 +93,11 @@ public void setDefinition(SubfieldDefinition definition) {
this.definition = definition;
}

public MarcRecord getMarcRecord() {
public BibliographicRecord getMarcRecord() {
return marcRecord;
}

public void setMarcRecord(MarcRecord marcRecord) {
public void setMarcRecord(BibliographicRecord marcRecord) {
this.marcRecord = marcRecord;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package de.gwdg.metadataqa.marc.analysis;

import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.MarcSubfield;
import de.gwdg.metadataqa.marc.cli.utils.Schema;
import de.gwdg.metadataqa.marc.definition.SourceSpecificationType;
Expand All @@ -28,10 +28,10 @@ public class AuthorithyAnalyzer {
);
private static final Pattern NUMERIC = Pattern.compile("^\\d");

private MarcRecord marcRecord;
private BibliographicRecord marcRecord;
private AuthorityStatistics authoritiesStatistics;

public AuthorithyAnalyzer(MarcRecord marcRecord,
public AuthorithyAnalyzer(BibliographicRecord marcRecord,
AuthorityStatistics authoritiesStatistics) {
this.marcRecord = marcRecord;
this.authoritiesStatistics = authoritiesStatistics;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import de.gwdg.metadataqa.marc.analysis.bl.UseCase;
import de.gwdg.metadataqa.marc.analysis.bl.Element;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -35,7 +35,7 @@ else if (useCase.getBand().equals(EFFECTIVE))
}

@Override
public String classify(MarcRecord marcRecord) {
public String classify(BibliographicRecord marcRecord) {
String level = DEFICIENT.name();

for (UseCase useCase : basicUseCases)
Expand All @@ -55,7 +55,7 @@ public String classify(MarcRecord marcRecord) {
return level;
}

private boolean satisfy(MarcRecord marcRecord, UseCase useCase) {
private boolean satisfy(BibliographicRecord marcRecord, UseCase useCase) {
for (Element element : useCase.getElements()) {
if (marcRecord.hasDatafield(element.getTag())) {
if (element.getSubfield() == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.MarcSubfield;
import de.gwdg.metadataqa.marc.cli.utils.Schema;
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
Expand Down Expand Up @@ -32,7 +32,7 @@ public class ClassificationAnalyzer {
private static final Pattern NUMERIC = Pattern.compile("^\\d");

private final ClassificationStatistics statistics;
private MarcRecord marcRecord;
private BibliographicRecord marcRecord;
private List<Schema> schemasInRecord;

private static final List<String> fieldsWithIndicator1AndSubfield2 = Arrays.asList(
Expand Down Expand Up @@ -87,7 +87,7 @@ public class ClassificationAnalyzer {
new FieldWithScheme("045R", "Regensburger Verbundklassifikation")
);

public ClassificationAnalyzer(MarcRecord marcRecord, ClassificationStatistics statistics) {
public ClassificationAnalyzer(BibliographicRecord marcRecord, ClassificationStatistics statistics) {
this.marcRecord = marcRecord;
this.statistics = statistics;
}
Expand Down Expand Up @@ -180,7 +180,7 @@ private int processFieldsWithIndicator1AndSubfield2(int total) {
return total;
}

private int processFieldWithScheme(MarcRecord marcRecord,
private int processFieldWithScheme(BibliographicRecord marcRecord,
FieldWithScheme fieldEntry) {
var count = 0;
final String tag = fieldEntry.getTag();
Expand Down Expand Up @@ -222,7 +222,7 @@ private int processFieldWithScheme(MarcRecord marcRecord,
return count;
}

private int processFieldWithSchemePica(MarcRecord marcRecord,
private int processFieldWithSchemePica(BibliographicRecord marcRecord,
FieldWithScheme fieldEntry) {
var count = 0;
final String tag = fieldEntry.getTag();
Expand Down Expand Up @@ -269,7 +269,7 @@ private void registerSchemas(List<Schema> schemas) {
schemasInRecord.addAll(uniqSchemas);
}

private int processFieldWithIndicator1AndSubfield2(MarcRecord marcRecord, String tag) {
private int processFieldWithIndicator1AndSubfield2(BibliographicRecord marcRecord, String tag) {
var count = 0;
if (!marcRecord.hasDatafield(tag))
return count;
Expand Down Expand Up @@ -302,7 +302,7 @@ private int processFieldWithIndicator1AndSubfield2(MarcRecord marcRecord, String
return count;
}

private int processFieldWithIndicator2AndSubfield2(MarcRecord marcRecord, String tag) {
private int processFieldWithIndicator2AndSubfield2(BibliographicRecord marcRecord, String tag) {
var count = 0;
if (!marcRecord.hasDatafield(tag))
return count;
Expand Down Expand Up @@ -333,7 +333,7 @@ private int processFieldWithIndicator2AndSubfield2(MarcRecord marcRecord, String
return count;
}

private int processFieldWithSubfield2(MarcRecord marcRecord, String tag) {
private int processFieldWithSubfield2(BibliographicRecord marcRecord, String tag) {
var count = 0;
if (!marcRecord.hasDatafield(tag))
return count;
Expand All @@ -351,7 +351,7 @@ private int processFieldWithSubfield2(MarcRecord marcRecord, String tag) {
return count;
}

private int processFieldWithoutSource(MarcRecord marcRecord, String tag) {
private int processFieldWithoutSource(BibliographicRecord marcRecord, String tag) {
var count = 0;
if (!marcRecord.hasDatafield(tag))
return count;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package de.gwdg.metadataqa.marc.analysis;

import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;

public interface Classifier {
String classify(MarcRecord marcRecord);
String classify(BibliographicRecord marcRecord);
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import de.gwdg.metadataqa.marc.MarcSubfield;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import org.apache.commons.io.FileUtils;

import java.io.File;
Expand Down Expand Up @@ -45,7 +45,7 @@ public DataElementCounter(String dir, String fileName, Basis basis) {
this.header = _header;
}

public List<Integer> count(MarcRecord marcRecord) {
public List<Integer> count(BibliographicRecord marcRecord) {
List<Integer> counts = new ArrayList<>();
for (Map.Entry<String, List<DataElement>> entry : tags.entrySet()) {
List<DataField> instances = marcRecord.getDatafield(entry.getKey());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
package de.gwdg.metadataqa.marc.analysis;

import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.dao.MarcRecord;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;

import java.util.*;

public class NetworkAnalyzer {

private final MarcRecord marcRecord;
private final BibliographicRecord marcRecord;
private final Set<DataField> collector;

public NetworkAnalyzer(MarcRecord marcRecord) {
public NetworkAnalyzer(BibliographicRecord marcRecord) {
this.marcRecord = marcRecord;
collector = new HashSet<>();
}
Expand Down

0 comments on commit 4f89f64

Please sign in to comment.