Skip to content

Commit

Permalink
issue #163: separation of PICA and MARC21
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Sep 27, 2022
1 parent 0afa4c4 commit 138022a
Show file tree
Hide file tree
Showing 11 changed files with 106 additions and 84 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,7 @@ public ClassificationAnalyzer(BibliographicRecord marcRecord, ClassificationStat
this.marcRecord = marcRecord;
this.statistics = statistics;
if (marcRecord.getSchemaType().equals(SchemaType.PICA) && manager == null) {
try {
manager = new PicaVocabularyManager(this.getClass().getResourceAsStream("/pica/vocabularies.json"));
} catch (ParseException | IOException e) {
e.printStackTrace();
}
manager = PicaVocabularyManager.getInstance();
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/de/gwdg/metadataqa/marc/dao/DataField.java
Original file line number Diff line number Diff line change
Expand Up @@ -390,10 +390,10 @@ public Map<String, List<String>> getKeyValuePairs(SolrFieldType type,
}

// classifications
if (marcRecord != null && marcRecord.isClassificationTag(this.getTag())) {
if (marcRecord != null && marcRecord.isSubjectTag(this.getTag())) {
List<String> full = new ArrayList<>();
for (MarcSubfield subfield : subfields) {
if (!marcRecord.isSkippableClassificationSubfield(this.getTag(), subfield.getCode())) {
if (!marcRecord.isSkippableSubjectSubfield(this.getTag(), subfield.getCode())) {
String value = subfield.getValue();
/*
if (marcRecord.getSchemaType().equals(SchemaType.PICA)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -798,18 +798,8 @@ public Map<DataField, AuthorityCategory> getAuthorityFields(Map<AuthorityCategor
abstract public Map<DataField, AuthorityCategory> getAuthorityFieldsMap();
abstract public boolean isAuthorityTag(String tag);
abstract public boolean isSkippableAuthoritySubfield(String tag, String code);
abstract public boolean isClassificationTag(String tag);
abstract public boolean isSkippableClassificationSubfield(String tag, String code);

/*
public List<DataField> getAuthorityFields() {
List<String> tags = Arrays.asList(
"100", "110", "111", "130",
"700", "710", "711", "730", "720", "740", "751", "752", "753", "754",
"800", "810", "811", "830"
);
}
*/
abstract public boolean isSubjectTag(String tag);
abstract public boolean isSkippableSubjectSubfield(String tag, String code);

public List<DataField> getSubjects() {
List<DataField> subjects = new ArrayList<>();
Expand Down
40 changes: 21 additions & 19 deletions src/main/java/de/gwdg/metadataqa/marc/dao/record/Marc21Record.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import de.gwdg.metadataqa.marc.analysis.AuthorityCategory;
import de.gwdg.metadataqa.marc.analysis.ThompsonTraillFields;
import de.gwdg.metadataqa.marc.dao.DataField;
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;

import java.util.Arrays;
import java.util.HashMap;
Expand All @@ -14,11 +13,11 @@
public class Marc21Record extends BibliographicRecord {

private static List<String> authorityTags;
private static List<String> claasificationTags;
private static List<String> subjectTags;
private static Map<String, Boolean> authorityTagsIndex;
private static Map<String, Boolean> claasificationTagsIndex;
private static Map<String, Map<String, Boolean>> authorityTagsSkippableSubfields;
private static Map<String, Map<String, Boolean>> claasificationTagsSkippableSubfields;
private static Map<String, Boolean> subjectTagIndex;
private static Map<String, Map<String, Boolean>> skippableAuthoritySubfields;
private static Map<String, Map<String, Boolean>> skippableSubjectSubfields;
private static Map<AuthorityCategory, List<String>> authorityTagsMap;
private static Map<ThompsonTraillFields, List<String>> ttTagsMap;

Expand Down Expand Up @@ -56,28 +55,28 @@ public boolean isSkippableAuthoritySubfield(String tag, String code) {
if (authorityTagsIndex == null)
initializeAuthorityTags();

if (!authorityTagsSkippableSubfields.containsKey(tag))
if (!skippableAuthoritySubfields.containsKey(tag))
return false;

return authorityTagsSkippableSubfields.get(tag).getOrDefault(tag, false);
return skippableAuthoritySubfields.get(tag).getOrDefault(tag, false);
}

public boolean isClassificationTag(String tag) {
if (claasificationTagsIndex == null) {
public boolean isSubjectTag(String tag) {
if (subjectTagIndex == null) {
initializeAuthorityTags();
}
return claasificationTagsIndex.getOrDefault(tag, false);
return subjectTagIndex.getOrDefault(tag, false);
}

public boolean isSkippableClassificationSubfield(String tag, String code) {
if (claasificationTagsIndex == null)
public boolean isSkippableSubjectSubfield(String tag, String code) {
if (subjectTagIndex == null)
initializeAuthorityTags();

if (!claasificationTagsSkippableSubfields.containsKey(tag))
if (!skippableSubjectSubfields.containsKey(tag))
return false;

// System.err.println();
return claasificationTagsSkippableSubfields.get(tag).getOrDefault(code, false);
return skippableSubjectSubfields.get(tag).getOrDefault(code, false);
}

private void initializeAuthorityTags() {
Expand All @@ -88,12 +87,15 @@ private void initializeAuthorityTags() {
);
authorityTagsIndex = Utils.listToMap(authorityTags);

authorityTagsSkippableSubfields = new HashMap<>();
skippableAuthoritySubfields = new HashMap<>();

claasificationTags = Arrays.asList();
claasificationTagsIndex = Utils.listToMap(claasificationTags);
// authorityTagsSkippableSubfields.put("028A", Utils.listToMap(Arrays.asList("9", "V", "7", "3")));
claasificationTagsSkippableSubfields = new HashMap<>();
subjectTags = Arrays.asList(
"052", "055", "072", "080", "082", "083", "084", "085", "086",
"600", "610", "611", "630", "647", "648", "650", "651",
"653", "654", "655", "656", "657", "658", "662"
);
subjectTagIndex = Utils.listToMap(subjectTags);
skippableSubjectSubfields = new HashMap<>();

authorityTagsMap = new HashMap<>();
authorityTagsMap.put(AuthorityCategory.Personal, List.of("100", "700", "800"));
Expand Down
68 changes: 33 additions & 35 deletions src/main/java/de/gwdg/metadataqa/marc/dao/record/PicaRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
public class PicaRecord extends BibliographicRecord {

private static List<String> authorityTags;
private static List<String> claasificationTags;
private static List<String> subjectTags;
private static Map<String, Boolean> authorityTagsIndex;
private static Map<String, Boolean> claasificationTagsIndex;
private static Map<String, Map<String, Boolean>> authorityTagsSkippableSubfields;
private static Map<String, Map<String, Boolean>> claasificationTagsSkippableSubfields;
private static Map<String, Boolean> subjectTagIndex;
private static Map<String, Map<String, Boolean>> skippableAuthoritySubfields;
private static Map<String, Map<String, Boolean>> skippableSubjectSubfields;
private static Map<AuthorityCategory, List<String>> authorityTagsMap;

public PicaRecord() {
Expand Down Expand Up @@ -52,29 +52,28 @@ public boolean isSkippableAuthoritySubfield(String tag, String code) {
if (authorityTagsIndex == null)
initializeAuthorityTags();

if (!authorityTagsSkippableSubfields.containsKey(tag))
if (!skippableAuthoritySubfields.containsKey(tag))
return false;

// System.err.println();
return authorityTagsSkippableSubfields.get(tag).getOrDefault(code, false);
return skippableAuthoritySubfields.get(tag).getOrDefault(code, false);
}

public boolean isClassificationTag(String tag) {
if (claasificationTagsIndex == null) {
public boolean isSubjectTag(String tag) {
if (subjectTagIndex == null) {
initializeAuthorityTags();
}
return claasificationTagsIndex.getOrDefault(tag, false);
return subjectTagIndex.getOrDefault(tag, false);
}

public boolean isSkippableClassificationSubfield(String tag, String code) {
if (claasificationTagsIndex == null)
public boolean isSkippableSubjectSubfield(String tag, String code) {
if (subjectTagIndex == null)
initializeAuthorityTags();

if (!claasificationTagsSkippableSubfields.containsKey(tag))
if (!skippableSubjectSubfields.containsKey(tag))
return false;

// System.err.println();
return claasificationTagsSkippableSubfields.get(tag).getOrDefault(code, false);
return skippableSubjectSubfields.get(tag).getOrDefault(code, false);
}

public Map<DataField, AuthorityCategory> getAuthorityFieldsMap() {
Expand All @@ -84,7 +83,6 @@ public Map<DataField, AuthorityCategory> getAuthorityFieldsMap() {
return getAuthorityFields(authorityTagsMap);
}


private static void initializeAuthorityTags() {
authorityTags = Arrays.asList(
"022A", // Werktitel und sonstige unterscheidende Merkmale des Werks
Expand All @@ -109,28 +107,28 @@ private static void initializeAuthorityTags() {
);
authorityTagsIndex = Utils.listToMap(authorityTags);

authorityTagsSkippableSubfields = new HashMap<>();
authorityTagsSkippableSubfields.put("022A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("028A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("028B", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("028C", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("028E", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("028G", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("029A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("029E", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("029F", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("029G", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("033D", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("033H", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
authorityTagsSkippableSubfields.put("033J", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));

claasificationTags = Arrays.asList(
skippableAuthoritySubfields = new HashMap<>();
skippableAuthoritySubfields.put("022A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("028A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("028B", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("028C", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("028E", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("028G", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("029A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("029E", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("029F", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("029G", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("033D", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("033H", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableAuthoritySubfields.put("033J", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));

subjectTags = Arrays.asList(
"045A", "045B", "045F", "045R", "045C", "045E", "045G"
);
claasificationTagsIndex = Utils.listToMap(claasificationTags);
claasificationTagsSkippableSubfields = new HashMap<>();
claasificationTagsSkippableSubfields.put("022A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
claasificationTagsSkippableSubfields.put("045R", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
subjectTagIndex = Utils.listToMap(subjectTags);
skippableSubjectSubfields = new HashMap<>();
skippableSubjectSubfields.put("022A", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));
skippableSubjectSubfields.put("045R", Utils.listToMap(Arrays.asList("9", "V", "7", "3", "w")));

authorityTagsMap = new HashMap<>();
authorityTagsMap.put(AuthorityCategory.Titles, List.of("022A", "022A"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,47 @@ public class PicaVocabularyManager {
private JSONParser parser = new JSONParser(JSONParser.MODE_RFC4627);
private Map<String, VocabularyEntry> map = new HashMap<>();
private static final Pattern PATTERN = Pattern.compile("^\\^(\\w|\\[\\w+\\])(.*)$");
private static PicaVocabularyManager instance;

public PicaVocabularyManager(String filename) throws FileNotFoundException, ParseException {
/**
 * Returns the shared vocabulary manager, creating it on first use from the
 * classpath resource {@code /pica/vocabularies.json}.
 *
 * <p>If an instance already exists (whichever {@code getInstance} overload
 * created it), that instance is returned and the resource is not read again.
 *
 * @return the shared {@code PicaVocabularyManager}
 * @throws IllegalStateException if the resource cannot be found on the classpath
 * @throws RuntimeException wrapping the underlying I/O or parse failure
 */
public static PicaVocabularyManager getInstance() {
  if (instance == null) {
    // try-with-resources: the constructor parses the stream completely before
    // returning, so the stream can (and should) be closed right afterwards.
    try (InputStream stream = PicaVocabularyManager.class.getResourceAsStream("/pica/vocabularies.json")) {
      if (stream == null) {
        // getResourceAsStream reports a missing resource by returning null;
        // fail with a message that names the resource instead of passing null on.
        throw new IllegalStateException("Classpath resource not found: /pica/vocabularies.json");
      }
      instance = new PicaVocabularyManager(stream);
    } catch (java.io.IOException | ParseException e) {
      // IOException also covers the FileNotFoundException declared by the constructor
      // and any failure while closing the stream.
      throw new RuntimeException(e);
    }
  }
  return instance;
}

/**
 * Returns the shared vocabulary manager, creating it from the given stream
 * when no instance exists yet.
 *
 * <p>When an instance already exists, it is returned unchanged and
 * {@code inputStream} is not read.
 *
 * @param inputStream source of the vocabulary JSON, used only for the first creation
 * @return the shared {@code PicaVocabularyManager}
 * @throws RuntimeException wrapping a {@code FileNotFoundException} or
 *         {@code ParseException} raised while building the instance
 */
public static PicaVocabularyManager getInstance(InputStream inputStream) {
  if (instance != null) {
    return instance;
  }
  try {
    instance = new PicaVocabularyManager(inputStream);
  } catch (FileNotFoundException | ParseException cause) {
    throw new RuntimeException(cause);
  }
  return instance;
}

/**
 * Returns the shared vocabulary manager, creating it from the named file
 * when no instance exists yet.
 *
 * <p>When an instance already exists, it is returned unchanged and
 * {@code filename} is not opened.
 *
 * @param filename path of the vocabulary JSON file, used only for the first creation
 * @return the shared {@code PicaVocabularyManager}
 * @throws RuntimeException wrapping a {@code FileNotFoundException} or
 *         {@code ParseException} raised while building the instance
 */
public static PicaVocabularyManager getInstance(String filename) {
  if (instance != null) {
    return instance;
  }
  try {
    instance = new PicaVocabularyManager(filename);
  } catch (FileNotFoundException | ParseException cause) {
    throw new RuntimeException(cause);
  }
  return instance;
}

/**
 * Builds a manager from a vocabulary JSON file on disk.
 *
 * <p>The file is decoded as UTF-8 (the same encoding the stream-based constructor
 * uses) and the reader is closed as soon as parsing has finished.
 *
 * @param filename path of the JSON file to parse
 * @throws FileNotFoundException if the file cannot be opened
 * @throws ParseException if the content is not valid JSON
 * @throws java.io.UncheckedIOException if closing the file fails
 */
private PicaVocabularyManager(String filename) throws FileNotFoundException, ParseException {
  Object jsonObject;
  try (InputStreamReader reader = new InputStreamReader(
      new java.io.FileInputStream(new File(filename)), java.nio.charset.StandardCharsets.UTF_8)) {
    jsonObject = parser.parse(reader);
  } catch (FileNotFoundException e) {
    // keep the declared contract: a missing file still surfaces as FileNotFoundException
    throw e;
  } catch (java.io.IOException e) {
    // only reachable from the implicit close(); the signature has no IOException
    throw new java.io.UncheckedIOException(e);
  }
  read(jsonObject);
}

public PicaVocabularyManager(InputStream inputStream) throws FileNotFoundException, ParseException {
private PicaVocabularyManager(InputStream inputStream) throws FileNotFoundException, ParseException {
try {
Object jsonObject = parser.parse(new InputStreamReader(inputStream, "UTF-8"));
read(jsonObject);
Expand Down
12 changes: 7 additions & 5 deletions src/test/java/de/gwdg/metadataqa/marc/MarcFactoryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ public void mainTest() throws IOException, URISyntaxException {
// System.err.println(record.formatForIndex());
// System.err.println(record.getKeyValuePairs());
Map<String, List<String>> pairs = marcRecord.getKeyValuePairs(SolrFieldType.HUMAN);
assertEquals(122, pairs.size());
assertEquals(124, pairs.size());
Set<String> keys = pairs.keySet();
// keys.remove("GentLocallyDefinedField");
// keys.remove("BemerkungenZurTitelaufnahme");
Expand Down Expand Up @@ -64,10 +64,10 @@ public void mainTest() throws IOException, URISyntaxException {
"SystemControlNumber, SystemControlNumber_organization, " +
"AdminMetadata_languageOfCataloging, " +
"AdminMetadata_transcribingAgency, AdminMetadata_descriptionConventions, AdminMetadata_catalogingAgency, " +
"Language_translationIndication, Language, Language_sourceOfCode, Place_country, " +
"Language_translationIndication, Language, Language_sourceOfCode, Place_country, ClassificationDdc_full, " +
"ClassificationDdc_editionType, ClassificationDdc_classificationSource, ClassificationDdc, " +
"Classification_classificationPortion, Classification_classificationPortion_zdbs, " +
"Classification_source, Title_subtitle, " +
"Classification_source, Classification_full, Title_subtitle, " +
"Title_responsibilityStatement, Title_mainTitle, Title_titleAddedEntry, " +
"Title_nonfilingCharacters, Title_partName, ParallelTitle_mainTitle, ParallelTitle_type, " +
"ParallelTitle_displayText, ParallelTitle_noteAndAddedEntry, " +
Expand Down Expand Up @@ -451,7 +451,7 @@ public void getKeyValuePairTest() throws IOException, URISyntaxException {

BibliographicRecord marcRecord = MarcFactory.create(cache, MarcVersion.DNB);
Map<String, List<String>> pairs = marcRecord.getKeyValuePairs(SolrFieldType.MIXED);
assertEquals(122, pairs.size());
assertEquals(124, pairs.size());

Set<String> keys = pairs.keySet();
keys.remove("591a_GentLocallyDefinedField");
Expand Down Expand Up @@ -521,8 +521,10 @@ public void getKeyValuePairTest() throws IOException, URISyntaxException {
"082ind2_ClassificationDdc_classificationSource, " +
"082ind1_ClassificationDdc_editionType, " +
"082a_ClassificationDdc, " +
"084a_Classification_classificationPortion, " +
"082_ClassificationDdc_full, " +
"084_Classification_full, " +
"0842_Classification_source, " +
"084a_Classification_classificationPortion, " +
"084a_Classification_classificationPortion_zdbs, " +
"245a_Title_mainTitle, " +
"245ind1_Title_titleAddedEntry, " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public void testIndexing710() throws IOException, URISyntaxException {
List<String> lines = FileUtils.readLinesFromResource("marctxt/010000011.mrctxt");
BibliographicRecord marcRecord = MarcFactory.createFromFormattedText(lines);
Map<String, List<String>> index = marcRecord.getKeyValuePairs(SolrFieldType.MIXED, MarcVersion.DNB);
assertEquals(138, index.size());
assertEquals(140, index.size());
assertEquals("(DE-576)19168161X",
index.get("7100_AddedCorporateName_authorityRecordControlNumber")
.get(0));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@ public void testMarcRecordFunctions() {
assertEquals(expected, formatted);

Map<String, List<String>> pairs = marcRecord.getKeyValuePairs();
assertEquals(93, pairs.size());
assertEquals(95, pairs.size());

List<String> hits = marcRecord.search("001", "000000002");
assertEquals(1, hits.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ public void testMarcRecordFunctions() {
assertEquals(expected, formatted);

Map<String, List<String>> pairs = marcRecord.getKeyValuePairs();
assertEquals(138, pairs.size());
assertEquals(140, pairs.size());

List<String> hits = marcRecord.search("001", "010000011");
assertEquals(1, hits.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public class PicaVocabularyManagerTest {

@Test
public void constructor() throws FileNotFoundException, ParseException {
PicaVocabularyManager manager = new PicaVocabularyManager(getPath("pica/vocabularies.json"));
PicaVocabularyManager manager = PicaVocabularyManager.getInstance(getPath("pica/vocabularies.json"));

VocabularyEntry entry = manager.get("045A");
assertNotNull(entry);
Expand Down

0 comments on commit 138022a

Please sign in to comment.