Skip to content

Commit

Permalink
issue #174: PICA FRBR functions
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Oct 4, 2022
1 parent 5c0cf6f commit db64588
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 52 deletions.
36 changes: 23 additions & 13 deletions src/main/java/de/gwdg/metadataqa/marc/cli/FunctionalAnalysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import de.gwdg.metadataqa.marc.dao.MarcPositionalControlField;
import de.gwdg.metadataqa.marc.dao.record.BibliographicRecord;
import de.gwdg.metadataqa.marc.definition.ControlValue;
import de.gwdg.metadataqa.marc.definition.bibliographic.SchemaType;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.FRBRFunction;
import de.gwdg.metadataqa.marc.definition.structure.Indicator;
Expand Down Expand Up @@ -83,8 +84,8 @@ public void processRecord(Record marc4jRecord, int recordNumber) throws IOExcept
}

@Override
public void processRecord(BibliographicRecord marcRecord, int recordNumber) throws IOException {
if (parameters.getRecordIgnorator().isIgnorable(marcRecord))
public void processRecord(BibliographicRecord bibliographicRecord, int recordNumber) throws IOException {
if (parameters.getRecordIgnorator().isIgnorable(bibliographicRecord))
return;

this.recordNumber = recordNumber;
Expand All @@ -95,9 +96,11 @@ public void processRecord(BibliographicRecord marcRecord, int recordNumber) thro

Map<DataFieldDefinition, Boolean> cache = new HashMap<>();

countPositionalControlField(recordCounter, marcRecord.getLeader());
countControlFields(recordCounter, marcRecord.getControlfields());
countDataFields(recordCounter, marcRecord.getDatafields(), cache);
if (bibliographicRecord.getSchemaType().equals(SchemaType.MARC21)) {
countPositionalControlField(recordCounter, bibliographicRecord.getLeader());
countControlFields(recordCounter, bibliographicRecord.getControlfields());
}
countDataFields(recordCounter, bibliographicRecord.getDatafields(), bibliographicRecord.getSchemaType(), cache);

frbrFunctionLister.calculatePercent(recordCounter);

Expand All @@ -107,21 +110,23 @@ public void processRecord(BibliographicRecord marcRecord, int recordNumber) thro

private void countDataFields(Map<FRBRFunction, FunctionValue> recordCounter,
List<DataField> dataFields,
SchemaType schemaType,
Map<DataFieldDefinition, Boolean> cache) {
for (DataField dataField : dataFields) {
DataFieldDefinition definition = dataField.getDefinition();
if (!cache.containsKey(definition)) {
cache.put(definition, true);
if (definition != null) {
if (definition != null && schemaType.equals(SchemaType.MARC21)) {
countIndicator(recordCounter, definition.getInd1(), dataField.getInd1());
countIndicator(recordCounter, definition.getInd2(), dataField.getInd2());
}
for (MarcSubfield subfield : dataField.getSubfields()) {
if (subfield.getDefinition() != null
&& subfield.getDefinition().getFrbrFunctions() != null) {
FrbrFunctionLister.countFunctions(
subfield.getDefinition().getFrbrFunctions(), recordCounter);
}
if (schemaType.equals(SchemaType.MARC21)) {
for (MarcSubfield subfield : dataField.getSubfields())
if (subfield.getDefinition() != null && subfield.getDefinition().getFrbrFunctions() != null)
FrbrFunctionLister.countFunctions(subfield.getDefinition().getFrbrFunctions(), recordCounter);
} else if (schemaType.equals(SchemaType.PICA)) {
if (frbrFunctionLister.getFunctionByPicaPath().containsKey(dataField.getTag()))
FrbrFunctionLister.countFunctions(frbrFunctionLister.getFunctionByPicaPath().get(dataField.getTag()), recordCounter);
}
}
}
Expand Down Expand Up @@ -196,7 +201,12 @@ public void afterIteration(int numberOfprocessedRecords) {

private void saveMapping(String fileExtension,
char separator) {
Map<FRBRFunction, List<String>> functions = frbrFunctionLister.getMarcPathByfunction();
Map<FRBRFunction, List<String>> functions = null;
if (parameters.getSchemaType().equals(SchemaType.MARC21))
functions = frbrFunctionLister.getMarcPathByFunction();
else if (parameters.getSchemaType().equals(SchemaType.PICA))
functions = frbrFunctionLister.getPicaPathByFunction();

var path = Paths.get(parameters.getOutputDir(), "functional-analysis-mapping" + fileExtension);
try (var writer = Files.newBufferedWriter(path)) {
writer.write("frbrfunction" + separator + "count" + separator + "fields\n");
Expand Down
57 changes: 38 additions & 19 deletions src/main/java/de/gwdg/metadataqa/marc/utils/FrbrFunctionLister.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@ public class FrbrFunctionLister {
private Map<FRBRFunction, Counter<FunctionValue>> histogram;

private Map<String, List<FRBRFunction>> functionByMarcPath;
private AppendableHashMap<FRBRFunction, String> marcPathByfunction;
private Map<FRBRFunction, List<String>> picaPathByfunction;
private Map<String, List<FRBRFunction>> functionByPicaPath;
private AppendableHashMap<FRBRFunction, String> marcPathByFunction;
private Map<FRBRFunction, List<String>> picaPathByFunction;

public FrbrFunctionLister(MarcVersion marcVersion) {
this.marcVersion = marcVersion;
Expand All @@ -52,7 +53,7 @@ public Map<FRBRFunction, Counter<FunctionValue>> getHistogram() {
public void prepareBaseline() {
elementsWithoutFunctions = 0;
functionByMarcPath = new TreeMap<>();
marcPathByfunction = new AppendableHashMap<>();
marcPathByFunction = new AppendableHashMap<>();

for (ControlfieldPositionDefinition subfield : MarcDefinition.getLeaderPositions())
registerFunctions(subfield.getFrbrFunctions(), subfield.getPath(false));
Expand Down Expand Up @@ -94,7 +95,7 @@ private void registerFunctions(List<FRBRFunction> functions, String marcPath) {
if (functions != null && !functions.isEmpty()) {
functionByMarcPath.put(marcPath, functions);
for (FRBRFunction function : functions) {
marcPathByfunction.append(function, marcPath);
marcPathByFunction.append(function, marcPath);
baselineCounter.count(function);
}
} else {
Expand Down Expand Up @@ -161,26 +162,44 @@ public Map<FRBRFunction, Integer> getBaseline() {
return baselineCounter.getMap();
}

public Map<FRBRFunction, List<String>> getMarcPathByfunction() {
return marcPathByfunction.getMap();
public Map<FRBRFunction, List<String>> getMarcPathByFunction() {
return marcPathByFunction.getMap();
}

public Map<FRBRFunction, List<String>> getPicaPathByfunction() {
if (picaPathByfunction == null) {
picaPathByfunction = new HashMap<>();
for (Map.Entry<FRBRFunction, List<String>> entry : marcPathByfunction.entrySet()) {
for (String address : entry.getValue()) {
if (address.contains("$")) {
String key = address.replace("$", " $");
for (Crosswalk crosswalk : PicaMarcCrosswalkReader.lookupMarc21(key)) {
if (!picaPathByfunction.containsKey(entry.getKey()))
picaPathByfunction.put(entry.getKey(), new ArrayList<>());
picaPathByfunction.get(entry.getKey()).add(crosswalk.getPica());
}
/**
 * Returns the PICA paths grouped by FRBR function.
 *
 * The map is created lazily: when it has not been built yet,
 * {@code initializePica()} is invoked before the field is returned.
 *
 * @return the map from FRBR function to the list of PICA paths recorded for it
 */
public Map<FRBRFunction, List<String>> getPicaPathByFunction() {
  boolean alreadyBuilt = picaPathByFunction != null;
  if (!alreadyBuilt)
    initializePica();
  return picaPathByFunction;
}

/**
 * Returns the FRBR functions grouped by PICA path.
 *
 * The map is created lazily: when it has not been built yet,
 * {@code initializePica()} is invoked before the field is returned.
 *
 * @return the map from PICA path to the list of FRBR functions recorded for it
 */
public Map<String, List<FRBRFunction>> getFunctionByPicaPath() {
  boolean alreadyBuilt = functionByPicaPath != null;
  if (!alreadyBuilt)
    initializePica();
  return functionByPicaPath;
}

/**
 * Builds both PICA lookup tables from the MARC paths registered per FRBR function.
 *
 * For every registered MARC path that contains a subfield marker ('$'), a crosswalk
 * key is formed by inserting a space before each '$'. Every PICA path the crosswalk
 * returns for that key is recorded in both directions: function -> PICA paths and
 * PICA path -> functions. MARC paths without '$' are skipped.
 *
 * The tables are filled in local maps and assigned to the fields only at the end, so
 * an exception thrown during the lookup does not leave partially filled tables behind
 * for the lazy getters to return.
 */
private void initializePica() {
  Map<FRBRFunction, List<String>> pathsByFunction = new HashMap<>();
  Map<String, List<FRBRFunction>> functionsByPath = new HashMap<>();
  for (Map.Entry<FRBRFunction, List<String>> entry : marcPathByFunction.entrySet()) {
    FRBRFunction function = entry.getKey();
    for (String address : entry.getValue()) {
      // only subfield-level MARC paths are looked up in the crosswalk
      if (!address.contains("$"))
        continue;
      String key = address.replace("$", " $");
      for (Crosswalk crosswalk : PicaMarcCrosswalkReader.lookupMarc21(key)) {
        String pica = crosswalk.getPica();
        pathsByFunction.computeIfAbsent(function, k -> new ArrayList<>()).add(pica);
        functionsByPath.computeIfAbsent(pica, k -> new ArrayList<>()).add(function);
      }
    }
  }
  picaPathByFunction = pathsByFunction;
  functionByPicaPath = functionsByPath;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,19 @@

import de.gwdg.metadataqa.marc.definition.FRBRFunction;
import de.gwdg.metadataqa.marc.definition.MarcVersion;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.Indicator;
import de.gwdg.metadataqa.marc.definition.structure.SubfieldDefinition;
import org.junit.Test;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;

import static de.gwdg.metadataqa.marc.Utils.createRow;
import static org.junit.Assert.assertEquals;

public class FrbrFunctionListerTest {

@Test
public void testGetMarcPathByfunction() {
FrbrFunctionLister lister = new FrbrFunctionLister(MarcVersion.MARC21);
Map<FRBRFunction, List<String>> functions = lister.getMarcPathByfunction();
Map<FRBRFunction, List<String>> functions = lister.getMarcPathByFunction();
assertEquals(12, functions.size());
assertEquals(454, functions.get(FRBRFunction.DiscoveryObtain).size());
assertEquals(464, functions.get(FRBRFunction.DiscoverySearch).size());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,22 +1,13 @@
package de.gwdg.metadataqa.marc.utils.pica.crosswalk;

import de.gwdg.metadataqa.marc.Utils;
import de.gwdg.metadataqa.marc.definition.FRBRFunction;
import de.gwdg.metadataqa.marc.definition.MarcVersion;
import de.gwdg.metadataqa.marc.definition.structure.DataFieldDefinition;
import de.gwdg.metadataqa.marc.definition.structure.SubfieldDefinition;
import de.gwdg.metadataqa.marc.utils.FrbrFunctionLister;
import de.gwdg.metadataqa.marc.utils.MarcTagLister;
import org.junit.Test;

import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static de.gwdg.metadataqa.marc.definition.FRBRFunction.DiscoverySelect;
import static org.junit.Assert.assertEquals;

public class PicaMarcCrosswalkReaderTest {
Expand All @@ -30,7 +21,7 @@ public void test() {
@Test
public void testLoad() {
FrbrFunctionLister lister = new FrbrFunctionLister(MarcVersion.MARC21);
Map<FRBRFunction, List<String>> picaFunctions = lister.getPicaPathByfunction();
Map<FRBRFunction, List<String>> picaFunctions = lister.getPicaPathByFunction();
assertEquals(11, picaFunctions.size());
assertEquals(167, picaFunctions.get(FRBRFunction.DiscoveryObtain).size());
assertEquals(178, picaFunctions.get(FRBRFunction.DiscoverySearch).size());
Expand All @@ -45,5 +36,4 @@ public void testLoad() {
assertEquals( 17, picaFunctions.get(FRBRFunction.UseManage).size());
assertEquals( 4, picaFunctions.get(FRBRFunction.UseOperate).size());
}

}

0 comments on commit db64588

Please sign in to comment.