-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extract marc21_to_marcxml.sh script, use it in test/batch scripts, configured by config/config.sh
- Loading branch information
1 parent
07a72a5
commit 517ab27
Showing
11 changed files
with
125 additions
and
53 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#!/bin/bash
#
# Convert one MARC21 binary file into MARC-XML files.
#
# Usage: marc21_to_marcxml.sh <path-to-file.mrc>
#
# Requires one input parameter - the path to a MARC21 binary file.
#
# Process all records in the mrc_file using marc4j and SQL to
# look up authority keys and retrieve any URI values from
# 92X fields and put them in the subfield 0 so that the
# LOC converter can use them correctly.
#
# Environment (read here, never set here):
#   LD4P_LOGS     directory that receives the conversion log files
#   LD4P_MARCXML  output directory handed to the converter (-o)
#   LD4P_LIB      directory containing the converter jar
#
# Exit status: the exit status of the java converter (0 on success),
# or 2 when the input file argument is missing.

mrc_file=$1
if [ -z "${mrc_file}" ]; then
  echo "Usage: $(basename "$0") <marc21-file.mrc>" 1>&2
  exit 2
fi

# Log names are derived from the input file name plus a timestamp, so
# repeated runs on the same file do not overwrite earlier logs.
mrc_name=$(basename "${mrc_file}" .mrc)
log_date=$(date +%Y%m%dT%H%M%S)
log_name="${LD4P_LOGS}/${mrc_name}_marc21-to-xml_${log_date}"
log_file="${log_name}.log"
err_file="${log_name}_errors.log"

echo
echo "Converting MARC file: ${mrc_file}"
echo "Output MARC-XML files: ${LD4P_MARCXML}/*.xml"
echo "Logging conversion to: ${log_file}"

# Java library, built from ./java sources and copied to ./lib
jar="${LD4P_LIB}/xform-marc21-to-xml-jar-with-dependencies.jar"

# $ java -cp ${jar} edu.stanford.MarcToXML -h
# usage: edu.stanford.MarcToXML
# -h,--help help message
# -i,--inputFile <arg> MARC input file (binary .mrc file expected; required)
# -l,--logFile <arg> Log file output (default: log/MarcToXML.log)
# -o,--outputPath <arg> MARC XML output path (default: ENV["LD4P_MARCXML"])
# -r,--replace Replace existing XML files (default: false)

java -cp "${jar}" edu.stanford.MarcToXML \
  -i "${mrc_file}" -o "${LD4P_MARCXML}" -l "${log_file}" -r

# Capture the converter status immediately; it is also this script's
# exit status.
success=$?

# Compare numerically: a bare `[ ${success} ]` only checks that the string
# is non-empty, which is true for every exit code.
if [ "${success}" -eq 0 ]; then
  echo "Completed conversion."
else
  echo "ERROR: Conversion failed for ${mrc_file}" | tee --append "${err_file}"
fi

echo
exit "${success}"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash
#
# Batch conversion: run the convert script on every *.mrc file found
# under LD4P_MARC and, when archiving is enabled, move each successfully
# converted file into LD4P_MARC_ARCHIVE.
#
# LD4P_ROOT is derived from this script's location (its parent directory)
# and the remaining LD4P_* settings and CONVERT_SCRIPT come from the
# sourced config/config.sh.

SCRIPT_PATH=$( cd "$(dirname "$0")" && pwd -P )
export LD4P_ROOT=$( cd "${SCRIPT_PATH}/.." && pwd -P )
export LD4P_CONFIG="${LD4P_ROOT}/config/config.sh"
source "${LD4P_CONFIG}"

# The data and log directories must already exist; this script does not
# create them.
if [ ! -d "${LD4P_MARC}" ]; then
  echo "Failed to configure LD4P_MARC data directory: ${LD4P_MARC}"
  exit 1
fi

if [ ! -d "${LD4P_MARCXML}" ]; then
  echo "Failed to configure LD4P_MARCXML data directory: ${LD4P_MARCXML}"
  exit 1
fi

if [ ! -d "${LD4P_LOGS}" ]; then
  echo "Failed to configure LD4P_LOGS directory: ${LD4P_LOGS}"
  exit 1
fi

echo "Searching MARC files: ${LD4P_MARC}/*.mrc"

# Collect the file list up front, NUL-delimited, so that paths containing
# whitespace survive intact and archiving files during the loop cannot
# disturb the directory walk. Reading into an array first also leaves the
# convert script's stdin untouched.
marc_files=()
while IFS= read -r -d '' marc_bin; do
  marc_files+=("${marc_bin}")
done < <(find "${LD4P_MARC}" -type f -name '*.mrc' -print0)

for marc_bin in "${marc_files[@]}"; do
  "${CONVERT_SCRIPT}" "${marc_bin}"
  SUCCESS=$?
  # Numeric comparison: `[ ${SUCCESS} ]` is true for any non-empty value,
  # which would archive (and delete) files whose conversion failed.
  if [ "${SUCCESS}" -eq 0 ]; then
    if [ "${LD4P_ARCHIVE_ENABLED}" == "true" ]; then
      # Archive the marc_bin file (preserve timestamps etc.); the source
      # is removed only when rsync reports success.
      rsync -a --update "${marc_bin}" "${LD4P_MARC_ARCHIVE}/" && rm -- "${marc_bin}"
    fi
  fi
done
echo "Completed MARC files: ${LD4P_MARC}/*.mrc"
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,59 +1,24 @@ | ||
#!/bin/bash | ||
# | ||
# Requires one input parameter - the path to a MARC21 binary file. | ||
# | ||
# note that log_dir and OUTPUT_DIR are expected to exist already. | ||
# | ||
# Process all records in the mrc_file using marc4j and SQL to | ||
# look up authority keys and retrieve any URI values from | ||
# 92X fields and put them in the subfield 0 so that the | ||
# LOC converter (for Bibframe v1) can use them correctly. | ||
|
||
#INPUT_DATA_DIR = '/Symphony/Marc' | ||
#INPUT_DATA_DIR = '../../../data/casalini-raw' | ||
# FIXME: have this be a java property? (in a properties file or java command line -D argument) | ||
# or combine input filename and input file directory into the same arg? | ||
INPUT_DATA_DIR='java/src/test/resources' | ||
SCRIPT_PATH=$( cd $(dirname $0) && pwd -P ) | ||
export LD4P_ROOT=$( cd "${SCRIPT_PATH}/.." && pwd -P ) | ||
export LD4P_CONFIG="${LD4P_ROOT}/config/config.sh" | ||
source ${LD4P_CONFIG} | ||
|
||
# FIXME: this is hardcoded - it should be in a properties file or java command line -D arg) | ||
#OUTPUT_DIR = '../../../data/marcxml_output' | ||
OUTPUT_DIR='../../../data/test' | ||
|
||
# vars above this line need to change to process other data | ||
#------------------------------------------------ | ||
|
||
log_dir='log' | ||
|
||
jar_dir='java/target' | ||
jar="${jar_dir}/xform-marc21-to-xml-jar-with-dependencies.jar" | ||
|
||
mrc_file="${INPUT_DATA_DIR}/$1" | ||
|
||
# this var is used in java code | ||
# FIXME: have this be a java property? (in a properties file or java command line -D argument) | ||
export LD4P_MARCXML=${OUTPUT_DIR} | ||
|
||
filename=$(basename ${mrc_file} .mrc) | ||
log_date=$(date +%Y%m%dT%H%M%S) | ||
log_name="${log_dir}/${filename}_marc21-to-xml_${log_date}" | ||
log_file="${log_name}.log" | ||
err_file="${log_name}_errors.log" | ||
|
||
echo | ||
echo "Converting MARC file: ${mrc_file}" | ||
echo "Output MARC-XML files: ${LD4P_MARCXML}/*.xml" | ||
echo "Logging conversion to: ${log_file}" | ||
|
||
options="-i ${mrc_file} -o ${LD4P_MARCXML} -l ${log_file} -r" | ||
MARC_BIN="${LD4P_MARC}/one_record.mrc" | ||
if [ ! -f ${MARC_BIN} ]; then | ||
echo "Failed to locate MARC21 file: ${MARC_BIN}" | ||
exit 1 | ||
fi | ||
|
||
java -cp ${jar} edu.stanford.MarcToXML ${options} | ||
${CONVERT_SCRIPT} ${MARC_BIN} | ||
|
||
success=$? | ||
if [ ${success} ]; then | ||
echo "Completed conversion." | ||
# Check the conversion worked, it should output this file. | ||
MARC_XML="${LD4P_MARCXML}/1629059.xml" | ||
if [ -s ${MARC_XML} ]; then | ||
echo "SUCCESS created MARC-XML file: ${MARC_XML}" | ||
else | ||
echo "ERROR: Conversion failed for ${mrc_file}" | tee --append ${err_file} | ||
echo "FAILURE to create MARC-XML file: ${MARC_XML}" | ||
exit 1 | ||
fi | ||
|
||
echo | ||
exit ${success} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#!/bin/bash
#
# Shared LD4P configuration, loaded by other scripts with `source`.
#
# Input:   LD4P_ROOT - project root path; must be set by the caller.
# Exports: LD4P_CONFIG, LD4P_LOGS, LD4P_BIN, LD4P_LIB, LD4P_DATA,
#          LD4P_MARC, LD4P_MARCXML, LD4P_ARCHIVE_ENABLED, LD4P_MARC_ARCHIVE
# Sets:    CONVERT_SCRIPT (not exported) - path to the conversion script.

# An LD4P_ROOT path must be defined by any scripts calling this configuration.
if [ -z "${LD4P_ROOT}" ]; then
  echo "ERROR: The LD4P configuration requires an LD4P_ROOT path: ${LD4P_ROOT}" 1>&2
  # Abort by sending SIGINT to the current shell process.
  kill -INT $$
fi

# Project layout, all relative to LD4P_ROOT.
export LD4P_CONFIG="${LD4P_ROOT}/config/config.sh"
export LD4P_LOGS="${LD4P_ROOT}/log"
export LD4P_BIN="${LD4P_ROOT}/bin"
export LD4P_LIB="${LD4P_ROOT}/lib"

# Data directories: MARC21 binary input and MARC-XML output.
export LD4P_DATA="${LD4P_ROOT}/data"
export LD4P_MARC="${LD4P_DATA}/Marc"
export LD4P_MARCXML="${LD4P_DATA}/MarcXML"

# Archiving is off by default; scripts compare this value with "true".
export LD4P_ARCHIVE_ENABLED=false
export LD4P_MARC_ARCHIVE="${LD4P_DATA}/Marc_Archive"

CONVERT_SCRIPT="${LD4P_BIN}/marc21_to_marcxml.sh"
if [ ! -f "${CONVERT_SCRIPT}" ]; then
  # Diagnostics go to stderr, matching the LD4P_ROOT check in this file.
  echo "Failed to locate convert script: ${CONVERT_SCRIPT}" 1>&2
  exit 1
fi
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
01025cam a2200289 a 45000010008000000030006000080050017000140080041000310100016000720200020000880350027001080350022001350350023001570400015001800500020001951000063002152450053002782600050003313000033003815040029004145050153004435960006005966500056006026500049006589010017007079420011007241629059SIRSI19910712000000.0860723m19859999ru ac b 000 0cruso a 86169543 c1.20rub (ch. 1) a(CStRLIN)CSUG86-B68619 a(OCoLC-M)15600303 a(OCoLC-I)273735471 dCStdOrLoB0 aT212b.S29 19851 aSavelʹev, N. I͡A.q(Nikolaĭ I͡Akovlevich),d1908-1967.10aSyny Altai͡a i Otechestva /cN.I͡A. Savelʹev. aBarnaul :bAltaĭskoe knizhnoe izd-vo,c1985- av. :bill., ports. ;c21 cm. aIncludes bibliographies.1 aCh. 1. Mastera khrustalʹnogo dela. Filipp Vasilʹevich Struzhkov. Kozʹma Dmitrievich Frolov. V starom Salaire -- ch. 2. Mekhanikus Ivan Polzunov - a1 0aInventorszRussia (Federation)zSiberiaxBiography. 0aIndustrieszAltai Mountains RegionxHistory. aVol. 2, 1988 saaxtn |
Empty file.
Empty file.
Binary file not shown.