Skip to content

Commit

Permalink
Extract marc21_to_marcxml.sh script, use it in test/batch scripts, co…
Browse files Browse the repository at this point in the history
…nfigured by config/config.sh
  • Loading branch information
dazza-codes committed Mar 8, 2017
1 parent 07a72a5 commit 517ab27
Show file tree
Hide file tree
Showing 11 changed files with 125 additions and 53 deletions.
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ buildNumber.properties

java/src/main/resources/server.conf
java/log
files/

# MARC-XML output files
data/MarcXML/*.xml

# Log files
log/*.log
Expand All @@ -41,4 +43,3 @@ laptop*
# included in this public git repo.
farmshare*

data/*
44 changes: 44 additions & 0 deletions bin/marc21_to_marcxml.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Convert a single MARC21 binary file to MARC-XML.
#
# Usage: marc21_to_marcxml.sh MRC_FILE
#
# Requires one input parameter - the path to a MARC21 binary file.
#
# Process all records in the mrc_file using marc4j and SQL to
# look up authority keys and retrieve any URI values from
# 92X fields and put them in the subfield 0 so that the
# LOC converter can use them correctly.
#
# Environment variables read by this script:
#   LD4P_LIB     - directory containing the converter jar
#   LD4P_LOGS    - directory where the log files are written
#   LD4P_MARCXML - output directory for the MARC-XML files
#
# Exit status: the exit status of the java converter (0 on success),
# or 1 when no input file was given.

mrc_file=$1
if [ -z "${mrc_file}" ]; then
  echo "Usage: ${0##*/} MRC_FILE" >&2
  exit 1
fi

# Log files are named after the input file plus a timestamp, so that
# repeated conversions of the same file do not overwrite each other.
mrc_name=$(basename "${mrc_file}" .mrc)
log_date=$(date +%Y%m%dT%H%M%S)
log_name="${LD4P_LOGS}/${mrc_name}_marc21-to-xml_${log_date}"
log_file="${log_name}.log"
err_file="${log_name}_errors.log"

echo
echo "Converting MARC file: ${mrc_file}"
echo "Output MARC-XML files: ${LD4P_MARCXML}/*.xml"
echo "Logging conversion to: ${log_file}"

# Java library, built from ./java sources and copied to ./lib
jar="${LD4P_LIB}/xform-marc21-to-xml-jar-with-dependencies.jar"

# $ java -cp ${jar} edu.stanford.MarcToXML -h
# usage: edu.stanford.MarcToXML
# -h,--help help message
# -i,--inputFile <arg> MARC input file (binary .mrc file expected; required)
# -l,--logFile <arg> Log file output (default: log/MarcToXML.log)
# -o,--outputPath <arg> MARC XML output path (default: ENV["LD4P_MARCXML"])
# -r,--replace Replace existing XML files (default: false)

# Every expansion is quoted so that paths containing whitespace reach the
# converter as a single argument.
java -cp "${jar}" edu.stanford.MarcToXML \
  -i "${mrc_file}" -o "${LD4P_MARCXML}" -l "${log_file}" -r

# Capture the status immediately; any later command would overwrite $?.
success=$?

# Compare numerically: a bare `[ ${success} ]` only checks that the string
# is non-empty, which is true for every exit status (including failures).
if [ "${success}" -eq 0 ]; then
  echo "Completed conversion."
else
  # `tee -a` is the portable spelling of GNU's `tee --append`.
  echo "ERROR: Conversion failed for ${mrc_file}" | tee -a "${err_file}"
fi

echo
exit "${success}"
36 changes: 36 additions & 0 deletions bin/marc21_to_marcxml_batch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
#
# Convert every *.mrc file found under LD4P_MARC to MARC-XML by running
# CONVERT_SCRIPT once per file.
#
# LD4P_ROOT is derived from the location of this script (its parent
# directory) and the remaining settings are loaded by sourcing
# ${LD4P_ROOT}/config/config.sh.
#
# When LD4P_ARCHIVE_ENABLED is "true", each successfully converted file is
# copied into LD4P_MARC_ARCHIVE and then removed from LD4P_MARC.
#
# Exits 1 when any of the LD4P_MARC, LD4P_MARCXML or LD4P_LOGS directories
# is missing.

SCRIPT_PATH=$( cd "$(dirname "$0")" && pwd -P )
export LD4P_ROOT=$( cd "${SCRIPT_PATH}/.." && pwd -P )
export LD4P_CONFIG="${LD4P_ROOT}/config/config.sh"
source "${LD4P_CONFIG}"

if [ ! -d "${LD4P_MARC}" ]; then
  echo "Failed to configure LD4P_MARC data directory: ${LD4P_MARC}"
  exit 1
fi

if [ ! -d "${LD4P_MARCXML}" ]; then
  echo "Failed to configure LD4P_MARCXML data directory: ${LD4P_MARCXML}"
  exit 1
fi

if [ ! -d "${LD4P_LOGS}" ]; then
  echo "Failed to configure LD4P_LOGS directory: ${LD4P_LOGS}"
  exit 1
fi

echo "Searching MARC files: ${LD4P_MARC}/*.mrc"

# Collect the file list up front, NUL-delimited, so that paths containing
# whitespace or glob characters survive intact. Building the list before
# the loop also means the search has finished before any file is removed,
# and leaves stdin untouched for the convert script.
marc_files=()
while IFS= read -r -d '' marc_bin; do
  marc_files+=("${marc_bin}")
done < <(find "${LD4P_MARC}" -type f -name '*.mrc' -print0)

for marc_bin in "${marc_files[@]}"; do
  "${CONVERT_SCRIPT}" "${marc_bin}"
  SUCCESS=$?
  # Compare numerically: a bare `[ ${SUCCESS} ]` is true for any non-empty
  # string, which would archive and delete the input even after a failed
  # conversion.
  if [ "${SUCCESS}" -eq 0 ]; then
    if [ "${LD4P_ARCHIVE_ENABLED}" == "true" ]; then
      # Archive the marc_bin file (preserve timestamps etc.); remove the
      # original only when the copy succeeded.
      rsync -a --update "${marc_bin}" "${LD4P_MARC_ARCHIVE}/" \
        && rm -- "${marc_bin}"
    fi
  fi
done
echo "Completed MARC files: ${LD4P_MARC}/*.mrc"

67 changes: 16 additions & 51 deletions bin/marc21_to_marcxml_test.sh
Original file line number Diff line number Diff line change
@@ -1,59 +1,24 @@
#!/bin/bash
#
# Requires one input parameter - the path to a MARC21 binary file.
#
# note that log_dir and OUTPUT_DIR are expected to exist already.
#
# Process all records in the mrc_file using marc4j and SQL to
# look up authority keys and retrieve any URI values from
# 92X fields and put them in the subfield 0 so that the
# LOC converter (for Bibframe v1) can use them correctly.

#INPUT_DATA_DIR = '/Symphony/Marc'
#INPUT_DATA_DIR = '../../../data/casalini-raw'
# FIXME: have this be a java property? (in a properties file or java command line -D argument)
# or combine input filename and input file directory into the same arg?
INPUT_DATA_DIR='java/src/test/resources'
SCRIPT_PATH=$( cd $(dirname $0) && pwd -P )
export LD4P_ROOT=$( cd "${SCRIPT_PATH}/.." && pwd -P )
export LD4P_CONFIG="${LD4P_ROOT}/config/config.sh"
source ${LD4P_CONFIG}

# FIXME: this is hardcoded - it should be in a properties file or java command line -D arg)
#OUTPUT_DIR = '../../../data/marcxml_output'
OUTPUT_DIR='../../../data/test'

# vars above this line need to change to process other data
#------------------------------------------------

log_dir='log'

jar_dir='java/target'
jar="${jar_dir}/xform-marc21-to-xml-jar-with-dependencies.jar"

mrc_file="${INPUT_DATA_DIR}/$1"

# this var is used in java code
# FIXME: have this be a java property? (in a properties file or java command line -D argument)
export LD4P_MARCXML=${OUTPUT_DIR}

filename=$(basename ${mrc_file} .mrc)
log_date=$(date +%Y%m%dT%H%M%S)
log_name="${log_dir}/${filename}_marc21-to-xml_${log_date}"
log_file="${log_name}.log"
err_file="${log_name}_errors.log"

echo
echo "Converting MARC file: ${mrc_file}"
echo "Output MARC-XML files: ${LD4P_MARCXML}/*.xml"
echo "Logging conversion to: ${log_file}"

options="-i ${mrc_file} -o ${LD4P_MARCXML} -l ${log_file} -r"
MARC_BIN="${LD4P_MARC}/one_record.mrc"
if [ ! -f ${MARC_BIN} ]; then
echo "Failed to locate MARC21 file: ${MARC_BIN}"
exit 1
fi

java -cp ${jar} edu.stanford.MarcToXML ${options}
${CONVERT_SCRIPT} ${MARC_BIN}

success=$?
if [ ${success} ]; then
echo "Completed conversion."
# Check the conversion worked, it should output this file.
MARC_XML="${LD4P_MARCXML}/1629059.xml"
if [ -s ${MARC_XML} ]; then
echo "SUCCESS created MARC-XML file: ${MARC_XML}"
else
echo "ERROR: Conversion failed for ${mrc_file}" | tee --append ${err_file}
echo "FAILURE to create MARC-XML file: ${MARC_XML}"
exit 1
fi

echo
exit ${success}
25 changes: 25 additions & 0 deletions config/config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
#
# LD4P configuration: derives and exports the project paths from LD4P_ROOT
# and locates the MARC21 -> MARC-XML convert script.

# An LD4P_ROOT path must be defined by any scripts calling this configuration.
if [ -z "${LD4P_ROOT}" ]; then
  echo "ERROR: The LD4P configuration requires an LD4P_ROOT path: ${LD4P_ROOT}" >&2
  # Send SIGINT to the current shell process instead of calling `exit`.
  # NOTE(review): presumably chosen so that sourcing this file from an
  # interactive shell does not close that shell - confirm.
  kill -INT $$
fi

# Project layout, all relative to LD4P_ROOT.
LD4P_CONFIG="${LD4P_ROOT}/config/config.sh"
LD4P_LOGS="${LD4P_ROOT}/log"
LD4P_BIN="${LD4P_ROOT}/bin"
LD4P_LIB="${LD4P_ROOT}/lib"
export LD4P_CONFIG LD4P_LOGS LD4P_BIN LD4P_LIB

# Data directories: MARC21 input and MARC-XML output.
LD4P_DATA="${LD4P_ROOT}/data"
LD4P_MARC="${LD4P_DATA}/Marc"
LD4P_MARCXML="${LD4P_DATA}/MarcXML"
export LD4P_DATA LD4P_MARC LD4P_MARCXML

# Archive settings: the enable flag is the literal string "false" here.
LD4P_ARCHIVE_ENABLED=false
LD4P_MARC_ARCHIVE="${LD4P_DATA}/Marc_Archive"
export LD4P_ARCHIVE_ENABLED LD4P_MARC_ARCHIVE

# CONVERT_SCRIPT is a plain (non-exported) shell variable, so it is visible
# only to the shell that sources this file, not to child processes.
CONVERT_SCRIPT="${LD4P_BIN}/marc21_to_marcxml.sh"
[ -f "${CONVERT_SCRIPT}" ] || {
  echo "Failed to locate convert script: ${CONVERT_SCRIPT}"
  exit 1
}
Empty file added data/Marc/.keep
Empty file.
1 change: 1 addition & 0 deletions data/Marc/one_record.mrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
01025cam a2200289 a 45000010008000000030006000080050017000140080041000310100016000720200020000880350027001080350022001350350023001570400015001800500020001951000063002152450053002782600050003313000033003815040029004145050153004435960006005966500056006026500049006589010017007079420011007241629059SIRSI19910712000000.0860723m19859999ru ac b 000 0cruso  a 86169543 c1.20rub (ch. 1) a(CStRLIN)CSUG86-B68619 a(OCoLC-M)15600303 a(OCoLC-I)273735471 dCStdOrLoB0 aT212b.S29 19851 aSavelʹev, N. I͡A.q(Nikolaĭ I͡Akovlevich),d1908-1967.10aSyny Altai͡a i Otechestva /cN.I͡A. Savelʹev. aBarnaul :bAltaĭskoe knizhnoe izd-vo,c1985- av. :bill., ports. ;c21 cm. aIncludes bibliographies.1 aCh. 1. Mastera khrustalʹnogo dela. Filipp Vasilʹevich Struzhkov. Kozʹma Dmitrievich Frolov. V starom Salaire -- ch. 2. Mekhanikus Ivan Polzunov - a1 0aInventorszRussia (Federation)zSiberiaxBiography. 0aIndustrieszAltai Mountains RegionxHistory. aVol. 2, 1988 saaxtn
Empty file added data/MarcXML/.keep
Empty file.
Empty file added data/Marc_Archive/.keep
Empty file.
Binary file added lib/xform-marc21-to-xml-jar-with-dependencies.jar
Binary file not shown.
Empty file added log/.keep
Empty file.

0 comments on commit 517ab27

Please sign in to comment.