Skip to content

Commit

Permalink
Improve scripts.
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Jan 11, 2021
1 parent 1b3cf47 commit 20be577
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 22 deletions.
51 changes: 32 additions & 19 deletions common-script
Original file line number Diff line number Diff line change
Expand Up @@ -37,47 +37,52 @@ do_prepare_solr() {
}

# Index the MARC files into Solr by calling ./index once.
# Globals read:    TYPE_PARAMS, NAME, MARC_DIR, MASK, PREFIX
# Globals written: PARAMS (left global, as in the other do_* functions)
# Outputs:         two timestamped progress lines on stdout; stderr of ./index
#                  is appended to ${PREFIX}/solr.log
do_index() {
  # Remove --emptyLargeCollectors and --with-delete from TYPE_PARAMS. The 'g'
  # flag matters: without it sed drops only the first match, so one of the two
  # flags survives whenever both are present.
  PARAMS=$(printf '%s\n' "${TYPE_PARAMS}" \
    | sed -E 's/--emptyLargeCollectors|--with-delete//g')

  # Variable data is passed as printf arguments, never inside the format
  # string, so a '%' or '\' in a path cannot corrupt the log line.
  printf '%s> [index]\n' "$(date +"%F %T")"
  printf '%s> %s\n' "$(date +"%F %T")" \
    "./index --db $NAME --file-path ${MARC_DIR} --file-mask $MASK ${PARAMS} --trimId 2>> ${PREFIX}/solr.log"

  # ${PARAMS} stays unquoted on purpose: it is a space separated list of flags
  # that must be split into separate words.
  ./index --db $NAME --file-path ${MARC_DIR} --file-mask $MASK ${PARAMS} --trimId 2>> ${PREFIX}/solr.log
}

# Run the completeness analysis over the MARC files.
# Globals read:    TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Globals written: PARAMS (left global, as in the other do_* functions)
# Outputs:         two timestamped progress lines on stdout; stderr of
#                  ./completeness overwrites ${PREFIX}/completeness.log
do_completeness() {
  # Remove --emptyLargeCollectors and --with-delete from TYPE_PARAMS. The 'g'
  # flag matters: without it sed drops only the first match, so one of the two
  # flags survives whenever both are present.
  PARAMS=$(printf '%s\n' "${TYPE_PARAMS}" \
    | sed -E 's/--emptyLargeCollectors|--with-delete//g')

  # Variable data is passed as printf arguments, never inside the format string.
  printf '%s> [completeness]\n' "$(date +"%F %T")"
  printf '%s> %s\n' "$(date +"%F %T")" \
    "./completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/completeness.log"

  # ${PARAMS} and ${MARC_DIR}/${MASK} stay unquoted on purpose: the former is a
  # list of flags, the latter may be a glob that selects the input files.
  ./completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/completeness.log
}

# Run the classification analysis once, then the R post-processing script.
# Globals read:    TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Globals written: PARAMS (left global, as in the other do_* functions)
# Outputs:         timestamped progress lines on stdout; stderr of
#                  ./classifications overwrites ${PREFIX}/classifications.log
do_classifications() {
  # Remove --emptyLargeCollectors and --with-delete from TYPE_PARAMS. The 'g'
  # flag matters: without it sed drops only the first match, so one of the two
  # flags survives whenever both are present.
  PARAMS=$(printf '%s\n' "${TYPE_PARAMS}" \
    | sed -E 's/--emptyLargeCollectors|--with-delete//g')

  # Variable data is passed as printf arguments, never inside the format string.
  printf '%s> [classifications]\n' "$(date +"%F %T")"
  printf '%s> %s\n' "$(date +"%F %T")" \
    "./classifications --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/classifications.log"

  # ${PARAMS} and ${MARC_DIR}/${MASK} stay unquoted on purpose: the former is a
  # list of flags, the latter may be a glob that selects the input files.
  ./classifications --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/classifications.log

  # NOTE(review): the logged command advertises '2>> classifications.log' but
  # the Rscript call below does not redirect anything. Left unchanged because
  # it is not clear which of the two is intended - confirm with the author.
  printf '%s> %s\n' "$(date +"%F %T")" \
    "Rscript scripts/classifications-type.R ${OUTPUT_DIR} 2>> ${PREFIX}/classifications.log"
  Rscript scripts/classifications-type.R ${OUTPUT_DIR}
}

# Run the authorities analysis over the MARC files, once.
# Globals read:    TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Globals written: PARAMS (left global, as in the other do_* functions)
# Outputs:         two timestamped progress lines on stdout; stderr of
#                  ./authorities overwrites ${PREFIX}/authorities.log
do_authorities() {
  # Remove --emptyLargeCollectors and --with-delete from TYPE_PARAMS. The 'g'
  # flag matters: without it sed drops only the first match, so one of the two
  # flags survives whenever both are present.
  PARAMS=$(printf '%s\n' "${TYPE_PARAMS}" \
    | sed -E 's/--emptyLargeCollectors|--with-delete//g')

  # Variable data is passed as printf arguments, never inside the format string.
  printf '%s> [authorities]\n' "$(date +"%F %T")"
  printf '%s> %s\n' "$(date +"%F %T")" \
    "./authorities --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/authorities.log"

  # ${PARAMS} and ${MARC_DIR}/${MASK} stay unquoted on purpose: the former is a
  # list of flags, the latter may be a glob that selects the input files.
  ./authorities --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/authorities.log
}

# Run ./tt-completeness once, then build its histogram with R.
# Globals read:    TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Globals written: PARAMS (left global, as in the other do_* functions)
# Outputs:         timestamped progress lines on stdout; stderr of
#                  ./tt-completeness overwrites ${PREFIX}/tt-completeness.log and
#                  all output of the R script is appended to the same file
do_tt_completeness() {
  # Remove --emptyLargeCollectors and --with-delete from TYPE_PARAMS. The 'g'
  # flag matters: without it sed drops only the first match, so one of the two
  # flags survives whenever both are present.
  PARAMS=$(printf '%s\n' "${TYPE_PARAMS}" \
    | sed -E 's/--emptyLargeCollectors|--with-delete//g')

  # Variable data is passed as printf arguments, never inside the format string.
  printf '%s> [tt-completeness]\n' "$(date +"%F %T")"
  printf '%s> %s\n' "$(date +"%F %T")" \
    "./tt-completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/tt-completeness.log"

  # ${PARAMS} and ${MARC_DIR}/${MASK} stay unquoted on purpose: the former is a
  # list of flags, the latter may be a glob that selects the input files.
  ./tt-completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/tt-completeness.log

  printf '%s> %s\n' "$(date +"%F %T")" \
    "Rscript scripts/tt-histogram.R ${OUTPUT_DIR} &>> ${PREFIX}/tt-completeness.log"
  Rscript scripts/tt-histogram.R ${OUTPUT_DIR} &>> ${PREFIX}/tt-completeness.log
}

do_shelf_ready_completeness() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/--emptyLargeCollectors|--with-delete//')
printf "%s %s> [shelf-ready-completeness]\n" $(date +"%F %T")
printf "%s %s> ./shelf-ready-completeness --defaultRecordType BOOKS ${TYPE_PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/shelf-ready-completeness.log\n" $(date +"%F %T")
printf "%s %s> ./shelf-ready-completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/shelf-ready-completeness.log\n" $(date +"%F %T")
./shelf-ready-completeness \
--defaultRecordType BOOKS \
${TYPE_PARAMS} \
${PARAMS} \
--outputDir ${OUTPUT_DIR}/ \
--trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/shelf-ready-completeness.log

Expand All @@ -86,9 +91,13 @@ do_shelf_ready_completeness() {
}

do_serial_score() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/--emptyLargeCollectors|--with-delete//')
printf "%s %s> [serial-score]\n" $(date +"%F %T")
printf "%s %s> ./serial-score --defaultRecordType BOOKS ${TYPE_PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/serial-score.log\n" $(date +"%F %T")
./serial-score --defaultRecordType BOOKS ${TYPE_PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/serial-score.log
printf "%s %s> ./serial-score --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/serial-score.log\n" $(date +"%F %T")
./serial-score --defaultRecordType BOOKS \
${PARAMS} \
--outputDir ${OUTPUT_DIR}/ \
--trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/serial-score.log

printf "%s %s> Rscript scripts/serial-score-histogram.R ${OUTPUT_DIR} &>> ${PREFIX}/serial-score.log\n" $(date +"%F %T")
Rscript scripts/serial-score-histogram.R ${OUTPUT_DIR} &>> ${PREFIX}/serial-score.log
Expand All @@ -99,17 +108,20 @@ do_format() {
}

# Run the functional analysis over the MARC files, once.
# Globals read:    TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Globals written: PARAMS (left global, as in the other do_* functions)
# Outputs:         two timestamped progress lines on stdout; stderr of
#                  ./functional-analysis overwrites
#                  ${PREFIX}/functional-analysis.log
do_functional_analysis() {
  # Remove --emptyLargeCollectors and --with-delete from TYPE_PARAMS. The 'g'
  # flag matters: without it sed drops only the first match, so one of the two
  # flags survives whenever both are present.
  PARAMS=$(printf '%s\n' "${TYPE_PARAMS}" \
    | sed -E 's/--emptyLargeCollectors|--with-delete//g')

  # Variable data is passed as printf arguments, never inside the format string.
  printf '%s> [functional-analysis]\n' "$(date +"%F %T")"
  printf '%s> %s\n' "$(date +"%F %T")" \
    "./functional-analysis --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/functional-analysis.log"

  # ${PARAMS} and ${MARC_DIR}/${MASK} stay unquoted on purpose: the former is a
  # list of flags, the latter may be a glob that selects the input files.
  ./functional-analysis --defaultRecordType BOOKS \
    ${PARAMS} \
    --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/functional-analysis.log
}

do_network_analysis() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/--emptyLargeCollectors|--with-delete//')
printf "%s %s> [network-analysis]\n" $(date +"%F %T")
printf "%s %s> ./network-analysis --defaultRecordType BOOKS ${TYPE_PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/network-analysis.log\n" $(date +"%F %T")
printf "%s %s> ./network-analysis --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/network-analysis.log\n" $(date +"%F %T")
./network-analysis --defaultRecordType BOOKS \
${TYPE_PARAMS} \
${PARAMS} \
--outputDir ${OUTPUT_DIR}/ \
${MARC_DIR}/${MASK} 2> ${PREFIX}/network-analysis.log

Expand Down Expand Up @@ -161,9 +173,10 @@ do_pareto() {
}

do_marc_history() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/--emptyLargeCollectors|--with-delete//')
printf "%s %s> [marc-history]\n" $(date +"%F %T")
printf "%s %s> ./formatter --selector \"008~7-10;008~0-5\" --defaultRecordType BOOKS ${TYPE_PARAMS} --separator \",\" --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} &> ${PREFIX}/marc-history.log\n" $(date +"%F %T")
./formatter --selector "008~7-10;008~0-5" --defaultRecordType BOOKS ${TYPE_PARAMS} --separator "," --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} &> ${PREFIX}/marc-history.log
printf "%s %s> ./formatter --selector \"008~7-10;008~0-5\" --defaultRecordType BOOKS ${PARAMS} --separator \",\" --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} &> ${PREFIX}/marc-history.log\n" $(date +"%F %T")
./formatter --selector "008~7-10;008~0-5" --defaultRecordType BOOKS ${PARAMS} --separator "," --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} &> ${PREFIX}/marc-history.log

printf "%s %s> Rscript scripts/marc-history.R ${OUTPUT_DIR} &>> ${PREFIX}/marc-history.log\n" $(date +"%F %T")
Rscript scripts/marc-history.R ${OUTPUT_DIR} &>> ${PREFIX}/marc-history.log
Expand Down
12 changes: 9 additions & 3 deletions index
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ usage:
-w, --with-delete delete before index
-s, --solrFieldType <field type>: How Solr field should be named.
Possible values: 'marc-tags', 'human-readable', or 'mixed'
-l, --limit <limit> index only a limited number of records
-x, --marcxml the source is in MARCXML format
-a, --alephseq the source is in Alephseq format
-t, --trimId trim record identifiers
Expand All @@ -42,15 +43,16 @@ if [ $# -eq 0 ]; then
show_usage
fi

# Normalise the command line with getopt; the result is re-read via
# 'eval set --' just below.
# - 'defaultRecordType' is declared with a required argument
#   ('defaultRecordType:'), matching its short form 'r:' and the option loop,
#   which reads $2 and shifts twice for it. Without the colon getopt moves the
#   value behind '--', so the loop would swallow the following option instead.
# - "${ME}" is quoted so an empty or space-containing program name cannot
#   change how the remaining arguments are parsed.
# NOTE(review): short '-s' takes an optional argument ('s::') while the long
# '--solrFieldType:' requires one - confirm that this asymmetry is intended.
GETOPT=$(getopt -o d:p:m:ws::xatr:hv:l: \
  --long db:,file-path:,file-mask:,with-delete,solrFieldType:,marcxml,alephseq,trimId,defaultRecordType:,help,marcVersion:,limit: \
  -n "${ME}" -- "$@")
eval set -- "$GETOPT"

DB=""
solrFieldType=mixed
defaultRecordType=BOOKS
marcVersion=MARC21
limit=""
while true ; do
case "$1" in
-d|--db) DB=$2 ; shift 2;;
Expand All @@ -60,6 +62,7 @@ while true ; do
-s|--solrFieldType) solrFieldType=$2 ; shift 2;;
-r|--defaultRecordType) defaultRecordType=$2 ; shift 2;;
-v|--marcVersion) marcVersion=$2 ; shift 2;;
-l|--limit) limit="--limit $2"; shift 2;;
-x|--marcxml) marcxml="--marcxml" ; shift;;
-a|--alephseq) alephseq="--alephseq" ; shift;;
-t|--trimId) trimId="--trimId" ; shift;;
Expand All @@ -69,6 +72,8 @@ while true ; do
esac
done

echo "limit: $limit"

CORE=${DB}_dev

export SOLR=http://localhost:8983/solr/${CORE}
Expand All @@ -89,6 +94,7 @@ running the command
--solrFieldType $solrFieldType \
--defaultRecordType $defaultRecordType \
--marcVersion $marcVersion \
$limit \
$trimId \
$marcxml \
$alephseq \
Expand All @@ -99,7 +105,7 @@ EOT
/usr/bin/java -cp $JAR de.gwdg.metadataqa.marc.cli.MarcToSolr \
--solrUrl ${SOLR} --solrFieldType $solrFieldType \
--defaultRecordType $defaultRecordType \
--marcVersion $marcVersion $trimId $marcxml $alephseq \
--marcVersion $marcVersion $limit $trimId $marcxml $alephseq \
${FILE_PATH}/${FILE_MASK}

echo "Start optimizing"
Expand Down

0 comments on commit 20be577

Please sign in to comment.