
improved logging in scripts #22
felixlohmeier committed Jun 25, 2018
1 parent 19e5218 commit 10a7442
Showing 6 changed files with 100 additions and 42 deletions.
bin/ediss.sh: 26 changes (19 additions, 7 deletions)
@@ -57,7 +57,7 @@ while getopts $options opt; do
done
shift $((OPTIND - 1))

# get environment variables
# get environmental variables
if [ -n "$HOSSOLRUSER" ]; then solr_credentials="-u $HOSSOLRUSER:$HOSSOLRPASS"; fi

# declare additional variables
@@ -77,7 +77,7 @@ if [ -n "${config_dir// }" ] ; then jsonfiles=($(find -L "${config_dir}"/*.json
cleanup()
{
echo "cleanup..."
kill -9 ${pid}
kill -9 ${pid} &>/dev/null
rm -rf /tmp/openrefine_${date}
wait
}
@@ -182,6 +182,14 @@ echo ""
cleanup
echo ""

# Grep log for exceptions
exceptions=$(grep -i exception "${log_dir}/${codename}_${date}.log")
if [ -n "$exceptions" ]; then
echo 1>&2 "$exceptions"
echo 1>&2 "Konfiguration scheint fehlerhaft zu sein! Bitte manuell prüfen."
exit 2
fi

# Ingest data into Solr
if [ -n "$solr_url" ]; then
checkpoints=${#checkpointdate[@]}
@@ -197,10 +205,11 @@ if [ -n "$solr_url" ]; then
multivalue_config+=(\&f.$i.separator=$separator)
done
multivalue_config=$(printf %s "${multivalue_config[@]}")
# delete existing data
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: text/xml" --data-binary "<delete><query>source:${codename}</query></delete>" 1>/dev/null
# load new data
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=${codename}&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/02_transformed/${codename}_${date}.tsv
echo "delete existing data..."
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: application/json" --data-binary "{ \"delete\": { \"query\": \"source:${codename}\" } }" | jq .responseHeader
echo ""
echo "load new data..."
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=${codename}&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/02_transformed/${codename}_${date}.tsv | jq .responseHeader
echo ""
fi

@@ -213,7 +222,10 @@ if [ -n "$openrefine_url" ]; then
echo ""
echo "starting time: $(date --date=@${checkpointdate[$((checkpoints + 1))]})"
echo ""
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_live" &>/dev/null
echo "delete existing project ${codename}_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_live"
echo ""
echo "create new project ${codename}_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --create "${data_dir}/02_transformed/${codename}_${date}.tsv" --encoding=UTF-8 --projectName=${codename}_live
echo ""
fi
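For reuse outside these scripts, here is a minimal sketch of the exception check that this commit adds after cleanup; the log path is a placeholder (not a file from this repository) and the messages are translated to English:

#!/bin/bash
# Sketch of the log check added at the end of each harvesting script.
logfile="log/example_20180625.log"   # placeholder path

# abort with a distinct exit code if the OpenRefine log contains any exception
exceptions=$(grep -i exception "$logfile")
if [ -n "$exceptions" ]; then
    echo 1>&2 "$exceptions"
    echo 1>&2 "Configuration appears to be faulty! Please check manually."
    exit 2
fi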
bin/tubdok.sh: 26 changes (19 additions, 7 deletions)
@@ -57,7 +57,7 @@ while getopts $options opt; do
done
shift $((OPTIND - 1))

# get environment variables
# get environmental variables
if [ -n "$HOSSOLRUSER" ]; then solr_credentials="-u $HOSSOLRUSER:$HOSSOLRPASS"; fi

# declare additional variables
@@ -77,7 +77,7 @@ if [ -n "${config_dir// }" ] ; then jsonfiles=($(find -L "${config_dir}"/*.json
cleanup()
{
echo "cleanup..."
kill -9 ${pid}
kill -9 ${pid} &>/dev/null
rm -rf /tmp/openrefine_${date}
wait
}
@@ -182,6 +182,14 @@ echo ""
cleanup
echo ""

# Grep log for exceptions
exceptions=$(grep -i exception "${log_dir}/${codename}_${date}.log")
if [ -n "$exceptions" ]; then
echo 1>&2 "$exceptions"
echo 1>&2 "Konfiguration scheint fehlerhaft zu sein! Bitte manuell prüfen."
exit 2
fi

# Ingest data into Solr
if [ -n "$solr_url" ]; then
checkpoints=${#checkpointdate[@]}
@@ -197,10 +205,11 @@ if [ -n "$solr_url" ]; then
multivalue_config+=(\&f.$i.separator=$separator)
done
multivalue_config=$(printf %s "${multivalue_config[@]}")
# delete existing data
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: text/xml" --data-binary "<delete><query>source:${codename}</query></delete>" 1>/dev/null
# load new data
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=${codename}&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/02_transformed/${codename}_${date}.tsv
echo "delete existing data..."
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: application/json" --data-binary "{ \"delete\": { \"source\": \"${codename}\" } }" | jq .responseHeader
echo ""
echo "load new data..."
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=${codename}&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/02_transformed/${codename}_${date}.tsv | jq .responseHeader
echo ""
fi

@@ -213,7 +222,10 @@ if [ -n "$openrefine_url" ]; then
echo ""
echo "starting time: $(date --date=@${checkpointdate[$((checkpoints + 1))]})"
echo ""
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_live" &>/dev/null
echo "delete existing project ${codename}_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_live"
echo ""
echo "create new project ${codename}_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --create "${data_dir}/02_transformed/${codename}_${date}.tsv" --encoding=UTF-8 --projectName=${codename}_live
echo ""
fi
bin/zenodo-tuhh.sh: 26 changes (19 additions, 7 deletions)
@@ -57,7 +57,7 @@ while getopts $options opt; do
done
shift $((OPTIND - 1))

# get environment variables
# get environmental variables
if [ -n "$HOSSOLRUSER" ]; then solr_credentials="-u $HOSSOLRUSER:$HOSSOLRPASS"; fi

# declare additional variables
@@ -77,7 +77,7 @@ if [ -n "${config_dir// }" ] ; then jsonfiles=($(find -L "${config_dir}"/*.json
cleanup()
{
echo "cleanup..."
kill -9 ${pid}
kill -9 ${pid} &>/dev/null
rm -rf /tmp/openrefine_${date}
wait
}
@@ -182,6 +182,14 @@ echo ""
cleanup
echo ""

# Grep log for exceptions
exceptions=$(grep -i exception "${log_dir}/${codename}_${date}.log")
if [ -n "$exceptions" ]; then
echo 1>&2 "$exceptions"
echo 1>&2 "Konfiguration scheint fehlerhaft zu sein! Bitte manuell prüfen."
exit 2
fi

# Ingest data into Solr
if [ -n "$solr_url" ]; then
checkpoints=${#checkpointdate[@]}
@@ -197,10 +205,11 @@ if [ -n "$solr_url" ]; then
multivalue_config+=(\&f.$i.separator=$separator)
done
multivalue_config=$(printf %s "${multivalue_config[@]}")
# delete existing data
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: text/xml" --data-binary "<delete><query>source:${codename}</query></delete>" 1>/dev/null
# load new data
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=${codename}&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/02_transformed/${codename}_${date}.tsv
echo "delete existing data..."
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: application/json" --data-binary "{ \"delete\": { \"source\": \"${codename}\" } }" | jq .responseHeader
echo ""
echo "load new data..."
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=${codename}&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/02_transformed/${codename}_${date}.tsv | jq .responseHeader
echo ""
fi

@@ -213,7 +222,10 @@ if [ -n "$openrefine_url" ]; then
echo ""
echo "starting time: $(date --date=@${checkpointdate[$((checkpoints + 1))]})"
echo ""
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_live" &>/dev/null
echo "delete existing project ${codename}_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_live"
echo ""
echo "create new project ${codename}_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --create "${data_dir}/02_transformed/${codename}_${date}.tsv" --encoding=UTF-8 --projectName=${codename}_live
echo ""
fi
init-solr-schema.sh: 27 changes (18 additions, 9 deletions)
@@ -11,6 +11,7 @@ Usage: ./init-solr-schema.sh [-s SOLRURL]
== example ==
./init-solr-schema.sh -s http://localhost:8983/solr/hos
./init-solr-schema.sh -s https://hosdev.sub.uni-hamburg.de/solrAdmin/HOS
EOF
exit 1
}
@@ -31,25 +32,33 @@ while getopts $options opt; do
done
shift $((OPTIND - 1))

# get sysenv
# get environmental variables
if [ -n "$HOSSOLRUSER" ]; then solr_credentials="-u $HOSSOLRUSER:$HOSSOLRPASS"; fi

# declare additional variables
path_config=$(readlink -f cfg/solr)
config_dir=$(readlink -f cfg/solr)
solr_base=${solr_url%/*}
solr_core=${solr_url##*/}
if [ -n "${config_dir// }" ] ; then jsonfiles=($(find -L "${config_dir}"/*.json -type f -printf "%f\n" 2>/dev/null)) ; fi

# print variables
echo "Solr core URL: $solr_url"
echo "Solr base URL: $solr_base"
echo "Solr core name: $solr_core"
echo "Solr config files: ${jsonfiles[*]}"
echo ""

# delete existing data
echo "delete existing data..."
curl $solr_credentials -sS "${solr_base}/${solr_core}/update?commit=true" -H "Content-Type: text/xml" --data-binary "<delete><query>*:*</query></delete>" 1>/dev/null
curl $solr_credentials -sS "${solr_base}/${solr_core}/update?commit=true" -H "Content-Type: application/json" --data-binary '{ "delete": { "query": "*:*" } }' | jq .responseHeader

# delete fields and copy fields
echo "delete fields and copy fields..."
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"delete-copy-field\" : $(curl $solr_credentials --silent "${solr_base}/${solr_core}/schema/copyfields" | jq '[.copyFields[] | {source: .source, dest: .dest}]') }" ${solr_base}/${solr_core}/schema
curl $solr_credentials -sS "${solr_base}/admin/cores?action=RELOAD&core=${solr_core}" 1>/dev/null
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"delete-field\" : $(curl $solr_credentials --silent "${solr_base}/${solr_core}/schema/fields" | jq '[ .fields[] | {name: .name } ]') }" ${solr_base}/${solr_core}/schema
echo "delete fields, reload core and delete copy fields..."
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"delete-copy-field\" : $(curl $solr_credentials --silent "${solr_base}/${solr_core}/schema/copyfields" | jq '[.copyFields[] | {source: .source, dest: .dest}]') }" ${solr_base}/${solr_core}/schema | jq .responseHeader
curl $solr_credentials -sS "${solr_base}/admin/cores?action=RELOAD&core=${solr_core}" | jq .responseHeader
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"delete-field\" : $(curl $solr_credentials --silent "${solr_base}/${solr_core}/schema/fields" | jq '[ .fields[] | {name: .name } ]') }" ${solr_base}/${solr_core}/schema | jq .responseHeader

# add fields and copy fields
echo "add fields and copy fields..."
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"add-field\" : $(< ${path_config}/fields.json) }" ${solr_base}/${solr_core}/schema
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"add-copy-field\" : $(< ${path_config}/copyfields.json) }" ${solr_base}/${solr_core}/schema
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"add-field\" : $(< ${config_dir}/fields.json) }" ${solr_base}/${solr_core}/schema | jq .responseHeader
curl $solr_credentials -sS -X POST -H 'Content-type:application/json' --data-binary "{ \"add-copy-field\" : $(< ${config_dir}/copyfields.json) }" ${solr_base}/${solr_core}/schema | jq .responseHeader
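The curl | jq .responseHeader pattern introduced above keeps the Solr responses visible in the log while hiding the response body. A minimal sketch with placeholder URL and credentials (not values from this repository); note that Solr's JSON update API expects a "query" or "id" key inside "delete":

#!/bin/bash
# Sketch of the delete-and-report pattern used in the updated scripts.
solr_url="http://localhost:8983/solr/example"   # placeholder core URL
solr_credentials=""                             # e.g. "-u user:pass" if the core requires authentication

# delete all documents via the JSON update API and log only the response header
curl $solr_credentials -sS "${solr_url}/update?commit=true" \
  -H "Content-Type: application/json" \
  --data-binary '{ "delete": { "query": "*:*" } }' | jq .responseHeader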
load-new-data.sh: 5 changes (4 additions, 1 deletion)
@@ -112,7 +112,10 @@ if [ -n "$openrefine_url" ]; then
echo ""
echo "starting time: $(date --date=@${checkpointdate[$((checkpoints + 1))]})"
echo ""
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_new" &>/dev/null
echo "delete existing project ${codename}_new..."
${openrefine_client} -H ${external_host} -P ${external_port} --delete "${codename}_new"
echo ""
echo "create new project ${codename}_new..."
${openrefine_client} -H ${external_host} -P ${external_port} --create "${data_dir}/01_oai/${codename}_${date}.xml" $(for i in ${recordpath[@]}; do echo "--recordPath=$i "; done) --projectName=${codename}_new
echo ""
fi
run.sh: 32 changes (21 additions, 11 deletions)
@@ -11,6 +11,7 @@ data_dir="$(readlink -f data)"
log_dir="$(readlink -f log)"

# config
ram="2048M" # highest OpenRefine memory load is below 2048M
separator="%E2%90%9F" # multiple values are separated by unicode character unit separator (U+241F)
config_dir="$(readlink -f cfg/all)" # location of OpenRefine transformation rules in json format

@@ -25,6 +26,7 @@ Usage: ./run.sh [-s SOLRURL] [-d OPENREFINEURL]
== example ==
./run.sh -s http://localhost:8983/solr/hos -d http://localhost:3333
./run.sh -s https://hosdev.sub.uni-hamburg.de/solrAdmin/HOS
EOF
exit 1
}
@@ -70,9 +72,9 @@ cleanup()
{
echo "cleanup..."
for i in ${pid[@]}; do
kill $i &
kill $i &>/dev/null &
done
kill -9 ${pid_openrefine}
kill -9 ${pid_openrefine} &>/dev/null
rm -rf /tmp/openrefine_${date}
wait
}
@@ -114,10 +116,16 @@ until [[ "$count" -eq "0" ]]; do
done
echo ""
echo ""
echo "print stats from logs..."
echo "print stats and exceptions from logs..."
for f in "${path_bin}"/*.sh; do
stats=$(tail -n 3 "${path_log}/$(basename -s .sh ${f})_${date}"*.log | sed 's/total run time://' | sed 's/highest memory load://' | sed 's/number of records://')
exceptions=$(grep -i exception "${path_log}/$(basename -s .sh ${f})_${date}"*.log)
echo $(basename ${f}): $stats
if [ -n "$exceptions" ]; then
echo 1>&2 "$exceptions"
echo 1>&2 "Konfiguration für ${f} scheint fehlerhaft zu sein! Bitte manuell prüfen."
exit 2
fi
done
echo ""

@@ -134,13 +142,12 @@ mkdir -p "${openrefine_tmp}"
zip ${openrefine_tmp}/tmp.zip "${data_dir}/02_transformed/"*"_${date}"*".tsv"
echo ""
echo "launch OpenRefine server..."
$openrefine_server -p ${port} -d "$openrefine_tmp" -m ${ram} 1>/dev/null &
$openrefine_server -p ${port} -d "$openrefine_tmp" -m ${ram} -v error &
pid_openrefine=$!
until wget -q -O - http://localhost:${port} | cat | grep -q -o "OpenRefine" ; do sleep 1; done
echo ""
echo "load data..."
$openrefine_client -P ${port} --create "${openrefine_tmp}/tmp.zip" --format=tsv --includeFileSources=false --projectName=all
echo ""
ps -o start,etime,%mem,%cpu,rss -p ${pid_openrefine} --sort=start
memoryload+=($(ps --no-headers -o rss -p ${pid_openrefine}))
echo ""
@@ -153,7 +160,6 @@ for f in "${jsonfiles[@]}" ; do
done
echo "export data..."
$openrefine_client -P ${port} --export --output="${data_dir}/03_combined/all_${date}.tsv" "all"
echo ""
ps -o start,etime,%mem,%cpu,rss -p ${pid_openrefine} --sort=start
memoryload+=($(ps --no-headers -o rss -p ${pid_openrefine}))
echo ""
@@ -175,10 +181,11 @@ if [ -n "$solr_url" ]; then
multivalue_config+=(\&f.$i.separator=$separator)
done
multivalue_config=$(printf %s "${multivalue_config[@]}")
# delete existing data
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: text/xml" --data-binary '<delete><query>*:*</query></delete>' 1>/dev/null
# load new data
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=${all}&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/03_combined/all_${date}.tsv
echo "delete existing data..."
curl $solr_credentials -sS "${solr_url}/update?commit=true" -H "Content-Type: application/json" --data-binary '{ "delete": { "query": "*:*" } }' | jq .responseHeader
echo ""
echo "load new data..."
curl $solr_credentials --progress-bar "${solr_url}/update/csv?commit=true&optimize=true&separator=%09&literal.source=all&split=true${multivalue_config}" --data-binary @- -H 'Content-type:text/plain; charset=utf-8' < ${data_dir}/03_combined/all_${date}.tsv | jq .responseHeader
echo ""
fi

@@ -191,7 +198,10 @@ if [ -n "$openrefine_url" ]; then
echo ""
echo "starting time: $(date --date=@${checkpointdate[$((checkpoints + 1))]})"
echo ""
${openrefine_client} -H ${external_host} -P ${external_port} --delete "all_live" &>/dev/null
echo "delete existing project all_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --delete "all_live"
echo ""
echo "create new project all_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --create "${data_dir}/03_combined/all_${date}.tsv" --encoding=UTF-8 --projectName=all_live
echo ""
fi
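The delete-then-recreate step for the published OpenRefine project now logs each action instead of discarding the client output. A standalone sketch using only the openrefine-client options that appear above; host, port and input file are placeholders:

#!/bin/bash
# Sketch of the delete-then-recreate pattern for the "all_live" project.
openrefine_client="openrefine-client"     # assumed to be on the PATH
external_host="localhost"                 # placeholder host
external_port="3333"                      # placeholder port
tsv="data/03_combined/all_20180625.tsv"   # placeholder export file

echo "delete existing project all_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --delete "all_live"
echo ""
echo "create new project all_live..."
${openrefine_client} -H ${external_host} -P ${external_port} --create "${tsv}" --encoding=UTF-8 --projectName=all_live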
