diff --git a/add_data_to_graph.sh b/add_data_to_graph.sh
index 592a6d0..705f979 100755
--- a/add_data_to_graph.sh
+++ b/add_data_to_graph.sh
@@ -3,10 +3,11 @@
 # ARG_HELP([Upload JSONLD and Turtle data to a Neurobagel graph])
 # ARG_POSITIONAL_SINGLE([dir],[Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded.])
 # ARG_POSITIONAL_SINGLE([graph-url],[Host and port at which to access the graph database to add data to (e.g., localhost:7200)])
-# ARG_POSITIONAL_SINGLE([graph-db],[Name of graph database to add data to])
+# ARG_POSITIONAL_SINGLE([graph-database],[Name of graph database to add data to])
 # ARG_POSITIONAL_SINGLE([user],[Username for graph database access])
 # ARG_POSITIONAL_SINGLE([password],[Password for graph database user])
 # ARG_OPTIONAL_BOOLEAN([clear-data],[],[Whether or not to first clear all existing data from the graph database],[off])
+# ARG_OPTIONAL_BOOLEAN([use-graphdb-syntax],[],[Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database.],[off])
 # ARGBASH_GO()
 # needed because of Argbash --> m4_ignore([
 ### START OF CODE GENERATED BY Argbash v2.9.0 one line above ###
@@ -35,19 +36,21 @@ begins_with_short_option()
 _positionals=()
 # THE DEFAULTS INITIALIZATION - OPTIONALS
 _arg_clear_data="off"
+_arg_use_graphdb_syntax="off"
 
 
 print_help()
 {
 	printf '%s\n' "Upload JSONLD and Turtle data to a Neurobagel graph"
-	printf 'Usage: %s [-h|--help] [--(no-)clear-data] <dir> <graph-url> <graph-db> <user> <password>\n' "$0"
+	printf 'Usage: %s [-h|--help] [--(no-)clear-data] [--(no-)use-graphdb-syntax] <dir> <graph-url> <graph-database> <user> <password>\n' "$0"
 	printf '\t%s\n' "<dir>: Path to directory containing .jsonld and/or .ttl files. ALL .jsonld and .ttl files in this directory will be uploaded."
 	printf '\t%s\n' "<graph-url>: Host and port at which to access the graph database to add data to (e.g., localhost:7200)"
-	printf '\t%s\n' "<graph-db>: Name of graph database to add data to"
+	printf '\t%s\n' "<graph-database>: Name of graph database to add data to"
 	printf '\t%s\n' "<user>: Username for graph database access"
 	printf '\t%s\n' "<password>: Password for graph database user"
 	printf '\t%s\n' "-h, --help: Prints help"
 	printf '\t%s\n' "--clear-data, --no-clear-data: Whether or not to first clear all existing data from the graph database (off by default)"
+	printf '\t%s\n' "--use-graphdb-syntax, --no-use-graphdb-syntax: Whether or not to use GraphDB API endpoints to update the specified graph database. If off, assumes the graph database is a Stardog database. (off by default)"
 }
 
 
@@ -70,6 +73,10 @@ parse_commandline()
 				_arg_clear_data="on"
 				test "${1:0:5}" = "--no-" && _arg_clear_data="off"
 				;;
+			--no-use-graphdb-syntax|--use-graphdb-syntax)
+				_arg_use_graphdb_syntax="on"
+				test "${1:0:5}" = "--no-" && _arg_use_graphdb_syntax="off"
+				;;
 			*)
 				_last_positional="$1"
 				_positionals+=("$_last_positional")
@@ -83,7 +90,7 @@ parse_commandline()
 
 handle_passed_args_count()
 {
-	local _required_args_string="'dir', 'graph-url', 'graph-db', 'user' and 'password'"
+	local _required_args_string="'dir', 'graph-url', 'graph-database', 'user' and 'password'"
 	test "${_positionals_count}" -ge 5 || _PRINT_HELP=yes die "FATAL ERROR: Not enough positional arguments - we require exactly 5 (namely: $_required_args_string), but got only ${_positionals_count}." 1
 	test "${_positionals_count}" -le 5 || _PRINT_HELP=yes die "FATAL ERROR: There were spurious positional arguments --- we expect exactly 5 (namely: $_required_args_string), but got ${_positionals_count} (the last one was: '${_last_positional}')." 1
 }
@@ -92,7 +99,7 @@ handle_passed_args_count()
 assign_positional_args()
 {
 	local _positional_name _shift_for=$1
-	_positional_names="_arg_dir _arg_graph_url _arg_graph_db _arg_user _arg_password "
+	_positional_names="_arg_dir _arg_graph_url _arg_graph_database _arg_user _arg_password "
 
 	shift "$_shift_for"
 	for _positional_name in ${_positional_names}
@@ -116,9 +123,10 @@ assign_positional_args 1 "${_positionals[@]}"
 jsonld_dir=$_arg_dir
 user=$_arg_user
 password=$_arg_password
-graph_db=$_arg_graph_db
+graph_db=$_arg_graph_database
 graph_url=$_arg_graph_url
 clear_data=$_arg_clear_data # value is either on or off (https://argbash.readthedocs.io/en/stable/guide.html#optional-arguments)
+use_graphdb_syntax=$_arg_use_graphdb_syntax
 
 DELETE_TRIPLES_QUERY="
 DELETE {
@@ -127,31 +135,81 @@ DELETE {
     ?s ?p ?o .
 }"
 
+# Depending on the graph backend used, set URLs for uploading data to and clearing data in graph database
+base_url="http://${graph_url}/${graph_db}"
+if [ "$use_graphdb_syntax" = "on" ]; then
+    upload_data_url="${base_url}/statements"
+    clear_data_url=$upload_data_url
+else
+    upload_data_url=$base_url
+    clear_data_url="${base_url}/update"
+fi
+
 # Clear existing data in graph database if requested
 if [ "$clear_data" = "on" ]; then
-    echo -e "\nClearing existing data from ${graph_db}..."
+    echo -e "\nCLEARING EXISTING DATA FROM ${graph_db}..."
 
-    curl -u "${user}:${password}" -X POST http://${graph_url}/${graph_db}/update \
+    response=$(curl -u "${user}:${password}" --no-progress-meter -i -w "\n%{http_code}\n" \
+        -X POST "$clear_data_url" \
         -H "Content-Type: application/sparql-update" \
-        --data-binary "${DELETE_TRIPLES_QUERY}"
-
-    echo -e "Done clearing existing data from ${graph_db}.\n"
+        --data-binary "${DELETE_TRIPLES_QUERY}")
+
+    # Extract and check status code outputted as final line of response
+    httpcode=$(tail -n1 <<< "$response")
+    # Anything that is not a 2xx code is a failure, including an empty or non-numeric value (e.g., curl could not run)
+    if [[ ! "$httpcode" =~ ^2[0-9][0-9]$ ]]; then
+        echo -e "\nERROR: Failed to clear ${graph_db}:"
+        echo "$(sed '$d' <<< "$response")"
+        echo -e "\nEXITING..."
+        exit 1
+    fi
 fi
 
 # Add data to specified graph database
-echo "Uploading data from ${jsonld_dir} to ${graph_db}..."
-for db in ${jsonld_dir}/*.jsonld; do
-    curl -u "${user}:${password}" -i -X POST http://${graph_url}/${graph_db} \
-        -H "Content-Type: application/ld+json" \
-        --data-binary @${db}
+echo -e "\nUPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}...\n"
+
+upload_failed=()
+for db in "${jsonld_dir}"/*.jsonld; do
+    # Prevent edge case where no matching files are present in directory and so loop executes once with glob pattern string itself
+    [ -e "$db" ] || continue
+
+    echo "$(basename "${db}"):"
+    response=$(curl -u "${user}:${password}" --no-progress-meter -i -w "\n%{http_code}\n" \
+        -X POST "$upload_data_url" \
+        -H "Content-Type: application/ld+json" \
+        --data-binary "@${db}")
+
+    httpcode=$(tail -n1 <<< "$response")
+    if [[ ! "$httpcode" =~ ^2[0-9][0-9]$ ]]; then
+        upload_failed+=("${db}")
+    fi
+    # Print rest of response to stdout (printf, so backslashes in the response are not interpreted as escapes)
+    printf '%s\n\n' "$(sed '$d' <<< "$response")"
 done
 
-for file in ${jsonld_dir}/*.ttl; do
-    curl -u "${user}:${password}" -i -X POST http://${graph_url}/${graph_db} \
-        -H "Content-Type: text/turtle" \
-        --data-binary @${file}
+for file in "${jsonld_dir}"/*.ttl; do
+    [ -e "$file" ] || continue
+
+    echo "$(basename "${file}"):"
+    response=$(curl -u "${user}:${password}" --no-progress-meter -i -w "\n%{http_code}\n" \
+        -X POST "$upload_data_url" \
+        -H "Content-Type: text/turtle" \
+        --data-binary "@${file}")
+
+    httpcode=$(tail -n1 <<< "$response")
+    if [[ ! "$httpcode" =~ ^2[0-9][0-9]$ ]]; then
+        upload_failed+=("${file}")
+    fi
+    printf '%s\n\n' "$(sed '$d' <<< "$response")"
 done
 
-echo "Finished uploading data from ${jsonld_dir} to ${graph_db}"
+echo "FINISHED UPLOADING DATA FROM ${jsonld_dir} TO ${graph_db}."
+
+if (( ${#upload_failed[@]} != 0 )); then
+    echo -e "\nERROR: Upload failed for these files:"
+    printf '%s\n' "${upload_failed[@]}"
+    # Signal the partial failure to callers, consistent with the failed-clear path above
+    exit 1
+fi
 
 # ] <-- needed because of Argbash