diff --git a/README.md b/README.md index d05485d..0b05045 100644 --- a/README.md +++ b/README.md @@ -517,4 +517,5 @@ To test the generated man page use: `MANPATH="./man" man man/xml2xpath.sh.1` ## Known issues -* No one! ... that I know of :-p but [performance](#performance) with big documents can always be an issue. +* `-p` might give inconsistent namespace prefixes on documents with multiple namespaces. +[Performance](#performance) with big documents can always be an issue. diff --git a/VERSION b/VERSION index 26acbf0..aa22d3c 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.12.2 +0.12.3 diff --git a/tests/test-html-base.sh b/tests/test-html-base.sh index ff8af10..c0d3f09 100755 --- a/tests/test-html-base.sh +++ b/tests/test-html-base.sh @@ -38,9 +38,9 @@ test_result "$?" test_opts=(-a -r) print_test_descr "TC05" -dup_cnt=$(test_run_count) +dup_cnt=$(test_run_basic_count) test_opts=(-a) -uniq_cnt=$(test_run_count) +uniq_cnt=$(test_run_basic_count) #[ ! "$dup_cnt" -gt "$uniq_cnt" ] && echo "duplicates: $dup_cnt -gt $uniq_cnt $?" [ ! "$dup_cnt" -gt "$uniq_cnt" ] test_result "$?" diff --git a/tests/test-lib-src.sh b/tests/test-lib-src.sh index 18eac69..e531738 100644 --- a/tests/test-lib-src.sh +++ b/tests/test-lib-src.sh @@ -13,7 +13,7 @@ echo_with_pid(){ #--------------------------------------------------------------------------------------- # Verify test case result and return result and description #--------------------------------------------------------------------------------------- -function test_result(){ +test_result(){ retval=1 [ -n "$2" ] && retval="$2" @@ -27,13 +27,13 @@ function test_result(){ } -function show_color(){ +show_color(){ while read -r line; do echo -e "\e[01;31m$line\e[0m" done } -function quote_opts(){ +quote_opts(){ local str='' for o in "$@"; do if grep -q "^[-][a-z]" <<<"$o"; then @@ -48,18 +48,19 @@ function quote_opts(){ #--------------------------------------------------------------------------------------- # Run test case #--------------------------------------------------------------------------------------- -function show_errors(){ +show_errors(){ + err_pattern='XPath error|No xpath found|Boolean : false' if [ "$DBG" -eq 1 ]; then - tee /dev/stderr 2> >(show_color) | grep -Eq 'XPath error|No xpath found' + tee /dev/stderr 2> >(show_color) | grep -Eq "$err_pattern" else - grep -Eq 'XPath error|No xpath found' + grep -Eq "$err_pattern" fi } #--------------------------------------------------------------------------------------- -# Run test case +# Print test case description #--------------------------------------------------------------------------------------- -function print_test_descr(){ +print_test_descr(){ descr="$1 : ${!1}" echo_with_pid "$descr" if [ "$DBG" -eq 1 ]; then @@ -71,19 +72,42 @@ function print_test_descr(){ #--------------------------------------------------------------------------------------- # Run test case #--------------------------------------------------------------------------------------- -function test_run(){ +test_run(){ if [ ! -f "${test_type_opts[${#test_type_opts[@]} - 1]}" ]; then echo_with_pid "ERROR file not found: ${test_type_opts[${#test_type_opts[@]} - 1]}" | show_color exit 1 fi - print_test_descr "$1" - print_test_descr "$1" >>"$TRACE_FILE" + print_test_descr "$1" | tee -a "$TRACE_FILE" ../xml2xpath.sh "${test_opts[@]}" "${test_type_opts[@]}" 2>&1 1>>"$TRACE_FILE" | show_errors } +#--------------------------------------------------------------------------------------- +# Run count test case +#--------------------------------------------------------------------------------------- +test_run_count(){ + if [ ! -f "${test_type_opts[${#test_type_opts[@]} - 1]}" ]; then + echo_with_pid "ERROR file not found: ${test_type_opts[${#test_type_opts[@]} - 1]}" | show_color + exit 1 + fi + print_test_descr "$1" | tee -a "$TRACE_FILE" + result=$(../xml2xpath.sh "${test_opts[@]}" "${test_type_opts[@]}") + gawk 'BEGIN{RS="\n\n";FS="\n"} + { + if(NR == 1) printf "%s\n","setrootns" + for(i=1; i<=NF; i++) { + if($i == "") continue + if($0 ~ /^[^=\/]+=/){ + printf "%s\n", "setns " $i + } else { + printf "%s\n", "xpath count(" $i ") > 0" + } + } + }END{ printf "bye\n" }' <<<"$result" | xmllint --shell "${test_type_opts[@]}" 2>&1 | show_errors +} + #--------------------------------------------------------------------------------------- # Run test case for duplicates count #--------------------------------------------------------------------------------------- -function test_run_count(){ +test_run_basic_count(){ ../xml2xpath.sh "${test_opts[@]}" "${test_type_opts[@]}" | wc -l } diff --git a/tests/test-xml-ns-01.sh b/tests/test-xml-ns-01.sh index 45f0fb9..5007467 100755 --- a/tests/test-xml-ns-01.sh +++ b/tests/test-xml-ns-01.sh @@ -10,7 +10,7 @@ script_name=$(basename "$0") source test-lib-src.sh xml_file="resources/HL7.xml" test_opts=() -test_type_opts=(-x "$xml_file") +test_type_opts=("$xml_file") rel_xpath='/defaultns:ClinicalDocument/defaultns:recordTarget' echo_with_pid "*** XML tests - namespaces on root element ($script_name) ***" @@ -19,6 +19,7 @@ TC01="Basic test (-x)" TC02="Replace default namespace definition (-o), relative path (-s)" TC03="Find nodes using namespaces (-n)" TC04="Find nodes by absolute xpath using namespaces (-a -n)" +TC05="Count nodes with replaced default namespace definition (-o), relative path (-s)" test_run "TC01" test_result "$?" @@ -40,3 +41,8 @@ test_result "$?" test_opts=(-a -n -s "${rel_xpath}") test_run "TC04" test_result "$?" + +test_opts=('-q' '-n' '-o' 'defns=urn:hl7-org:v3' '-s' '//defns:addr') +#xml2xpath.sh "${test_opts[@]}" -x "$xml_file" | grep -q 'XPath error' +test_run_count "TC05" +test_result "$?" \ No newline at end of file diff --git a/tests/test-xml-ns-02.sh b/tests/test-xml-ns-02.sh index c72fa13..11373bb 100755 --- a/tests/test-xml-ns-02.sh +++ b/tests/test-xml-ns-02.sh @@ -13,13 +13,14 @@ script_name=$(basename "$0") source test-lib-src.sh xml_file="resources/soap.xml" test_opts=() -test_type_opts=(-x "$xml_file") +test_type_opts=("$xml_file") rel_xpath='//incident' echo_with_pid "*** XML tests - Namespaces on root element and body ($script_name) ***" # Test case descriptions TC01="Basic test (-x)" TC02="Replace default namespace definition (-o), relative path (-s)" +TC04="Test count of nodes found 'TC02' is greater than 0 for all found expressions" test_run "TC01" test_result "$?" @@ -28,3 +29,9 @@ test_result "$?" test_opts=(-o 'defns=http://example.com/ns1' -s "//defns:incident") test_run "TC02" test_result "$?" + +#test_type_opts=("resources/ns-with-default.xml") +#test_opts=( '-q' '-s' '//*[local-name()="x"]' ) +test_opts=('-q' '-n' '-o' 'defns=http://example.com/ns1' -s "//defns:incident") +test_run_count "TC04" +test_result "$?" diff --git a/tests/test-xml-ns-03.sh b/tests/test-xml-ns-03.sh index f870141..d258ccd 100755 --- a/tests/test-xml-ns-03.sh +++ b/tests/test-xml-ns-03.sh @@ -17,14 +17,14 @@ script_name=$(basename "$0") source test-lib-src.sh xml_file="resources/html5.html" test_opts=() -test_type_opts=(-x "$xml_file") +test_type_opts=( "$xml_file" ) echo_with_pid "*** XHTML tests - Namespaces on root element and body. Multiple default namespaces across document. ($script_name) ***" # Test case descriptions TC01="Basic test (-x)" TC02="Replace 'defaultns' prefix (-p)" TC03="Replace default namespace definition (-o), relative path (-s)" - +TC04="Test count of nodes TC03 is greater than 0 for all found expressions" test_run "TC01" test_result "$?" @@ -40,3 +40,6 @@ test_opts=(-o 'dft01=http://www.w3.org/1998/Math/MathML' -s '//dft01:math/dft01: test_run "TC03" test_result "$?" +test_opts=("-q" '-n' "-o" 'dft01=http://www.w3.org/1998/Math/MathML' -s '//dft01:math/dft01:mrow/dft01:mn') +test_run_count "TC04" +test_result "$?" diff --git a/xml2xpath.sh b/xml2xpath.sh index 6021b91..c72b48e 100755 --- a/xml2xpath.sh +++ b/xml2xpath.sh @@ -5,7 +5,7 @@ # script_name=$(basename "$0") -version="0.12.2" +version="0.12.3" if [ -f "VERSION" ];then read -r version < VERSION @@ -104,6 +104,7 @@ abs_path=0 print_tree=0 quiet=0 max_elements=1100000 +use_ns=0 uniq_xp=1 ns_prefix='' defns='' @@ -195,25 +196,6 @@ is_read_error(){ return "$1" } -#--------------------------------------------------------------------------------------- -# Parse namespaces from 'ls /*/namespace::*' -# n 1 default -> http://www.w3.org/1999/xhtml -# to -# default=http://www.w3.org/1999/xhtml -#--------------------------------------------------------------------------------------- -parse_ns_from_xpath(){ - - while read -r -u 3 -t "$rtout" xline || is_read_error "$?" "(parse ns stage1)"; do - printf "%s\n" "$xline" - if [ "$xline" == "/ > dir $xuuid" ]; then - break - fi - done | sed -E -e :a -e '/^[1-9]/,/^(default|namespace)/ { $!N;s/\n(default|namespace)/¦\1/;ta }' \ - -e 's/^([0-9]{1,8}) *ELEMENT *([^ ]*)/\1¦\2/' \ - -e 's/(default)? ?namespace ([a-z0-9]+)? ?href=([^=]+)¦?/\1\2=\3/g' \ - -e '/^[1-9]/ P;D' -} - #--------------------------------------------------------------------------------------- # Sent xmllint shell commands to running instance #--------------------------------------------------------------------------------------- @@ -253,27 +235,17 @@ get_xml_tree(){ # send commands to xmllint shell set_root_ns >&4 - - if [ "$isHtml" -eq 0 ]; then - # xpath command contains namespace declaration at the node - # so it will be used to make a lookup array since it provides element index. - send_cmd "xpath //*" - send_cmd "dir $xuuid" - parse_ns_from_xpath - send_cmd "dir $xuuid" - print_response "$max_elements" "(ns stage1)" - - else - send_cmd "\ndir $xuuid" - print_response 2 "(ns stage)" - fi # namespaces at root element send_cmd "ls /*/namespace::*[local-name()!='xml' and namespace-uri()!='http://www.w3.org/1999/xhtml']" send_cmd "dir $xuuid" print_response "$max_elements" "(ns stage2)" # namespaces at root element descendants. Provides full length uris. - send_cmd "ls /*//*/namespace::*[local-name()!='xml'][count(./parent::*/namespace::*[local-name()!='xml'])]" + child_ns_xpath="ls /*//*/namespace::*[local-name()!='xml'][count(./parent::*/namespace::*[local-name()!='xml'])]" + if [ "$use_ns" -eq 0 ];then + child_ns_xpath="ls /*/namespace::*[local-name()='$xuuid']" + fi + send_cmd "$child_ns_xpath" send_cmd "dir $xuuid" print_response "$max_elements" "(ns stage2)" @@ -303,7 +275,7 @@ set_root_ns(){ if [ -n "$defns" ] ;then echo "setns defaultns=" OLD_IFS="$IFS" - IFS=$'¦' read -r -a extns <<<"$defns" + IFS=$'' read -r -a extns <<<"$defns" IFS=$"$OLD_IFS" for n in "${extns[@]}"; do echo "setns $n" @@ -328,7 +300,7 @@ make_unique_ns_arr(){ elif [ "$ni" -gt 0 ]; then echo "${nsxx}${ni}=${name_uri##*=}" else - echo "${nsxx}=${name_uri##*=}" + echo "${nsxx}${ni}=${name_uri##*=}" fi ((ni=ni+1)) ;; @@ -405,7 +377,7 @@ print_all_xpath(){ #--------------------------------------------------------------------------------------- print_unique_xpath(){ if [ "$uniq_xp" -eq 1 ] ; then - sort_unique_keep_order " " + sort_unique_keep_order else cat fi @@ -447,10 +419,6 @@ init_env(){ else xprefix='/' fi - - if [ "$isHtml" -eq 0 ] && [ -z "$ns_prefix" ];then - ns_prefix="defaultns" - fi fifo_in="/tmp/xffin.$$" fifo_out="/tmp/xffout.$$" @@ -492,6 +460,7 @@ do ;; n|p) [ -n "$OPTARG" ] && ns_prefix=$OPTARG && all_opts[${#all_opts[@]}]="-p ; ns prefix=$ns_prefix" [ -z "$ns_prefix" ] && ns_prefix="defaultns" && all_opts[${#all_opts[@]}]="-n ; default ns prefix: $ns_prefix" + use_ns=1 ;; q) quiet=1 ;; @@ -545,16 +514,15 @@ print_separator "${all_opts[@]}" # Get XML namespaces and doc tree with xmllint # 'dir $xuuid' kinda NoOp that provides a record separator for awk IFS=$'¬' read -r -d '' -a xml_info < <( get_xml_tree | awk -v fs="$fs" -v ers="dir $xuuid\n" 'BEGIN{ RS=ers }{ print $0 fs }' && printf '\0' ) - # Put all found namespaces in array as = -IFS=$'\n' read -r -d '' -a all_ns_arr < <(printf "%s\n" "${xml_info[2]}" | sed -nE '/^n +1 / s/^n +1 ([^ ]+) -> ([^ ]+)/\1=\2/p') -IFS=$'\n' read -r -d '' -a root_ns_arr < <(printf "%s\n" "${xml_info[1]}" | sed -nE '/^n +1 / s/^n +1 ([^ ]+) -> ([^ ]+)/\1=\2/p' | sort_unique_keep_order) +IFS=$'\n' read -r -d '' -a all_ns_arr < <(printf "%s\n" "${xml_info[1]}" | sed -nE '/^n +1 / s/^n +1 ([^ ]+) -> ([^ ]+)/\1=\2/p') +IFS=$'\n' read -r -d '' -a root_ns_arr < <(printf "%s\n" "${xml_info[0]}" | sed -nE '/^n +1 / s/^n +1 ([^ ]+) -> ([^ ]+)/\1=\2/p' | sort_unique_keep_order) root_and_all=( "${root_ns_arr[@]}" ) root_and_all+=( "${all_ns_arr[@]}" ) declare -a arrns #arrns+=( "${root_ns_arr[@]}" ) -arrns[0]="$( for z in "${root_ns_arr[@]}";do [[ "$z" =~ ^default ]] && echo "$z";done)" +arrns[0]="$( for z in "${root_ns_arr[@]}";do [[ "$z" =~ ^default ]] && echo "$z" | tr -d '[[:space:]]';done)" k=1 while IFS=$'\n' read -r line;do #echo "<<< $k $line '$defns'" @@ -585,17 +553,19 @@ print_separator IFS=$'\n' read -r -d '' -a unique_ns_arr < <(printf "%s\n" "${root_and_all[@]}" | make_unique_ns_arr | grep -v '^ *$') if [ "${#root_ns_arr[@]}" -gt 0 ] || [ "${#unique_ns_arr[@]}" -gt 0 ];then - print_separator "\nOriginal Namespaces:" + print_separator "\nOriginal root Namespaces:" print_separator "${root_ns_arr[@]}" - print_separator - print_separator "\nMapped Namespaces:" - print_separator "${unique_ns_arr[@]}" + if [ -n "$ns_prefix" ];then + print_separator + print_separator "\nMapped Namespaces:" + printf "%s\n" "${unique_ns_arr[@]}" "" + fi else print_separator "\nNamespaces: None\n" fi print_separator -xml_tree=$(grep -Ev '^ *$|^\/' <<<"${xml_info[3]}") +xml_tree=$(grep -Ev '^ *$|^\/' <<<"${xml_info[2]}") if [ -n "$xml_tree" ];then # Array with elements like ¬element, e.g. 3¬thead @@ -629,15 +599,21 @@ print_separator "\nElements to process (build xpath, add prefix) ${#xml_tree_ilv fi if [ -n "$current_ns" ]; then ns_pfx="$(get_ns_prefix_by_uri "$current_ns")" - elif [ -z "$current_ns" ]; then #&& [ -n "$last_dflt_pfx" ] - ns_pfx="$(get_ns_prefix_by_uri "${ns_by_indent_lvl[$prev_lvl]}")" + elif [ -z "$current_ns" ]; then + # use uri from -s option if passed + if [ -n "${defns#*=}" ];then + ns_pfx="$(get_ns_prefix_by_uri "${defns#*=}")" + else + ns_pfx="$(get_ns_prefix_by_uri "${ns_by_indent_lvl[$prev_lvl]}")" + fi fi + #printf "%s\n" "$j $indent_lvl $prev_lvl '${arrns[$j]}' '${ns_by_indent_lvl[$indent_lvl]}' '${ns_by_indent_lvl[$prev_lvl]}'" else # the element has its own prefix ns_pfx='' fi # no default namespace declared on current level, reuse previous one - if [ -z "${ns_by_indent_lvl[$indent_lvl]}" ];then + if [[ -z "${ns_by_indent_lvl[$indent_lvl]}" && -n "${ns_by_indent_lvl[$prev_lvl]}" && "$prev_lvl" -ge 0 ]];then ns_by_indent_lvl[$indent_lvl]="${ns_by_indent_lvl[$prev_lvl]}" fi if [ -n "$ns_pfx" ];then @@ -662,7 +638,7 @@ print_separator "\nElements to process (build xpath, add prefix) ${#xml_tree_ilv # store current xpath xpath_arr[$indent_lvl]="${xpath}" fi - + idx=${#xpath_all[@]} xpath_all[$idx]="${xprefix}${xpath}" done @@ -697,7 +673,7 @@ print_separator "\nElements to process (build xpath, add prefix) ${#xml_tree_ilv fi fi else - log_error "No xpath found" + log_error "No xpath found\n" clean_tmp_files exit 127 fi