Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[regex] Parse quoted regex metacharacters as bash does.
bash-completion relies on this odd behavior. Also extract a gold test case and a benchmark from it (testdata/parse-help/excerpt.sh). The benchmark still needs to be automated.
- Loading branch information
Andy Chu
committed
Feb 4, 2019
1 parent
3958d87
commit e62f99c
Showing
11 changed files
with
571 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/bin/bash | ||
# | ||
# A pure string-processing benchmark extracted from bash-completion. | ||
# | ||
# Usage: | ||
# ./parse-help.sh <function name> | ||
|
||
# Strict mode: fail on unset variables, on any failing pipeline stage, and on
# any unhandled non-zero exit status.
set -o nounset
set -o pipefail
set -o errexit

# Directory holding the captured --help outputs used as benchmark input.
readonly DATA_DIR='testdata/parse-help'
# Script under test; derived from DATA_DIR so the two paths cannot drift apart.
readonly EXCERPT="$DATA_DIR/excerpt.sh"
|
||
# Capture the --help output of `ls` and `mypy` into $DATA_DIR, then print the
# line count of every file in that directory.
#
# Globals: DATA_DIR (read)
#
# TODO: Check these in to testdata/parse-help
collect() {
  mkdir -p "$DATA_DIR"

  ls --help > "$DATA_DIR/ls.txt"
  # NOTE(review): hard-coded per-user install location of mypy.
  ~/.local/bin/mypy --help > "$DATA_DIR/mypy.txt"

  wc -l "$DATA_DIR"/*
}
|
||
# Write the first two option lines (lines starting with spaces followed by
# '-') of $DATA_DIR/ls.txt to $DATA_DIR/ls-short.txt, echoing them to stdout.
#
# Globals: DATA_DIR (read)
shorten() {
  # grep -m 2 stops after two matches by itself.  The previous
  # `egrep ... | head -n 2` could kill grep with SIGPIPE, which aborts the
  # script when pipefail + errexit are on; egrep is also obsolescent.
  grep -E -m 2 '^[ ]+-' "$DATA_DIR/ls.txt" | tee "$DATA_DIR/ls-short.txt"
}
|
||
# Time one shell running _parse_help from $EXCERPT over one captured help file.
#
# Arguments:
#   $1 - shell executable to run the excerpt with (e.g. bash, bin/osh)
#   $2 - data set name; the input is $DATA_DIR/<name>.txt
# Globals: DATA_DIR, EXCERPT (read)
# Outputs: the excerpt's output on stdout; the `time` report on stderr
run-cmd() {
  local sh=$1
  local cmd=$2
  # read from stdin
  time cat "$DATA_DIR/$cmd.txt" \
    | "$sh" "$EXCERPT" _parse_help -
}
|
||
# Timings:
#         ls      mypy
# bash    25ms    25ms
# OSH     600ms   900ms   There is a lot of variance here too.

# So OSH is at least ~25x slower than bash here.  This is a computationally
# expensive workload, and part of the gap is because printf is not a builtin
# (presumably in OSH).
#
# TODO
# - Count the number of printf invocations.  But you have to do it recursively!
# - Turn this into a proper benchmark with an HTML page.
|
||
# Run the benchmark for every shell / data set combination.
#
# Prints the line counts of the files in $DATA_DIR, then a banner per shell.
# The output of each run-cmd call is discarded; only its `time` report (on
# stderr) remains visible.
#
# Globals: DATA_DIR (read)
all() {
  wc -l "$DATA_DIR"/*

  # Keep the loop variables out of the caller's scope.
  local sh cmd
  for sh in bash bin/osh; do
    echo
    echo "--- $sh --- "
    echo

    for cmd in ls-short ls mypy; do
      run-cmd "$sh" "$cmd" >/dev/null
    done
  done
}
|
||
one() { | ||
local sh='bin/osh' | ||
local cmd='ls-short' | ||
export PS4='+[${LINENO}:${FUNCNAME[0]}] ' | ||
time cat $DATA_DIR/$cmd.txt | $sh -x $EXCERPT _parse_help - | ||
} | ||
|
||
# Time the ls-short data set under bin/osh and then under bash, printing a
# '---' separator between the two outputs.
#
# Globals: DATA_DIR, EXCERPT (read)
compare-one() {
  local cmd='ls-short'
  time cat "$DATA_DIR/$cmd.txt" | bin/osh "$EXCERPT" _parse_help -
  echo ---
  time cat "$DATA_DIR/$cmd.txt" | bash "$EXCERPT" _parse_help -
}

# Dispatch: run the function named by the first argument with the remaining
# arguments.
"$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/bin/bash | ||
# | ||
# A string processing test case copied from bash_completion. | ||
|
||
# This function shell-quotes the argument
#
# @param $1  string to quote
# Prints $1 wrapped in single quotes, with every embedded single quote
# rewritten as '\'' (close quote, escaped quote, reopen quote).  No trailing
# newline is printed.
quote()
{
    local quoted=${1//\'/\'\\\'\'}
    printf "'%s'" "$quoted"
}
|
||
# This function shell-dequotes the argument
#
# @param $1  shell-quoted string
# Prints the result of letting the shell evaluate $1 as printf's argument
# words; anything written to stderr is discarded.
#
# NOTE(review): this is eval on the argument, so any expansion or command
# substitution inside $1 is executed.  Only pass trusted input.
dequote()
{
    eval printf %s "$1" 2> /dev/null
}
|
||
# Helper function for _parse_help and _parse_usage.
#
# Picks one option out of a line of help text and prints it on stdout, one
# name per line.
#
# @param $1  help text line, e.g. "-f , --foo=FOO"
# @return    non-zero, with no output, if no option was found at the start
#            of the line
#
# The logic is intentionally kept as copied from bash_completion (this file
# is a test case for that behavior); only comments were added.
__parse_options()
{
    # Besides whitespace, also split words on ',' '/' and '|'.
    local option option2 i IFS=$' \t\n,/|'

    # Take first found long option, or first one (short) if not found.
    option=
    local -a array
    read -a array <<<"$1"
    for i in "${array[@]}"; do
        case "$i" in
            ---*) break ;;                       # three or more dashes: stop
            --?*) option=$i ; break ;;           # long option: take it, stop
            -?*)  [[ $option ]] || option=$i ;;  # keep only the first short one
            *)    break ;;                       # first non-option word: stop
        esac
    done
    [[ $option ]] || return

    IFS=$' \t\n' # affects parsing of the regexps below...

    # Expand --[no]foo to --foo and --nofoo etc
    if [[ $option =~ (\[((no|dont)-?)\]). ]]; then
        # First variant: the bracketed part removed entirely.
        option2=${option/"${BASH_REMATCH[1]}"/}
        option2=${option2%%[<{().[]*}
        printf '%s\n' "${option2/=*/=}"
        # Second variant: the brackets dropped but their content kept.
        option=${option/"${BASH_REMATCH[1]}"/"${BASH_REMATCH[2]}"}
    fi

    # Cut at the first of < { ( ) . [ and reduce "=VALUE" to a bare "=".
    option=${option%%[<{().[]*}
    printf '%s\n' "${option/=*/=}"
}
# Parse GNU style help output of the given command.
# @param $1  command; if "-", read from stdin and ignore rest of args
# @param $2  command options (default: --help)
#
# Each input line that starts (after optional blanks) with '-' is handed to
# __parse_options, whose output goes to stdout.
#
# The logic is intentionally kept as copied from bash_completion (this file
# is a test case for that behavior); only comments were added.
_parse_help()
{
    eval local cmd=$( quote "$1" )
    local line
    { case $cmd in
        -) cat ;;
        # $2 is deliberately unquoted so that it may carry several options.
        *) LC_ALL=C "$( dequote "$cmd" )" ${2:---help} 2>&1 ;;
      esac } \
    | while read -r line; do

        [[ $line == *([[:blank:]])-* ]] || continue
        # transform "-f FOO, --foo=FOO" to "-f , --foo=FOO" etc
        while [[ $line =~ \
            ((^|[^-])-[A-Za-z0-9?][[:space:]]+)\[?[A-Z0-9]+\]? ]]; do
            line=${line/"${BASH_REMATCH[0]}"/"${BASH_REMATCH[1]}"}
        done
        __parse_options "${line// or /, }"

    done
}

# Dispatch: run the function named by the first argument with the remaining
# arguments.
"$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
-a, --all do not ignore entries starting with . | ||
-A, --almost-all do not list implied . and .. |
Oops, something went wrong.