Skip to content

Commit

Permalink
Tokenizer changes:
Browse files Browse the repository at this point in the history
* split up the tokenizer regex for readability
* ignore inter-token whitespace
* allow backslash-escapes in strings
* disallow control characters in strings
* clean up number regex and don't allow . (or the empty string)
* the fall-through token-matching case is `.` (any single character), leaving the parser to validate it.

Parser changes:
* disallow non-string keys in objects
* detect unrecognized tokens, e.g. @ A $ etc.
* consistent quoting on case patterns
* use ${token:-EOF} in error messages

fixed wrong tests (and removed the wrong-test-generator)
added new tests.
fix test/valid-test.sh to count failures like test/invalid-test.sh
removed unnecessary outlog/errlog files (and add to .gitignore)
  • Loading branch information
medgar123 committed Oct 25, 2011
1 parent 65b1ddd commit f6e5cd2
Show file tree
Hide file tree
Showing 31 changed files with 97 additions and 113 deletions.
5 changes: 2 additions & 3 deletions .gitignore
@@ -1,3 +1,2 @@
node_modules
node_modules/*
npm_debug.log
test/errlog
test/outlog
2 changes: 1 addition & 1 deletion all-tests.sh
Expand Up @@ -27,6 +27,6 @@ done
# Print the one-line summary: SUCCESS when $fail is 0, FAILURE otherwise,
# followed by "<passed> / <tests>".
# $fail, $passed and $tests are set earlier in the script, outside this
# fragment.
if [ "$fail" -eq 0 ]; then
  printf 'SUCCESS '
else
  printf 'FAILURE '
fi
printf '%s / %s\n' "$passed" "$tests"
1 change: 0 additions & 1 deletion errlog

This file was deleted.

105 changes: 51 additions & 54 deletions parse.sh
@@ -1,93 +1,90 @@

# Report a fatal error: write the message to stderr and exit with status 1.
# Arguments: $@ - words of the message, joined by the first character of IFS
#                 (a space by default).
# Does not return; it exits the current shell (or subshell).
throw () {
  # printf prints the text exactly once and verbatim: inner whitespace is
  # kept, nothing is glob-expanded, and a message such as "-n" is not
  # swallowed as an option the way `echo` would treat it.
  printf '%s\n' "$*" >&2
  exit 1
}

# Split JSON text into tokens.
# Input:  JSON text on stdin.
# Output: one token per line on stdout. Strings, numbers and the keywords
#         null/false/true are emitted whole; every other non-whitespace
#         character becomes a one-character token, so structural characters
#         and garbage alike are left for the parser to accept or reject.
tokenize () {
  # A backslash escape: either \uXXXX or a backslash followed by any
  # character other than "u" or a control character.
  local escape='(\\[^u[:cntrl:]]|\\u[0-9a-fA-F]{4})'
  # An ordinary string character: no control characters, quotes or backslashes.
  local char='[^[:cntrl:]"\\]'
  local string="\"$char*($escape$char*)*\""
  # A fraction or exponent, when present, must contain at least one digit,
  # so "1." and "1e" are not numbers; they split into "1" plus a stray
  # character that the parser then rejects.
  local number='-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?'
  local keyword='null|false|true'
  local space='[[:space:]]+'
  # -a: treat the input as text; -o: print each match on its own line.
  # The trailing "." is the fall-through that catches any other character.
  grep -E -a -o --color=never "$string|$number|$keyword|$space|." |
    grep -E -v "^$space\$"  # eat whitespace
}

# Parse the elements of a JSON array from the token stream.
# Called after the opening "[" has been consumed into $token.
# Globals:   token (read and overwritten), value (set to the array text).
# Arguments: $1 - JSON path of this array, passed through to parse_value.
# Input:     tokens on stdin, one per line.
# Errors:    calls throw when an element is not followed by "," or "]".
#
# Does not assign `key`: shell locals are dynamically scoped, so writing it
# here would overwrite the key held by an enclosing parse_object.
parse_array () {
  local index=0
  local ary=''
  read -r token
  case "$token" in
    ']') ;;  # empty array
    *)
      while :
      do
        # After a comma the next token must start a value; a "]" here is
        # handed to parse_value, which rejects it, so "[1,]" is an error.
        parse_value "$1" "$index"
        index=$((index + 1))
        ary="$ary$value"
        read -r token
        case "$token" in
          ']') break ;;
          ',') ary="$ary," ;;
          *) throw "EXPECTED ] or , GOT ${token:-EOF}" ;;
        esac
        read -r token
      done
      ;;
  esac
  # Plain assignment: no word splitting or globbing of the element text.
  value="[$ary]"
}

# Parse the members of a JSON object from the token stream.
# Called after the opening "{" has been consumed into $token.
# Globals:   token (read and overwritten), value (set to the object text).
# Arguments: $1 - JSON path of this object, passed through to parse_value.
# Input:     tokens on stdin, one per line.
# Errors:    calls throw when a key is not a string token, when the key is
#            not followed by ":", or when a member is not followed by
#            "," or "}".
parse_object () {
  local key
  local obj=''
  read -r token
  case "$token" in
    '}') ;;  # empty object
    *)
      while :
      do
        # Keys must be quoted strings. A "}" directly after a comma also
        # lands here, so a trailing comma such as {"a":1,} is an error.
        case "$token" in
          '"'*'"') key=$token ;;
          *) throw "EXPECTED STRING, GOT ${token:-EOF}" ;;
        esac
        read -r token
        case "$token" in
          ':') ;;
          *) throw "EXPECTED COLON, GOT ${token:-EOF}" ;;
        esac
        read -r token
        parse_value "$1" "$key"
        obj="$obj$key:$value"
        read -r token
        case "$token" in
          '}') break ;;
          ',') obj="$obj," ;;
          *) throw "EXPECTED , or }, but got ${token:-EOF}" ;;
        esac
        read -r token
      done
      ;;
  esac
  value="{$obj}"
}

# Parse one JSON value whose first token is already in $token, then print a
# "[path]<TAB>value" line for it on stdout.
# Globals:   token (read), value (set to the text of the parsed value).
# Arguments: $1 - JSON path of the enclosing container (empty at the root)
#            $2 - key or index of this value inside that container
# Errors:    calls throw when $token cannot start a value.
parse_value () {
  # Join parent path and key with a comma, omitting the comma at the root.
  local jpath="${1:+$1,}$2"
  case "$token" in
    '{') parse_object "$jpath" ;;
    '[') parse_array "$jpath" ;;
    # At this point, the only valid single-character tokens are digits.
    # This arm must come before the catch-all so that stray characters
    # (and end of input, seen as an empty token) are rejected.
    ''|[^0-9]) throw "EXPECTED value, GOT ${token:-EOF}" ;;
    *) value=$token ;;
  esac
  printf "[%s]\t%s\n" "$jpath" "$value"
}

# Parse a complete JSON document from a token stream.
# Input:   tokens on stdin, one per line.
# Globals: token (overwritten); parse_value sets value and prints the
#          path/value lines.
# Errors:  calls throw if any token follows the top-level value.
parse () {
  # Read exactly one token before parsing; parse_value expects the first
  # token of the value to be in $token already.
  read -r token
  parse_value
  # At end of input read leaves token empty; anything else is trailing
  # garbage.
  read -r token
  case "$token" in
    '') ;;
    *) throw "EXPECTED EOF, GOT $token" ;;
  esac
}
10 changes: 0 additions & 10 deletions test/.generate-valid

This file was deleted.

1 change: 0 additions & 1 deletion test/errlog

This file was deleted.

18 changes: 8 additions & 10 deletions test/invalid-test.sh
Expand Up @@ -13,21 +13,19 @@ echo PWD=$PWD
# Run the parser over every file in invalid/. Each input is expected to be
# rejected (non-zero exit); an input the parser accepts counts as a failure
# and its output is shown. stdout/stderr of each run go to ./outlog and
# ./errlog.
fails=0
for input in invalid/*
do
  if ../bin/json_parse < "$input" > outlog 2> errlog
  then
    echo "NOT OK: cat $input | ../bin/json_parse SHOULD FAIL"
    echo "OUTPUT WAS >>>"
    cat outlog
    echo "<<<"
    fails=$((fails + 1))
  # else
  #   echo "OK: cat $input | ../bin/json_parse failed correctly"
  #   echo "stderr was >>>"
  #   cat errlog
  #   echo "<<<"
  fi
done
echo "$fails test(s) failed"
# Exit statuses are taken modulo 256, so cap the count: otherwise 256
# failures would be reported as success.
exit $(( fails < 255 ? fails : 255 ))
1 change: 1 addition & 0 deletions test/invalid/bad_unicode_sequence.json
@@ -0,0 +1 @@
"hello\u20world"
1 change: 1 addition & 0 deletions test/invalid/bareword.json
@@ -0,0 +1 @@
bareword
1 change: 1 addition & 0 deletions test/invalid/bracket_key.json
@@ -0,0 +1 @@
{[: "bad"}
1 change: 1 addition & 0 deletions test/invalid/colon.json
@@ -0,0 +1 @@
:
3 changes: 3 additions & 0 deletions test/invalid/colon_obj.json
@@ -0,0 +1,3 @@
{
"hello": :
}
1 change: 1 addition & 0 deletions test/invalid/control_char_in_string.json
@@ -0,0 +1 @@
"ab"
1 change: 1 addition & 0 deletions test/invalid/decimal_point.json
@@ -0,0 +1 @@
.
File renamed without changes.
1 change: 1 addition & 0 deletions test/invalid/false_key.json
@@ -0,0 +1 @@
{false: "bad"}
1 change: 1 addition & 0 deletions test/invalid/null_key.json
@@ -0,0 +1 @@
{null: "bad"}
1 change: 1 addition & 0 deletions test/invalid/number_key.json
@@ -0,0 +1 @@
{5: "bad"}
1 change: 1 addition & 0 deletions test/invalid/trailing_garbage.json
@@ -0,0 +1 @@
[1,2,3]'
1 change: 1 addition & 0 deletions test/invalid/true_key.json
@@ -0,0 +1 @@
{true: "bad"}
1 change: 1 addition & 0 deletions test/invalid/unclosed_string.json
@@ -0,0 +1 @@
"Hello world
1 change: 1 addition & 0 deletions test/invalid/weird.json
@@ -0,0 +1 @@
@
1 change: 1 addition & 0 deletions test/invalid/weird_key.json
@@ -0,0 +1 @@
{@: "bad"}
Empty file removed test/out
Empty file.
1 change: 0 additions & 1 deletion test/outlog

This file was deleted.

4 changes: 2 additions & 2 deletions test/tokenizer-test.sh
Expand Up @@ -4,7 +4,7 @@ __filename=`readlink -f $0`
__dirname=`dirname $__filename`
cd $__dirname

. $__dirname/../parse.sh
. ../parse.sh
set -e

diff <( echo '"dah"' | tokenize ) <( echo '"dah"' )
Expand Down Expand Up @@ -37,4 +37,4 @@ diff <( echo '[ null , -110e10, "null" ]' \
diff <( echo '{"e": false}' | tokenize ) <( printf '{\n"e"\n:\nfalse\n}\n' )
diff <( echo '{"e": "string"}' | tokenize ) <( printf '{\n"e"\n:\n"string"\n}\n' )

cat ../package.json | tokenize
cat ../package.json | tokenize
38 changes: 11 additions & 27 deletions test/valid-test.sh
Expand Up @@ -5,30 +5,14 @@ __dirname=`dirname $__filename`
cd $__dirname

set -e
# valid/array.json
diff <(cat valid/array.json | ../bin/json_parse) valid/array.parsed
echo OK valid/array.json
# valid/empty_array.json
diff <(cat valid/empty_array.json | ../bin/json_parse) valid/empty_array.parsed
echo OK valid/empty_array.json
# valid/empty_object.json
diff <(cat valid/empty_object.json | ../bin/json_parse) valid/empty_object.parsed
echo OK valid/empty_object.json
# valid/many_object.json
diff <(cat valid/many_object.json | ../bin/json_parse) valid/many_object.parsed
echo OK valid/many_object.json
# valid/nested_array.json
diff <(cat valid/nested_array.json | ../bin/json_parse) valid/nested_array.parsed
echo OK valid/nested_array.json
# valid/nested_object.json
diff <(cat valid/nested_object.json | ../bin/json_parse) valid/nested_object.parsed
echo OK valid/nested_object.json
# valid/number.json
diff <(cat valid/number.json | ../bin/json_parse) valid/number.parsed
echo OK valid/number.json
# valid/object.json
diff <(cat valid/object.json | ../bin/json_parse) valid/object.parsed
echo OK valid/object.json
# valid/string.json
diff <(cat valid/string.json | ../bin/json_parse) valid/string.parsed
echo OK valid/string.json
# Run the parser over every valid/*.json file and compare its output with
# the matching valid/*.parsed file. diff -u prints any difference; each
# mismatch counts as one failure.
fails=0
for input in valid/*.json
do
  expected="${input%.json}.parsed"
  if ! ../bin/json_parse < "$input" | diff -u - "$expected"
  then
    fails=$((fails + 1))
  fi
done
echo "$fails test(s) failed"
# Exit statuses are taken modulo 256, so cap the count: otherwise 256
# failures would be reported as success.
exit $(( fails < 255 ? fails : 255 ))
4 changes: 2 additions & 2 deletions test/valid/nested_array.parsed
Expand Up @@ -5,5 +5,5 @@
[2,2] {}
[2] [4,"hello",{}]
[3,"array"] []
[3] {0:[]}
[] [1,[],[4,"hello",{}],{0:[]}]
[3] {"array":[]}
[] [1,[],[4,"hello",{}],{"array":[]}]
2 changes: 1 addition & 1 deletion test/valid/nested_object.parsed
Expand Up @@ -2,4 +2,4 @@
["object","empty"] {}
["object"] {"key":"value","empty":{}}
["number"] 5
[] {"empty":{"key":"value","empty":{}},"number":5}
[] {"object":{"key":"value","empty":{}},"number":5}
1 change: 1 addition & 0 deletions test/valid/tab_escape.json
@@ -0,0 +1 @@
"hello\tworld"
1 change: 1 addition & 0 deletions test/valid/tab_escape.parsed
@@ -0,0 +1 @@
[] "hello\tworld"

0 comments on commit f6e5cd2

Please sign in to comment.