Permalink
Browse files

Version 1.0

  • Loading branch information...
1 parent ea2cfbb commit 84840a68afa9b4324a14af7d7284e8efdf377e1a stepk committed Apr 6, 2013
Showing with 344 additions and 1 deletion.
  1. +240 −0 JSON.awk
  2. +15 −0 LICENSE.APACHE2
  3. +24 −0 LICENSE.MIT
  4. +65 −1 README.md
View
@@ -0,0 +1,240 @@
+#!/bin/awk -f
+#
+# Software: JSON.awk - a practical JSON parser written in awk
+# Version: 1.0
+# Author: step- on github.com
+# License: This software is licensed under the MIT or the Apache 2 license.
+# Project home: https://github.com/step-/JSON.awk.git
+# Credits: This software includes major portions of JSON.sh, a pipeable JSON
+# parser written in Bash, retrieved on 20130313
+# https://github.com/dominictarr/JSON.sh
+#
+
+BEGIN { #{{{
+    # Start-up: set options, clear the error log, read the list of input
+    # files from stdin into ARGV[], and switch to whole-file records.
+
+    # option BRIEF: when 1, parse() omits printing non-leaf nodes
+    BRIEF=1;
+    # option STREAM: when 0, parse() prints nothing to stdout and stores
+    # jpaths in JPATHS[]; with any other value jpaths are printed as parsed
+    STREAM=1;
+    # for each input file:
+    #   TOKENS[], NTOKENS, ITOKENS - tokens after tokenize()
+    #   JPATHS[], NJPATHS - parsed data (when STREAM=0)
+    # at script exit:
+    #   FAILS[] - maps names of invalid files to logged error lines
+    delete FAILS
+
+    # filepathnames from stdin, one per line; an empty line ends the list
+    # usage: echo -e "file1\nfile2\n" | awk -f JSON.awk
+    # usage: { echo -; echo; cat; } | awk -f JSON.awk
+    #
+    # getline returns 1 on success, 0 at end of file and -1 on error.
+    # Testing for "> 0" stops the loop on a read error as well; a bare
+    # truth test would treat -1 as true and never terminate.
+    while ((getline ARGV[++ARGC] < "/dev/stdin") > 0) {
+        if (ARGV[ARGC] == "")
+            break
+    }
+    # set file slurping mode: RS becomes a string that is not expected to
+    # occur in any input, so each file is read as a single record
+    srand(); RS="n/o/m/a/t/c/h" rand()
+}
+#}}}
+
+{ # main loop: process each file in turn {{{
+    # $0 is meant to hold one whole input file (see the RS setting in BEGIN).
+    reset() # See important application note in reset()
+
+    # Split the record into TOKENS[]; the tokens can then be walked with
+    # while(get_token()) {print TOKEN}
+    tokenize($0)
+
+    # parse() returns 0 when the token stream is valid JSON; only then is
+    # the collected JPATHS[] array handed over to apply().
+    if (parse() == 0)
+        apply(JPATHS, NJPATHS)
+}
+#}}}
+
+END { # process invalid files {{{
+    # FAILS[] is keyed by file name; each value holds the newline-joined
+    # error messages that scream() logged for that file.
+    for (name in FAILS)
+        print "invalid: " name ORS FAILS[name]
+}
+#}}}
+
+function apply (ary, size,   i) { # stub {{{
+# Print every element of ary[1..size], one per line.
+#   ary  - array indexed from 1 (the main loop passes JPATHS)
+#   size - number of elements in ary (the main loop passes NJPATHS)
+#   i    - local loop index
+# This is a stub meant to be replaced by application code.
+
+    # Elements are stored at 1..size inclusive (JPATHS[++NJPATHS] = x), so
+    # the loop must include index "size"; "i<size" dropped the last one.
+    for (i=1; i<=size; i++)
+        print ary[i]
+}
+#}}}
+
+function get_token() { #{{{
+# usage: {tokenize($0); while(get_token()) {print TOKEN}}
+#
+# Advance ITOKENS by one and copy the token at the new position into the
+# global TOKEN. Returns 1 while the new position is still below NTOKENS,
+# 0 otherwise.
+
+    # return getline TOKEN # for external tokenizer
+
+    # for internal tokenize()
+    ITOKENS = ITOKENS + 1
+    TOKEN = TOKENS[ITOKENS]
+    return (ITOKENS < NTOKENS)
+}
+#}}}
+
+function parse_array(a1,   idx,ary,ret) { #{{{
+# Parse a JSON array; the opening "[" has already been consumed by the caller.
+#   a1  - jpath of the array; each element is parsed with parse_value(a1, idx)
+#   idx - local: zero-based index of the current element
+#   ary - local: comma-joined VALUEs of the elements parsed so far
+#   ret - local: error code returned by parse_value()
+# Returns 0 on success, 2 when an element is followed by something other
+# than "," or "]", or the non-zero code returned by parse_value().
+# On success VALUE is "[...]" built from ary, or "" when BRIEF is 1.
+
+    idx=0
+    ary=""
+    get_token()
+#scream("parse_array(" a1 ") TOKEN=" TOKEN)
+    if (TOKEN != "]") {
+        while (1) {
+            if (ret = parse_value(a1, idx)) {
+                return ret
+            }
+            idx=idx+1
+            ary=ary VALUE
+            get_token()
+            if (TOKEN == "]") {
+                break
+            } else if (TOKEN == ",") {
+                ary = ary ","
+            } else {
+                # Compare against "" rather than testing TOKEN's truth value:
+                # a numeric-looking token such as 0 is false in a boolean
+                # context and would be misreported as EOF. parse_value()
+                # uses the same TOKEN!="" test.
+                report(", or ]", TOKEN != "" ? TOKEN : "EOF")
+                return 2
+            }
+            get_token()
+        }
+    }
+    if (1 != BRIEF) {
+        VALUE=sprintf("[%s]", ary)
+    } else {
+        VALUE=""
+    }
+    return 0
+}
+#}}}
+
+function parse_object(a1,   key,obj) { #{{{
+# Parse a JSON object; the opening "{" has already been consumed by the caller.
+#   a1  - jpath of the object; each member is parsed with parse_value(a1, key)
+#   key - local: current member name, a token including its double quotes
+#   obj - local: comma-joined key:VALUE pairs parsed so far
+# Returns 0 on success, otherwise:
+#   3 - a member name is not a double-quoted string
+#   4 - the member name is not followed by ":"
+#   5 - parse_value() failed on the member value
+#   6 - a member is followed by something other than "," or "}"
+# On success VALUE is "{...}" built from obj, or "" when BRIEF is 1.
+#
+# Error reports compare TOKEN against "" rather than testing its truth
+# value: a numeric-looking token such as 0 is false in a boolean context
+# and would be misreported as EOF. parse_value() uses the same test.
+
+    obj=""
+    get_token()
+#scream("parse_object(" a1 ") TOKEN=" TOKEN)
+    if (TOKEN != "}") {
+        while (1) {
+            if (TOKEN ~ /^".*"$/) {
+                key=TOKEN
+            } else {
+                report("string", TOKEN != "" ? TOKEN : "EOF")
+                return 3
+            }
+            get_token()
+            if (TOKEN != ":") {
+                report(":", TOKEN != "" ? TOKEN : "EOF")
+                return 4
+            }
+            get_token()
+            if (parse_value(a1, key)) {
+                return 5
+            }
+            obj=obj key ":" VALUE
+            get_token()
+            if (TOKEN == "}") {
+                break
+            } else if (TOKEN == ",") {
+                obj=obj ","
+            } else {
+                report(", or }", TOKEN != "" ? TOKEN : "EOF")
+                return 6
+            }
+            get_token()
+        }
+    }
+    if (1 != BRIEF) {
+        VALUE=sprintf("{%s}", obj)
+    } else {
+        VALUE=""
+    }
+    return 0
+}
+#}}}
+
+function parse_value(a1,a2,   jpath,ret,line) { #{{{
+# Parse the JSON value that starts at the current TOKEN.
+#   a1 - jpath of the enclosing container ("" or unset at the top level)
+#   a2 - key or index of this value inside that container
+# Returns 0 on success, 7 when parse_object() fails, 9 when TOKEN cannot
+# start a value, or the non-zero code returned by parse_array().
+# On success the result is left in VALUE and, unless suppressed by BRIEF,
+# a "[jpath]<TAB>VALUE" line is printed or appended to JPATHS[].
+
+    # Join parent path and key with a comma; shell original: "${1:+$1,}$2"
+    jpath = (a1 == "") ? ("" a2) : (a1 "," a2)
+#scream("parse_value(" a1 "," a2 ") TOKEN=" TOKEN ", jpath=" jpath)
+
+    if (TOKEN == "{") {
+        if (parse_object(jpath))
+            return 7
+    } else if (TOKEN == "[") {
+        ret = parse_array(jpath)
+        if (ret)
+            return ret
+    } else if (TOKEN ~ /^(|[^0-9])$/) {
+        # At this point, the only valid single-character tokens are digits.
+        report("value", TOKEN!="" ? TOKEN : "EOF")
+        return 9
+    } else {
+        VALUE = TOKEN
+    }
+
+    # In BRIEF mode, nodes with an empty jpath or an empty VALUE are not emitted.
+    if (BRIEF == 1 && (jpath == "" || VALUE == ""))
+        return 0
+
+    line = sprintf("[%s]\t%s", jpath, VALUE)
+    if (STREAM == 0)
+        JPATHS[++NJPATHS] = line
+    else
+        print line
+    return 0
+}
+#}}}
+
+function parse(   status) { #{{{
+# Parse the token stream prepared by tokenize() as a single JSON value.
+# Returns 0 on success, 11 when get_token() still reports a token after
+# the value, or the non-zero code returned by parse_value().
+
+    get_token()
+    status = parse_value()
+    if (status)
+        return status
+
+    # Nothing may follow the top-level value.
+    if (!get_token())
+        return 0
+
+    report("EOF", TOKEN)
+    return 11
+}
+#}}}
+
+function report(expected, got) { #{{{
+# Log a syntax error through scream().
+#   expected - description of the token the parser was looking for
+#   got      - the token actually found (callers pass "EOF" for none)
+# The message also carries ITOKENS, the index of the current input token.
+
+    scream(sprintf("expected <%s> but got <%s> at input token %s", expected, got, ITOKENS))
+}
+#}}}
+
+function reset() { #{{{
+# Clear the per-file parser state: TOKEN, TOKENS[], NTOKENS, ITOKENS,
+# JPATHS[], NJPATHS and VALUE.
+#
+# Application Note:
+# If you need to build JPATHS[] incrementally from multiple input files:
+# 1) Comment out below: delete JPATHS; NJPATHS=0
+#    otherwise each new input file would reset JPATHS[].
+# 2) Move the call to apply() from the main loop to the END statement.
+# 3) In the main loop consider adding code that deletes partial JPATHS[]
+#    elements that would result from parsing invalid JSON files.
+
+    # tokenizer state
+    delete TOKENS
+    NTOKENS = 0
+    ITOKENS = 0
+    TOKEN = ""
+
+    # parser output
+    VALUE = ""
+    delete JPATHS; NJPATHS=0
+}
+#}}}
+
+function scream(msg) { #{{{
+# Record msg in FAILS[] under the current FILENAME and echo it to stderr.
+#   msg - the error text, without the file name prefix
+# Messages for the same file are accumulated, separated by newlines.
+
+    if (FAILS[FILENAME] != "")
+        FAILS[FILENAME] = FAILS[FILENAME] "\n" msg
+    else
+        FAILS[FILENAME] = msg ""
+
+    # The stderr copy is prefixed with the file name; the logged copy is not.
+    print (FILENAME ": " msg) > "/dev/stderr"
+}
+#}}}
+
+function tokenize(a1,   ESCAPE,CHAR,STRING,NUMBER,KEYWORD,SPACE) { #{{{
+# usage A: {for(i=1; i<=tokenize($0); i++) print TOKENS[i]}
+# see also get_token()
+#
+# Split the JSON text a1 into tokens.
+#   a1 - the text to tokenize (the main loop passes $0)
+# Fills the global TOKENS[1..NTOKENS], resets ITOKENS to 0 for get_token()
+# and returns NTOKENS. Whitespace between tokens is dropped. When a1 ends
+# with whitespace, the last element of TOKENS[] is an empty string.
+
+    # POSIX character classes (gawk) - contact me for non-[:class:] notation
+    #
+    # These patterns are used as dynamic regexps, so each string is scanned
+    # twice: once as an awk string constant and once as a regexp. A literal
+    # backslash in the regexp therefore needs four backslashes here. With
+    # only two, the regexp engine saw "\[" (a literal bracket) in ESCAPE
+    # and CHAR did not exclude the backslash.
+    ESCAPE="(\\\\[^u[:cntrl:]]|\\\\u[0-9a-fA-F]{4})"
+    CHAR="[^[:cntrl:]\"\\\\]"
+    STRING="\"" CHAR "*(" ESCAPE CHAR "*)*\""
+    NUMBER="-?(0|[1-9][0-9]*)([.][0-9]*)?([eE][+-]?[0-9]*)?"
+    KEYWORD="null|false|true"
+    SPACE="[[:space:]]+"
+
+    # Put a newline in front of every token. The final "." alternative turns
+    # any other character (punctuation or garbage) into a one-character token.
+    # With correct escaping, \" inside a string is matched by ESCAPE, so no
+    # placeholder substitution for escaped quotes is needed. That substitution
+    # also mis-tokenized strings ending in an escaped backslash ("x\\").
+    gsub(STRING "|" NUMBER "|" KEYWORD "|" SPACE "|.", "\n&", a1)
+    # Drop the whitespace tokens.
+    gsub("\n" SPACE, "\n", a1)
+    sub(/^\n/, "", a1)
+    ITOKENS=0 # get_token() helper
+    return NTOKENS = split(a1, TOKENS, /\n/)
+}
+#}}}
+
+# vim:fdm=marker:
View
@@ -0,0 +1,15 @@
+Apache License, Version 2.0
+
+Copyright (c) 2013 step-
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
View
@@ -0,0 +1,24 @@
+The MIT License
+
+Copyright (c) 2013 step-
+
+Permission is hereby granted, free of charge,
+to any person obtaining a copy of this software and
+associated documentation files (the "Software"), to
+deal in the Software without restriction, including
+without limitation the rights to use, copy, modify,
+merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom
+the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
+ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
View
@@ -1,4 +1,68 @@
JSON.awk
========
-https://bitbucket.org/ixtab/kindlelauncher/overview
+A practical JSON parser written in awk.
+
+Quick Start
+-----------
+
+This software is based on [JSON.sh](https://github.com/dominictarr/JSON.sh), a pipeable JSON parser written in Bash, retrieved on 2013-03-13 and herein ported to awk. JSON.awk is a self-contained script with no external dependencies.
+
+Features
+--------
+
+* JSON.sh compatible output format (as of 2013-03-13)
+* Can parse one or multiple input files in a single invocation
+* Captures invalid JSON input and processes it upon exiting
+* Written for awk, does not require gawk extensions
+* Does not depend on external programs
+* Choice of MIT or Apache 2 license
+
+Setup
+-----
+
+Just drop the file JSON.awk in your project folder and run it as an awk script. You need to specify input arguments in a slightly unconventional way, so pay attention to usage notes.
+
+Usage
+-----
+
+JSON.awk takes no input arguments on the command-line. Instead it reads a list of input filenames from stdin, one filename per line. An empty line marks the end of the list:
+
+```sh
+echo -e "file1\nfile2\n" | awk -f JSON.awk
+```
+
+Of course you can use redirection instead of piping:
+
+```sh
+ echo -e "file1\nfile2\n" > list && awk -f JSON.awk < list
+```
+
+To pass JSON from stdin you can use:
+
+```sh
+ { echo -; echo; cat; } | awk -f JSON.awk
+```
+
+Real-Life Examples
+------------------
+
+* [KindleLauncher](https://bitbucket.org/ixtab/kindlelauncher/overview) a.k.a. KUAL, an application launcher for the Kindle e-ink models, uses JSON.awk to parse menu descriptions.
+
+Application Notes
+-----------------
+
+Within a single invocation JSON.awk processes each input file separately from all other input files. This means that it resets internal data structures upon reading each input file. However your application may need to process all files as a single lump. To enable such mode please read the comments in function reset() in the source code.
+
+License
+-------
+
+This software is available under the following licenses:
+
+* MIT
+* Apache 2
+
+Credits
+-------
+
+Without [JSON.sh](https://github.com/dominictarr/JSON.sh) this software would not exist. It owes JSON.sh its entire tokenizer and parser logic.

0 comments on commit 84840a6

Please sign in to comment.