Permalink
Browse files

initial commit

  • Loading branch information...
0 parents commit 20c355fae92185877835840fe61a19d6bbff0894 @mnunberg committed Mar 24, 2012
Showing with 701 additions and 0 deletions.
  1. +20 −0 LICENSE
  2. +14 −0 Makefile
  3. +131 −0 README.pod
  4. +107 −0 json_test.c
  5. +260 −0 jsonsl.c
  6. +169 −0 jsonsl.h
20 LICENSE
@@ -0,0 +1,20 @@
+Copyright (c) 2012 M. Nunberg, mnunberg@haskalah.org
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,14 @@
+
+# Default goal: build the test driver and the shared library it links against.
+all: json_test libjsonsl.so
+
+# 'all' and 'clean' name actions, not files; declaring them phony keeps a
+# stray file called 'all' or 'clean' from making these targets look up to date.
+.PHONY: all clean
+
+CFLAGS=-Wall -ggdb3 -O0
+
+# The rpath is set to the build directory so that json_test can locate
+# libjsonsl.so at run time without the library being installed.
+json_test: json_test.c libjsonsl.so
+	$(CC) $(CFLAGS) $< -o $@ -I. -L. -Wl,-rpath=$(shell pwd) -ljsonsl
+
+libjsonsl.so: jsonsl.c
+	$(CC) -g -ggdb3 -shared -fPIC -o $@ $^
+
+# Remove everything the rules above produce.
+clean:
+	rm -f *.o json_test *.so
+
@@ -0,0 +1,131 @@
+=head1 JSONSL
+
+JSON Stateful (or Simple, or Stacked) Lexer
+
+=head1 Why another (and another) JSON parser?
+
+I took inspiration from some of the uses of I<YAJL>, which looked
+quite nice, but whose build system seemed unusable, source horribly
+mangled, and grown beyond its original design. In other words, I saw
+it as a bunch of cruft.
+
+Instead of bothering to spend a few days figuring out how to use it,
+I came to the conclusion that the tasks I needed (simple token
+notifications coupled with some kind of state shift detection) could
+be done with a simple, small, ANSI C embeddable source file.
+
+I am still not sure if I<YAJL> provides the featureset of I<JSONSL>, but
+I'm guessing I've got at least I<some> innovation.
+
+I<JSONSL>
+
+Inspiration was also taken from Joyent's B<http-parser> project, which
+seems to use a similar, embeddable, and simple model.
+
+Here's a quick featureset
+
+=over
+
+=item Stateful
+
+Maintains state about the current descent/recursion/nesting level.
+Furthermore, you can access information about 'lower' stacks
+as long as they are active.
+
+=item Callback oriented, selectively
+
+Invokes callbacks for all sorts of events, but you can control
+which kind of events you are interested in receiving without
+writing a ton of wrapper stubs
+
+=item Non-Buffering
+
+This doesn't buffer, copy, or allocate any data. The only allocation
+overhead is during the initialization of the parser, in which the
+initial stack structures are initialized
+
+=item Simple
+
+Just a C source file, and a corresponding header file. ANSI C.
+
+=back
+
+The rest of this documentation needs work
+
+=head1 Details
+
+=head2 Terminology
+
+Because the JSON spec is quite confusing in its terminology, especially
+when we want to map it to a different model, here is a listing of the
+terminology used here.
+
+I will use I<element>, I<object>, I<nest> interchangeably. They all
+refer to some form of atomic unit as far as JSON is concerned.
+
+I will use the term I<hash> for those things which look like C<{"foo":"bar"}>,
+and refer to its contents as I<keys> and I<values>
+
+I will use the term I<list> for those things which look like C<["hello", "byebye"]>,
+and their contents as I<list elements> or I<array elements> explicitly
+
+
+=head2 Model
+
+JSONSL notifies you of basic I<state> and I<nesting> events.
+
+A I<state> change occurs when a given I<nesting> begins or ends. For example,
+consider the following input, annotated with its nesting levels:
+
+ Level 0
+ {
+
+ Level 1
+
+ Level 2
+ "ABC"
+ :
+ Level 2
+ "XYZ"
+ ,
+
+ Level 1
+
+ [
+ Level 2
+
+ {
+ Level 3
+
+ Level 4
+ "Foo":"Bar"
+
+ Level 3
+ }
+ Level 2
+ ]
+ Level 1
+ }
+
+=over
+
+=item The Stack
+
+JSONSL's basic object type is the C<struct jsonsl_nest_st> which may be thought
+of as a stack frame.
+
+The nest contains information about its JSON type,
+the position in the input when it was first created, and the position in the
+input where it last re-gained control.
+
+Stacks can regain control by having an inner stack return (just like in your
+programs).
+
+Stacks are valid and will persist in the parser until they have themselves
+'returned' - meaning when their closing tokens have been encountered.
+
+This allows for some rather powerful manipulation and extraction of smaller
+JSON objects from a larger JSON stream using a high-performance and simple
+C-interface
+
+
@@ -0,0 +1,107 @@
+#include "jsonsl.h"
+#include <stdio.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+
+
+/**
+ * Print a slice of text to stdout, indented according to a nesting depth.
+ *
+ * One tab per level is written before the text. After every newline found
+ * in the text, one space per level is written. Printing stops after nbuf
+ * bytes or at the first NUL byte, whichever comes first, and the output is
+ * always finished with a newline.
+ *
+ * @param buf    text to print; only read, never modified
+ * @param nbuf   maximum number of bytes to read from buf
+ * @param levels indentation depth; a value <= 0 produces no indentation
+ */
+void fmt_level(const char *buf, size_t nbuf, int levels)
+{
+    /* The cursor stays const-qualified: the buffer is only read, and a
+     * plain 'char *' initialised from buf would discard the qualifier. */
+    const char *c = buf;
+    int ii;
+
+    for (ii = 0; ii < levels; ii++) {
+        putchar('\t');
+    }
+
+    /* nbuf is tested first so that no byte past the given length is read. */
+    while (nbuf && *c) {
+        putchar(*c);
+        if (*c == '\n') {
+            /* Re-indent the continuation line. */
+            for (ii = 0; ii < levels; ii++) {
+                putchar(' ');
+            }
+        }
+        c++;
+        nbuf--;
+    }
+    putchar('\n');
+}
+
+/**
+ * Nest callback: report a nesting state change on stdout and, when a
+ * nesting ends, dump the text it covered.
+ *
+ * @param jsn   the parser object; its pos field is printed as the byte offset
+ * @param state the state of the nesting (BEGIN or END); on JSONSL_STATE_END
+ *              the closed item is printed as well
+ * @param nest  information about the level of nesting we are descending
+ *              into / ascending from (level, type, pos_begin and pos_cur
+ *              are read here)
+ * @param buf   pointer into the input for this event. NOTE(review): on END
+ *              this code treats it as the position where the item ends and
+ *              steps back by the item length to find its start; confirm
+ *              against jsonsl.c.
+ */
+void nest_callback(jsonsl_t jsn,
+                   jsonsl_state_t state,
+                   struct jsonsl_nest_st *nest,
+                   const char *buf)
+{
+    printf("at byte %d: Got nest callback, nesting=%d state=%c, T='%c'\n",
+           jsn->pos, nest->level, state, nest->type);
+    if (state == JSONSL_STATE_END) {
+        size_t nest_len = nest->pos_cur - nest->pos_begin;
+
+        /* buf is const, so the derived start pointer must be const too. */
+        const char *buf_begin = buf - nest_len;
+
+        /* A size_t must not be printed with %d; widen it explicitly and
+         * use the matching conversion. */
+        printf("Item closed, %lu bytes long\n", (unsigned long)nest_len);
+        fmt_level(buf_begin, nest_len, nest->level);
+    }
+}
+
+/**
+ * Error callback: print diagnostics about a parse error, then abort.
+ *
+ * In theory a callback like this could return a true value after
+ * 'correcting' the input and seeking ahead in the buffer; this one never
+ * gets that far because it terminates the process.
+ *
+ * @param jsn   the parser object (unused here)
+ * @param err   the error code, printed as an integer
+ * @param nest  the nesting in which the error occurred (unused here)
+ * @param errat pointer to the offending character; it is also printed as
+ *              a string, so the text it points into must be NUL-terminated
+ * @return 0, but only nominally: abort() is called before the return
+ */
+int error_callback(jsonsl_t jsn,
+                   jsonsl_error_t err,
+                   struct jsonsl_nest_st *nest,
+                   char *errat)
+{
+    const char offending = *errat;
+
+    /* One call, same three lines of output. */
+    printf("Got parse error at '%c'\n"
+           "Error is %d\n"
+           "Remaining text: %s\n",
+           offending, err, errat);
+    abort();
+    return 0;
+}
+
+/**
+ * Test driver: read the file "txt" from the current directory and feed its
+ * first chunk to a jsonsl parser with the nest and error callbacks installed.
+ *
+ * @return 0 on success, 1 if the file cannot be opened or read or the
+ *         parser cannot be created
+ */
+int main(void)
+{
+    char buf[8092];
+    ssize_t nread;
+    int fd;
+    jsonsl_t jsn;
+
+    /* Checked explicitly rather than with assert(), which compiles away
+     * when NDEBUG is defined. */
+    fd = open("txt", O_RDONLY);
+    if (fd < 0) {
+        perror("open");
+        return 1;
+    }
+
+    /* Declare that we will support up to 512 nesting levels.
+     * Each level of nesting requires about ~40 bytes (allocated at initialization)
+     * to maintain state information.
+     */
+    jsn = jsonsl_new(512);
+    /* NOTE(review): assumes jsonsl_new() reports failure with a null
+     * handle - confirm against jsonsl.c. */
+    if (!jsn) {
+        fprintf(stderr, "jsonsl_new failed\n");
+        close(fd);
+        return 1;
+    }
+
+    /* Set up our error callbacks (to be called when an error occurs)
+     * and a nest callback (when a level changes in 'nesting')
+     */
+    jsn->error_callback = error_callback;
+    jsn->nest_callback = nest_callback;
+
+    /* Declare that we're interested in receiving callbacks about
+     * json 'Object' and 'List' types.
+     */
+    jsn->call_OBJECT = 1;
+    jsn->call_LIST = 1;
+
+    /* Read into the buffer.
+     *
+     * To avoid recomputing offsets and relative positioning,
+     * we will maintain the buffer, but this is not strictly required.
+     *
+     * One byte is held back for a terminating NUL because the error
+     * callback prints the remaining input with "%s".
+     */
+    nread = read(fd, buf, sizeof(buf) - 1);
+    if (nread < 0) {
+        /* A failed read returns -1; it must never reach jsonsl_feed()
+         * as a (huge) unsigned length. */
+        perror("read");
+        close(fd);
+        jsonsl_destroy(jsn);
+        return 1;
+    }
+    close(fd);
+    buf[nread] = '\0';
+
+    jsonsl_feed(jsn, buf, (size_t)nread);
+    jsonsl_destroy(jsn);
+    return 0;
+}
Oops, something went wrong.

0 comments on commit 20c355f

Please sign in to comment.