Skip to content

Commit

Permalink
passing the first tests, seems to be working splendidly
Browse files Browse the repository at this point in the history
  • Loading branch information
quartzjer committed Sep 23, 2010
1 parent 8084600 commit a058668
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 21 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
js0n_test
2 changes: 1 addition & 1 deletion README
@@ -1,6 +1,6 @@
js0n - the "cheapest" c json parser possible? js0n - the "cheapest" c json parser possible?


A one-pass, super-low-overhead parsing walk of the raw bytes with no memory copying of any sort; it fills in a simple array of offsets and lengths of the first-depth array values or object key/values. A one-pass, super-low-overhead parsing walk of the raw bytes with no memory copying of any sort; it fills in a simple array of offsets and lengths of the first-depth array values or object key/values. It should parse any valid JSON, but trades full validation for efficiency (some invalid JSON will still parse). Excellent for low-level, high-speed scanning/routing of small chunks of JSON.


Parsing this: Parsing this:


Expand Down
46 changes: 29 additions & 17 deletions js0n.c
@@ -1,19 +1,22 @@
// by jeremie miller - 2010
// public domain, contributions/improvements welcome via github


int js0n(unsigned char *js, unsigned int len, unsigned short **out) // opportunity to further optimize would be having different jump tables for higher depths
// Result-recording helpers. Both expand inside js0n() and rely on its locals:
//   js    - start of the input buffer
//   cur   - byte currently being examined
//   out   - next free slot in the caller-supplied unsigned short array
//   depth - current nesting depth; only depth == 1 entries are recorded
//
// PUSH(i): store the offset of (cur + i) relative to js, then advance out.
// CAP(i):  store the inclusive length from the offset most recently stored
//          (out[-1]) up to (cur + i), then advance out.
//
// The store and the increment are separate statements on purpose: writing
// `*out++ = ... *(out-1) ...` reads and modifies `out` with no sequencing
// between the two, which is undefined behavior. The do/while(0) wrapper keeps
// each macro a single statement so it is safe inside unbraced if/else.
#define PUSH(i) do { if(depth == 1) { *out = (unsigned short)((cur + (i)) - js); out++; } } while(0)
#define CAP(i) do { if(depth == 1) { *out = (unsigned short)((cur + (i)) - (js + out[-1]) + 1); out++; } } while(0)

int js0n(unsigned char *js, unsigned int len, unsigned short *out)
{ {
unsigned char buff[1024], *cur, *end; unsigned char *cur, *end;
int mode=0; // 0=structures 1=strings
int depth=0; int depth=0;
int kind=0; // 0=obj 1=array
char quote;
static void *gostruct[] = static void *gostruct[] =
{ {
[0 ... 255] = &&l_bad, [0 ... 255] = &&l_bad,
['\t'] = &&l_ws, [' '] = &&l_ws, ['\r'] = &&l_ws, ['\n'] = &&l_ws, ['\t'] = &&l_ws, [' '] = &&l_ws, ['\r'] = &&l_ws, ['\n'] = &&l_ws,
['"'] = &&l_quot, ['"'] = &&l_quot,
[':'] = &&l_is,[','] = &&l_more, [':'] = &&l_is,[','] = &&l_more,
['['] = &&l_ko, [']'] = &&l_kc, ['['] = &&l_up, [']'] = &&l_down, // tracking [] and {} individually would allow fuller validation but is really messy
['{'] = &&l_ko, ['}'] = &&l_kc, ['{'] = &&l_up, ['}'] = &&l_down,
['-'] = &&l_bare, [48 ... 57] = &&l_bare, // 0-9 ['-'] = &&l_bare, [48 ... 57] = &&l_bare, // 0-9
['t'] = &&l_bare, ['f'] = &&l_bare, ['n'] = &&l_bare // true, false, null ['t'] = &&l_bare, ['f'] = &&l_bare, ['n'] = &&l_bare // true, false, null
}; };
Expand Down Expand Up @@ -46,31 +49,38 @@ int js0n(unsigned char *js, unsigned int len, unsigned short **out)
l_loop:; l_loop:;
} }


return depth; // 0 if successful full parse, >0 for incomplete data

l_bad: l_bad:
printf("bad char %d\n",*cur);
return 1; return 1;


l_char: l_char:
printf("%c",mode?'.':'*');
goto l_loop; goto l_loop;


l_ko: l_up:
printf("%d%c",++depth,*cur); PUSH(0);
++depth;
goto l_loop; goto l_loop;
l_kc:
printf("%d%c",--depth,*cur); l_down:
--depth;
CAP(0);
goto l_loop; goto l_loop;


l_ws: l_ws:
l_is: l_is:
l_more: l_more:
printf("%c",*cur);
goto l_loop; goto l_loop;


l_quot: l_quot:
printf("%c",*cur); if(go==gostruct)
go = mode?gostruct:gostring; {
mode^=1; PUSH(1);
go=gostring;
}else{
CAP(-1);
go=gostruct;
}
goto l_loop; goto l_loop;


l_esc: l_esc:
Expand All @@ -82,10 +92,12 @@ int js0n(unsigned char *js, unsigned int len, unsigned short **out)
goto l_loop; goto l_loop;


l_bare: l_bare:
PUSH(0);
go = gobare; go = gobare;
goto l_loop; goto l_loop;


l_unbare: l_unbare:
CAP(-1);
go = gostruct; go = gostruct;
goto *go[*cur]; goto *go[*cur];


Expand Down
2 changes: 1 addition & 1 deletion js0n.h
@@ -1,6 +1,6 @@


// pass it a raw json string and length, it will return an array of unsigned shorts, see js0n.c for more // pass it a raw json string and length, it will return an array of unsigned shorts, see js0n.c for more
int js0n(unsigned char *js, unsigned int len, unsigned short **out); int js0n(unsigned char *js, unsigned int len, unsigned short *out);


// TODO: decode a string value, copy into new while unescaping things and unicode stuffs // TODO: decode a string value, copy into new while unescaping things and unicode stuffs
// int js0n(unsigned char *in, unsigned int len, unsigned char *out); // int js0n(unsigned char *in, unsigned int len, unsigned char *out);
12 changes: 10 additions & 2 deletions js0n_test.c
Expand Up @@ -7,8 +7,8 @@
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
unsigned char buff[1024], *json; unsigned char buff[1024], *json;
int len, lastlen=0; int len, lastlen=0, ret, i;
unsigned short **res; unsigned short *res;
FILE *f; FILE *f;


if((f = fopen(argv[1],"r")) == NULL) if((f = fopen(argv[1],"r")) == NULL)
Expand All @@ -24,6 +24,14 @@ int main(int argc, char **argv)
} }
fclose(f); fclose(f);
printf("got[%.*s]\n",lastlen,json); printf("got[%.*s]\n",lastlen,json);
res = malloc(lastlen); // way more than enough
bzero(res,lastlen);
ret = js0n(json,lastlen,res);
printf("returned %d\n",ret);
for(i=0;res[i];i+=2)
{
printf("%d: at %d len %d is %.*s\n",i,res[i],res[i+1],res[i+1],json+res[i]);
}
return 0; return 0;
} }


0 comments on commit a058668

Please sign in to comment.