Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

started json parser

  • Loading branch information...
commit 12658d6c52a18b32b5c1cbf98908a56fa1bc4954 1 parent 491028f
Peter Ohler authored
View
8 .gitignore
@@ -0,0 +1,8 @@
+ox-*.gem
+.DS_Store
+\#*\#
+.\#*
+*~
+*.o
+Makefile
+*.bundle
View
7 ext/oj/extconf.rb
@@ -0,0 +1,7 @@
+# Build configuration for the oj C extension; generates the Makefile via mkmf.
+require 'mkmf'
+
+# turn on compiler warnings for the extension sources
+$CPPFLAGS += ' -Wall'
+#puts "*** $CPPFLAGS: #{$CPPFLAGS}"
+extension_name = 'oj'
+dir_config(extension_name)
+create_makefile(extension_name)
View
156 ext/oj/gen_load.c
@@ -0,0 +1,156 @@
+/* gen_load.c
+ * Copyright (c) 2011, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "ruby.h"
+#include "oj.h"
+
+static void add_obj(PInfo pi);
+static void end_obj(PInfo pi);
+static void add_array(PInfo pi);
+static void end_array(PInfo pi);
+static void add_key(PInfo pi, char *text);
+static void add_str(PInfo pi, char *text);
+static void add_int(PInfo pi, int64_t val);
+static void add_dub(PInfo pi, double val);
+static void add_true(PInfo pi);
+static void add_false(PInfo pi);
+static void add_nil(PInfo pi);
+
+/* Callback table given to the parser so that parsed JSON elements are turned
+ * into Ruby values. Each member is named explicitly so the table does not rely
+ * on the declaration order of the fields in struct _ParseCallbacks.
+ */
+struct _ParseCallbacks _oj_gen_callbacks = {
+    .add_obj   = add_obj,
+    .end_obj   = end_obj,
+    .add_array = add_array,
+    .end_array = end_array,
+    .add_key   = add_key,
+    .add_str   = add_str,
+    .add_int   = add_int,
+    .add_dub   = add_dub,
+    .add_nil   = add_nil,
+    .add_true  = add_true,
+    .add_false = add_false
+};
+
+ParseCallbacks oj_gen_callbacks = &_oj_gen_callbacks;
+
+/* Puts a finished value where it belongs: it becomes the parse result when no
+ * container is open, or is appended to the current Array. Storing into an
+ * Object is not implemented yet. Any other helper type raises an error.
+ */
+static inline void
+add_val(PInfo pi, VALUE val) {
+    if (0 == pi->h) {
+        pi->obj = val;
+        return;
+    }
+    switch (pi->h->type) {
+    case ArrayCode:
+        rb_ary_push(pi->h->obj, val);
+        break;
+    case ObjectCode:
+        // TBD
+        break;
+    default:
+        raise_error("expected to be in an Array or Hash", pi->str, pi->s);
+        break;
+    }
+}
+
+/* Registered as the add_obj callback. Placeholder: only prints a trace line
+ * and does not use pi yet.
+ */
+static void
+add_obj(PInfo pi) {
+ printf("*** add_obj\n");
+}
+
+/* Registered as the end_obj callback. Placeholder: only prints a trace line
+ * and does not use pi yet.
+ */
+static void
+end_obj(PInfo pi) {
+ printf("*** end_obj\n");
+}
+
+/* Callback for the start of a JSON array. Creates a new Ruby Array and makes
+ * it the current helper so that following values are appended to it. The
+ * outermost array becomes the parse result; an array inside another array is
+ * pushed onto its parent first. Arrays inside an Object are not handled yet.
+ *
+ * Raises through raise_error() when the nesting would run past the MAX_DEPTH
+ * entries of pi->helpers, or when the current helper is neither an Array nor
+ * an Object.
+ */
+static void
+add_array(PInfo pi) {
+    VALUE a = rb_ary_new();
+
+    if (0 == pi->h) {
+        pi->h = pi->helpers;
+        pi->h->obj = a;
+        pi->h->type = ArrayCode;
+        pi->obj = a;
+    } else if (ArrayCode == pi->h->type) {
+        // pi->h is about to advance, refuse to step past the last helper slot
+        if (pi->helpers + MAX_DEPTH - 1 <= pi->h) {
+            raise_error("invalid format, arrays nested too deeply", pi->str, pi->s);
+        }
+        rb_ary_push(pi->h->obj, a);
+        pi->h++;
+        pi->h->obj = a;
+        pi->h->type = ArrayCode;
+    } else if (ObjectCode == pi->h->type) {
+        // TBD
+    } else {
+        raise_error("expected to be in an Array or Hash", pi->str, pi->s);
+    }
+}
+
+/* Callback for the end of a JSON array. Steps back to the enclosing helper,
+ * or clears the current helper when the outermost one is being closed.
+ */
+static void
+end_array(PInfo pi) {
+    if (0 == pi->h) {
+        // TBD error
+        return;
+    }
+    pi->h = (pi->helpers < pi->h) ? pi->h - 1 : 0;
+}
+
+/* Registered as the add_key callback. Placeholder: only prints the key text
+ * and does not use pi yet.
+ */
+static void
+add_key(PInfo pi, char *text) {
+ printf("*** add_key %s\n", text);
+}
+
+/* Registered as the add_str callback. Placeholder: only prints the string
+ * text and does not use pi yet.
+ */
+static void
+add_str(PInfo pi, char *text) {
+ printf("*** add_str %s\n", text);
+}
+
+/* Registered as the add_int callback. Placeholder: only prints the integer
+ * and does not use pi yet.
+ */
+static void
+add_int(PInfo pi, int64_t val) {
+    // int64_t is not long long on every platform so cast to match %lld
+    printf("*** add_int %lld\n", (long long)val);
+}
+
+/* Registered as the add_dub callback. Placeholder: only prints the double
+ * and does not use pi yet.
+ */
+static void
+add_dub(PInfo pi, double val) {
+ printf("*** add_dub %f\n", val);
+}
+
+/* Registered as the add_true callback; stores Ruby true via add_val(). */
+static void
+add_true(PInfo pi) {
+ add_val(pi, Qtrue);
+}
+
+/* Registered as the add_false callback; stores Ruby false via add_val(). */
+static void
+add_false(PInfo pi) {
+ add_val(pi, Qfalse);
+}
+
+/* Registered as the add_nil callback; stores Ruby nil via add_val(). */
+static void
+add_nil(PInfo pi) {
+ add_val(pi, Qnil);
+}
View
126 ext/oj/oj.c
@@ -0,0 +1,126 @@
+/* oj.c
+ * Copyright (c) 2011, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "ruby.h"
+#include "oj.h"
+
+void Init_oj();
+
+VALUE Oj = Qnil;
+
+extern ParseCallbacks oj_gen_callbacks;
+
+
+/* rb_ensure() body: parses the JSON buffer whose address is carried in json. */
+static VALUE
+load_parse_json(VALUE json) {
+    return parse((char*)json, oj_gen_callbacks, 0, 0);
+}
+
+/* rb_ensure() cleanup: frees the JSON buffer whose address is carried in json. */
+static VALUE
+load_free_json(VALUE json) {
+    free((char*)json);
+    return Qnil;
+}
+
+/* Parses the heap allocated buffer json and returns the Ruby object built
+ * from it. Ownership of json passes to this function, which frees it.
+ *
+ * parse() reports errors with rb_raise(), which unwinds past the caller, so
+ * the buffer is released from an rb_ensure() cleanup instead of after the
+ * call. argc, argv and self are accepted for the callers but not used yet.
+ */
+static VALUE
+load(char *json, int argc, VALUE *argv, VALUE self) {
+    return rb_ensure(load_parse_json, (VALUE)json, load_free_json, (VALUE)json);
+}
+
+/* call-seq: load(json, options) => Hash, Array, String, Fixnum, Float, true, false, or nil
+ *
+ * Parses a JSON document String into a Hash, Array, String, Fixnum, Float,
+ * true, false, or nil. Raises an exception if the JSON is malformed or the
+ * classes specified are not valid.
+ * @param [String] json JSON String
+ * @param [Hash] options load options
+ */
+static VALUE
+load_str(int argc, VALUE *argv, VALUE self) {
+    char *json;
+
+    // registered with arity -1 so an empty argument list has to be rejected here
+    if (argc < 1) {
+        rb_raise(rb_eArgError, "Wrong number of arguments to load().\n");
+    }
+    Check_Type(*argv, T_STRING);
+    // the JSON string gets modified so make a copy of it, load() frees the copy
+    if (0 == (json = strdup(StringValuePtr(*argv)))) {
+        rb_raise(rb_eNoMemError, "Could not allocate memory for a copy of the JSON string.\n");
+    }
+    return load(json, argc - 1, argv + 1, self);
+}
+
+/* Reads the whole file named by the first argument into memory and parses it
+ * as JSON. The first argument must be a String holding the file path.
+ *
+ * Raises ArgumentError when called without arguments, IOError when the file
+ * can not be opened or its size can not be determined, NoMemoryError when the
+ * buffer can not be allocated, and LoadError when the file can not be read
+ * completely. The buffer is handed to load(), which frees it.
+ */
+static VALUE
+load_file(int argc, VALUE *argv, VALUE self) {
+    char *path;
+    char *json;
+    FILE *f;
+    long size = -1;
+    unsigned long len;
+    int err;
+
+    // registered with arity -1 so an empty argument list has to be rejected here
+    if (argc < 1) {
+        rb_raise(rb_eArgError, "Wrong number of arguments to load_file().\n");
+    }
+    Check_Type(*argv, T_STRING);
+    path = StringValuePtr(*argv);
+    if (0 == (f = fopen(path, "r"))) {
+        rb_raise(rb_eIOError, "%s\n", strerror(errno));
+    }
+    if (0 == fseek(f, 0, SEEK_END)) {
+        size = ftell(f);
+    }
+    // ftell() returns -1 on failure, never let that become a huge unsigned length
+    if (size < 0 || 0 != fseek(f, 0, SEEK_SET)) {
+        err = errno; // fclose() may overwrite errno
+        fclose(f);
+        rb_raise(rb_eIOError, "%s\n", strerror(err));
+    }
+    len = (unsigned long)size;
+    if (0 == (json = malloc(len + 1))) {
+        fclose(f);
+        rb_raise(rb_eNoMemError, "Could not allocate memory for %lu byte file.\n", len);
+    }
+    if (len != fread(json, 1, len, f)) {
+        fclose(f);
+        free(json); // rb_raise() does not return so release the buffer first
+        rb_raise(rb_eLoadError, "Failed to read %lu bytes from %s.\n", len, path);
+    }
+    fclose(f);
+    json[len] = '\0';
+
+    return load(json, argc - 1, argv + 1, self);
+}
+
+/* Extension entry point. Defines the Oj module and its two module functions,
+ * load and load_file. Both are registered with arity -1, so they receive
+ * their arguments as argc/argv.
+ */
+void Init_oj() {
+
+ Oj = rb_define_module("Oj");
+
+ rb_define_module_function(Oj, "load", load_str, -1);
+ rb_define_module_function(Oj, "load_file", load_file, -1);
+}
+
+/* Raises a Ruby SyntaxError for a parse failure. The line and column put in
+ * the message are found by walking backwards from current to the start of
+ * the document at xml. file and line identify the C source location that
+ * reported the error. Normally called through the raise_error() macro.
+ */
+void
+_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line) {
+    int doc_line = 1;
+    int column = 1;
+
+    // walk back to the start of the current line, counting columns
+    while (xml < current && '\n' != *current) {
+        column++;
+        current--;
+    }
+    // each newline between here and the start of the document is one more line
+    while (xml < current) {
+        if ('\n' == *current) {
+            doc_line++;
+        }
+        current--;
+    }
+    rb_raise(rb_eSyntaxError, "%s at line %d, column %d [%s:%d]\n", msg, doc_line, column, file, line);
+}
View
144 ext/oj/oj.h
@@ -0,0 +1,144 @@
+/* oj.h
+ * Copyright (c) 2011, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __OJ_H__
+#define __OJ_H__
+
+#if defined(__cplusplus)
+extern "C" {
+#if 0
+} /* satisfy cc-mode */
+#endif
+#endif
+
+#include "ruby.h"
+#ifdef HAVE_RUBY_ENCODING_H
+// HAVE_RUBY_ENCODING_H defined for Ruby 1.9
+#include "ruby/encoding.h"
+#endif
+#include "cache.h"
+
+#ifdef JRUBY
+#define NO_RSTRUCT 1
+#endif
+
+#if (defined RBX_Qnil && !defined RUBINIUS)
+#define RUBINIUS
+#endif
+
+#ifdef RUBINIUS
+#undef T_RATIONAL
+#undef T_COMPLEX
+#define NO_RSTRUCT 1
+#endif
+
+#define raise_error(msg, xml, current) _raise_error(msg, xml, current, __FILE__, __LINE__)
+
+#define MAX_TEXT_LEN 4096
+#define MAX_DEPTH 1024
+
+/* One character type codes. Within this extension only ArrayCode and
+ * ObjectCode are consulted so far, to tell what kind of container a Helper
+ * refers to.
+ */
+typedef enum {
+ NoCode = 0,
+ ArrayCode = 'a',
+ String64Code = 'b', // base64 encoded String
+ ClassCode = 'c',
+ Symbol64Code = 'd', // base64 encoded Symbol
+ FloatCode = 'f',
+ RegexpCode = 'g',
+ HashCode = 'h',
+ FixnumCode = 'i',
+ BignumCode = 'j',
+ KeyCode = 'k', // indicates the value is a hash key, kind of a hack
+ RationalCode = 'l',
+ SymbolCode = 'm',
+ FalseClassCode = 'n',
+ ObjectCode = 'o',
+ RefCode = 'p',
+ RangeCode = 'r',
+ StringCode = 's',
+ TimeCode = 't',
+ StructCode = 'u',
+ ComplexCode = 'v',
+ RawCode = 'x',
+ TrueClassCode = 'y',
+ NilClassCode = 'z',
+} Type;
+
+/* State for one open container while parsing. The entries live in the
+ * helpers array of struct _PInfo and are used as a stack.
+ */
+typedef struct _Helper {
+ ID var; /* Object var ID */
+ VALUE obj; /* object created or Qundef if not appropriate */
+ Type type; /* kind of container, e.g. ArrayCode */
+} *Helper;
+
+typedef struct _PInfo *PInfo;
+
+/* Functions the parser calls as it reads JSON elements. The parser checks
+ * add_array, end_array, add_true, add_false and add_nil against 0 before
+ * calling them, so those entries may be left unset.
+ */
+typedef struct _ParseCallbacks {
+ void (*add_obj)(PInfo pi);
+ void (*end_obj)(PInfo pi);
+ void (*add_array)(PInfo pi);
+ void (*end_array)(PInfo pi);
+ void (*add_key)(PInfo pi, char *text);
+ void (*add_str)(PInfo pi, char *text);
+ void (*add_int)(PInfo pi, int64_t val);
+ void (*add_dub)(PInfo pi, double val);
+ void (*add_nil)(PInfo pi);
+ void (*add_true)(PInfo pi);
+ void (*add_false)(PInfo pi);
+} *ParseCallbacks;
+
+/* parse information structure */
+struct _PInfo {
+ struct _Helper helpers[MAX_DEPTH]; /* stack of open containers */
+ Helper h; /* current helper or 0 if not set */
+ char *str; /* buffer being read from */
+ char *s; /* current position in buffer */
+ ParseCallbacks pcb; /* callbacks invoked as elements are read */
+ VALUE obj; /* value returned by parse(), set by the callbacks */
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding *encoding;
+#else
+ void *encoding;
+#endif
+ int trace; /* non-zero makes parse() print the JSON before parsing */
+};
+
+extern VALUE parse(char *json, ParseCallbacks pcb, char **endp, int trace);
+extern void _raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
+
+
+extern VALUE Oj;
+
+#if defined(__cplusplus)
+#if 0
+{ /* satisfy cc-mode */
+#endif
+} /* extern "C" { */
+#endif
+#endif /* __OJ_H__ */
View
780 ext/oj/parse.c
@@ -0,0 +1,780 @@
+/* parse.c
+ * Copyright (c) 2011, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "ruby.h"
+#include "oj.h"
+
+//static void read_instruction(PInfo pi);
+//static void read_doctype(PInfo pi);
+//static void read_comment(PInfo pi);
+//static void read_element(PInfo pi);
+//static void read_text(PInfo pi);
+//static void read_cdata(PInfo pi);
+//static char* read_name_token(PInfo pi);
+//static char* read_quoted_value(PInfo pi);
+//static int read_coded_char(PInfo pi);
+//static void next_non_white(PInfo pi);
+//static int collapse_special(char *str);
+
+static void read_next(PInfo pi);
+static void read_obj(PInfo pi);
+static void read_array(PInfo pi);
+static void read_str(PInfo pi);
+static void read_num(PInfo pi);
+static void read_true(PInfo pi);
+static void read_false(PInfo pi);
+static void read_nil(PInfo pi);
+
+
+/* This JSON parser is a single pass, destructive, callback parser. It is a
+ * single pass parser since it only makes one pass over the characters in the
+ * JSON document string. It is destructive because it re-uses the content of
+ * the string for values in the callbacks and places \0 characters at various
+ * places to mark the end of tokens and strings. It is a callback parser like
+ * a SAX parser because it uses callbacks when document elements are
+ * encountered.
+ *
+ * Parsing is very tolerant. Lack of headers and even misspelled element
+ * endings are passed over without raising an error. A best attempt is made in
+ * all cases to parse the string.
+ */
+
+/* Advances pi->s past spaces, tabs, form feeds, newlines and carriage
+ * returns. Stops on any other character, including the terminating \0.
+ */
+inline static void
+next_non_white(PInfo pi) {
+    // strchr() also matches the terminator, so \0 is excluded explicitly
+    while ('\0' != *pi->s && 0 != strchr(" \t\f\n\r", *pi->s)) {
+        pi->s++;
+    }
+}
+
+/* Advances pi->s until it is on a space, tab, form feed, newline, carriage
+ * return or the terminating \0.
+ */
+inline static void
+next_white(PInfo pi) {
+    // strchr() matches the terminator as well, which ends the scan on \0
+    while (0 == strchr(" \t\f\n\r", *pi->s)) {
+        pi->s++;
+    }
+}
+
+/* Parses a \0 terminated JSON buffer and returns the Ruby value built by the
+ * callbacks.
+ *
+ * json  - buffer to parse, must not be null
+ * pcb   - callbacks invoked as JSON elements are read
+ * endp  - not used yet
+ * trace - when non-zero the JSON text is printed before parsing
+ *
+ * Raises through raise_error() when json is null or when anything other than
+ * white space follows the first JSON value.
+ */
+VALUE
+parse(char *json, ParseCallbacks pcb, char **endp, int trace) {
+    struct _PInfo pi;
+
+    if (0 == json) {
+        raise_error("Invalid arg, JSON string can not be null", json, 0);
+    }
+    if (trace) {
+        printf("Parsing JSON:\n%s\n", json);
+    }
+    /* initialize parse info */
+    pi.str = json;
+    pi.s = json;
+    pi.h = 0;
+    pi.pcb = pcb;
+    pi.obj = Qnil;
+    pi.encoding = 0;
+    pi.trace = trace;
+    read_next(&pi);
+    next_non_white(&pi); // skip white space
+    if ('\0' != *pi.s) {
+        raise_error("invalid format, extra characters", pi.str, pi.s);
+    }
+    return pi.obj;
+}
+
+/* Skips white space and dispatches on the first character of the next JSON
+ * value. The opening character of an object, array or string is consumed
+ * here before the reader is called. The number and literal readers are
+ * entered with pi->s still on the first character. The end of the buffer and
+ * any unrecognized character are left in place without raising an error.
+ */
+static void
+read_next(PInfo pi) {
+ next_non_white(pi); // skip white space
+ switch (*pi->s) {
+ case '{':
+ pi->s++;
+ read_obj(pi);
+ break;
+ case '[':
+ pi->s++;
+ read_array(pi);
+ break;
+ case '"':
+ pi->s++;
+ read_str(pi);
+ break;
+ case '+':
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ read_num(pi);
+ break;
+ case 't':
+ read_true(pi);
+ break;
+ case 'f':
+ read_false(pi);
+ break;
+ case 'n':
+ read_nil(pi);
+ break;
+ case '\0':
+ break;
+ default:
+ // unrecognized character, the caller decides whether it is an error
+ break;
+ }
+}
+
+/* Reader for a JSON object, entered after the opening brace has been
+ * consumed. Not implemented yet: nothing is read and no callback is made.
+ */
+static void
+read_obj(PInfo pi) {
+}
+
+/* Reads the elements of a JSON array, entered after the opening bracket has
+ * been consumed. Calls the add_array callback first, then reads values
+ * separated by commas until the closing bracket, and finally calls the
+ * end_array callback. Anything other than a comma or closing bracket after a
+ * value raises an error.
+ */
+static void
+read_array(PInfo pi) {
+    int more = 1;
+
+    if (0 != pi->pcb->add_array) {
+        pi->pcb->add_array(pi);
+    }
+    while (more) {
+        read_next(pi);
+        next_non_white(pi); // skip white space
+        switch (*pi->s) {
+        case ',':
+            pi->s++;
+            break;
+        case ']':
+            pi->s++;
+            more = 0;
+            break;
+        default:
+            raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
+            break;
+        }
+    }
+    if (0 != pi->pcb->end_array) {
+        pi->pcb->end_array(pi);
+    }
+}
+
+/* Reader for a JSON string, entered after the opening quote has been
+ * consumed. Not implemented yet: nothing is read and no callback is made.
+ */
+static void
+read_str(PInfo pi) {
+}
+
+/* Reader for a JSON number, entered with pi->s on its first character. Not
+ * implemented yet: nothing is read and no callback is made.
+ */
+static void
+read_num(PInfo pi) {
+}
+
+/* Reads the literal true, entered with pi->s on the leading 't'. Raises an
+ * error if the following characters are not "rue", otherwise moves past the
+ * literal and calls the add_true callback when one is set.
+ */
+static void
+read_true(PInfo pi) {
+    pi->s++;
+    // strncmp() stops at a \0 so a short buffer is never read past its end
+    if (0 != strncmp("rue", pi->s, 3)) {
+        raise_error("invalid format, expected 'true'", pi->str, pi->s);
+    }
+    pi->s += 3;
+    if (0 != pi->pcb->add_true) {
+        pi->pcb->add_true(pi);
+    }
+}
+
+/* Reads the literal false, entered with pi->s on the leading 'f'. Raises an
+ * error if the following characters are not "alse", otherwise moves past the
+ * literal and calls the add_false callback when one is set.
+ */
+static void
+read_false(PInfo pi) {
+    pi->s++;
+    // strncmp() stops at a \0 so a short buffer is never read past its end
+    if (0 != strncmp("alse", pi->s, 4)) {
+        raise_error("invalid format, expected 'false'", pi->str, pi->s);
+    }
+    pi->s += 4;
+    if (0 != pi->pcb->add_false) {
+        pi->pcb->add_false(pi);
+    }
+}
+
+/* Reads the literal null, entered with pi->s on the leading 'n'. Raises an
+ * error if the following characters are not "ull", otherwise moves past the
+ * literal and calls the add_nil callback when one is set.
+ */
+static void
+read_nil(PInfo pi) {
+    pi->s++;
+    if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
+        // the JSON literal being matched is null, so name it in the message
+        raise_error("invalid format, expected 'null'", pi->str, pi->s);
+    }
+    pi->s += 3;
+    if (0 != pi->pcb->add_nil) {
+        pi->pcb->add_nil(pi);
+    }
+}
+
+
+#if 0
+/* Entered after the "<?" sequence. Ready to read the rest.
+ */
+static void
+read_instruction(PInfo pi) {
+ struct _Attr attrs[MAX_ATTRS + 1];
+ Attr a = attrs;
+ char *target;
+ char *end;
+ char c;
+
+ memset(attrs, 0, sizeof(attrs));
+ target = read_name_token(pi);
+ end = pi->s;
+ next_non_white(pi);
+ c = *pi->s;
+ *end = '\0'; // terminate name
+ if ('?' != c) {
+ while ('?' != *pi->s) {
+ if ('\0' == *pi->s) {
+ raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
+ }
+ next_non_white(pi);
+ a->name = read_name_token(pi);
+ end = pi->s;
+ next_non_white(pi);
+ if ('=' != *pi->s++) {
+ raise_error("invalid format, no attribute value", pi->str, pi->s);
+ }
+ *end = '\0'; // terminate name
+ // read value
+ next_non_white(pi);
+ a->value = read_quoted_value(pi);
+ a++;
+ if (MAX_ATTRS <= (a - attrs)) {
+ raise_error("too many attributes", pi->str, pi->s);
+ }
+ }
+ if ('?' == *pi->s) {
+ pi->s++;
+ }
+ } else {
+ pi->s++;
+ }
+ if ('>' != *pi->s++) {
+ raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
+ }
+ if (0 != pi->pcb->instruct) {
+ pi->pcb->instruct(pi, target, attrs);
+ }
+}
+
+/* Entered after the "<!DOCTYPE" sequence plus the first character after
+ * that. Ready to read the rest. Returns error code.
+ */
+static void
+read_doctype(PInfo pi) {
+ char *docType;
+ int depth = 1;
+ char c;
+
+ next_non_white(pi);
+ docType = pi->s;
+ while (1) {
+ c = *pi->s++;
+ if ('\0' == c) {
+ raise_error("invalid format, prolog not terminated", pi->str, pi->s);
+ } else if ('<' == c) {
+ depth++;
+ } else if ('>' == c) {
+ depth--;
+ if (0 == depth) { /* done, at the end */
+ pi->s--;
+ break;
+ }
+ }
+ }
+ *pi->s = '\0';
+ pi->s++;
+ if (0 != pi->pcb->add_doctype) {
+ pi->pcb->add_doctype(pi, docType);
+ }
+}
+
+/* Entered after "<!--". Returns error code.
+ */
+static void
+read_comment(PInfo pi) {
+ char *end;
+ char *s;
+ char *comment;
+ int done = 0;
+
+ next_non_white(pi);
+ comment = pi->s;
+ end = strstr(pi->s, "-->");
+ if (0 == end) {
+ raise_error("invalid format, comment not terminated", pi->str, pi->s);
+ }
+ for (s = end - 1; pi->s < s && !done; s--) {
+ switch(*s) {
+ case ' ':
+ case '\t':
+ case '\f':
+ case '\n':
+ case '\r':
+ break;
+ default:
+ *(s + 1) = '\0';
+ done = 1;
+ break;
+ }
+ }
+ *end = '\0'; // in case the comment was blank
+ pi->s = end + 3;
+ if (0 != pi->pcb->add_comment) {
+ pi->pcb->add_comment(pi, comment);
+ }
+}
+
+/* Entered after the '<' and the first character after that. Returns status
+ * code.
+ */
+static void
+read_element(PInfo pi) {
+ struct _Attr attrs[MAX_ATTRS];
+ Attr ap = attrs;
+ char *name;
+ char *ename;
+ char *end;
+ char c;
+ long elen;
+ int hasChildren = 0;
+ int done = 0;
+
+ ename = read_name_token(pi);
+ end = pi->s;
+ elen = end - ename;
+ next_non_white(pi);
+ c = *pi->s;
+ *end = '\0';
+ if ('/' == c) {
+ /* empty element, no attributes and no children */
+ pi->s++;
+ if ('>' != *pi->s) {
+ //printf("*** '%s' ***\n", pi->s);
+ raise_error("invalid format, element not closed", pi->str, pi->s);
+ }
+ pi->s++; /* past > */
+ ap->name = 0;
+ pi->pcb->add_element(pi, ename, attrs, hasChildren);
+ pi->pcb->end_element(pi, ename);
+
+ return;
+ }
+ /* read attribute names until the close (/ or >) is reached */
+ while (!done) {
+ if ('\0' == c) {
+ next_non_white(pi);
+ c = *pi->s;
+ }
+ switch (c) {
+ case '\0':
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
+ case '/':
+ // Element with just attributes.
+ pi->s++;
+ if ('>' != *pi->s) {
+ raise_error("invalid format, element not closed", pi->str, pi->s);
+ }
+ pi->s++;
+ ap->name = 0;
+ pi->pcb->add_element(pi, ename, attrs, hasChildren);
+ pi->pcb->end_element(pi, ename);
+
+ return;
+ case '>':
+ // has either children or a value
+ pi->s++;
+ hasChildren = 1;
+ done = 1;
+ ap->name = 0;
+ pi->pcb->add_element(pi, ename, attrs, hasChildren);
+ break;
+ default:
+ // Attribute name so it's an element and the attribute will be
+ // added to it.
+ ap->name = read_name_token(pi);
+ end = pi->s;
+ next_non_white(pi);
+ if ('=' != *pi->s++) {
+ raise_error("invalid format, no attribute value", pi->str, pi->s);
+ }
+ *end = '\0'; // terminate name
+ // read value
+ next_non_white(pi);
+ ap->value = read_quoted_value(pi);
+ if (0 != strchr(ap->value, '&')) {
+ if (0 != collapse_special((char*)ap->value)) {
+ raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
+ }
+ }
+ ap++;
+ if (MAX_ATTRS <= (ap - attrs)) {
+ raise_error("too many attributes", pi->str, pi->s);
+ }
+ break;
+ }
+ c = '\0';
+ }
+ if (hasChildren) {
+ char *start;
+
+ done = 0;
+ // read children
+ while (!done) {
+ start = pi->s;
+ next_non_white(pi);
+ c = *pi->s++;
+ if ('\0' == c) {
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
+ }
+ if ('<' == c) {
+ switch (*pi->s) {
+ case '!': /* better be a comment or CDATA */
+ pi->s++;
+ if ('-' == *pi->s && '-' == *(pi->s + 1)) {
+ pi->s += 2;
+ read_comment(pi);
+ } else if (0 == strncmp("[CDATA[", pi->s, 7)) {
+ pi->s += 7;
+ read_cdata(pi);
+ } else {
+ raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
+ }
+ break;
+ case '/':
+ pi->s++;
+ name = read_name_token(pi);
+ end = pi->s;
+ next_non_white(pi);
+ c = *pi->s;
+ *end = '\0';
+ if (0 != strcmp(name, ename)) {
+ raise_error("invalid format, elements overlap", pi->str, pi->s);
+ }
+ if ('>' != c) {
+ raise_error("invalid format, element not closed", pi->str, pi->s);
+ }
+ pi->s++;
+ pi->pcb->end_element(pi, ename);
+ return;
+ case '\0':
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
+ default:
+ // a child element
+ read_element(pi);
+ break;
+ }
+ } else { // read as TEXT
+ pi->s = start;
+ //pi->s--;
+ read_text(pi);
+ //read_reduced_text(pi);
+
+ // to exit read_text with no errors the next character must be <
+ if ('/' == *(pi->s + 1) &&
+ 0 == strncmp(ename, pi->s + 2, elen) &&
+ '>' == *(pi->s + elen + 2)) {
+ // close tag after text so treat as a value
+ pi->s += elen + 3;
+ pi->pcb->end_element(pi, ename);
+ return;
+ }
+ }
+ }
+ }
+}
+
+static void
+read_text(PInfo pi) {
+ char buf[MAX_TEXT_LEN];
+ char *b = buf;
+ char *alloc_buf = 0;
+ char *end = b + sizeof(buf) - 2;
+ char c;
+ int done = 0;
+
+ while (!done) {
+ c = *pi->s++;
+ switch(c) {
+ case '<':
+ done = 1;
+ pi->s--;
+ break;
+ case '\0':
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
+ default:
+ if ('&' == c) {
+ c = read_coded_char(pi);
+ }
+ if (end <= b) {
+ unsigned long size;
+
+ if (0 == alloc_buf) {
+ size = sizeof(buf) * 2;
+ if (0 == (alloc_buf = (char*)malloc(size))) {
+ raise_error("text too long", pi->str, pi->s);
+ }
+ memcpy(alloc_buf, buf, b - buf);
+ b = alloc_buf + (b - buf);
+ } else {
+ unsigned long pos = b - alloc_buf;
+
+ size = (end - alloc_buf) * 2;
+ if (0 == (alloc_buf = (char*)realloc(alloc_buf, size))) {
+ raise_error("text too long", pi->str, pi->s);
+ }
+ b = alloc_buf + pos;
+ }
+ end = alloc_buf + size - 2;
+ }
+ *b++ = c;
+ break;
+ }
+ }
+ *b = '\0';
+ if (0 != alloc_buf) {
+ pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
+ free(alloc_buf);
+ } else {
+ pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
+ }
+}
+
+static char*
+read_name_token(PInfo pi) {
+ char *start;
+
+ next_non_white(pi);
+ start = pi->s;
+ for (; 1; pi->s++) {
+ switch (*pi->s) {
+ case ' ':
+ case '\t':
+ case '\f':
+ case '?':
+ case '=':
+ case '/':
+ case '>':
+ case '\n':
+ case '\r':
+ return start;
+ case '\0':
+ // documents never terminate after a name token
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
+ break; // to avoid warnings
+ default:
+ break;
+ }
+ }
+ return start;
+}
+
+static void
+read_cdata(PInfo pi) {
+ char *start;
+ char *end;
+
+ start = pi->s;
+ end = strstr(pi->s, "]]>");
+ if (end == 0) {
+ raise_error("invalid format, CDATA not terminated", pi->str, pi->s);
+ }
+ *end = '\0';
+ pi->s = end + 3;
+ if (0 != pi->pcb->add_cdata) {
+ pi->pcb->add_cdata(pi, start, end - start);
+ }
+}
+
+inline static void
+next_non_token(PInfo pi) {
+ for (; 1; pi->s++) {
+ switch(*pi->s) {
+ case ' ':
+ case '\t':
+ case '\f':
+ case '\n':
+ case '\r':
+ case '/':
+ case '>':
+ return;
+ default:
+ break;
+ }
+ }
+}
+
+/* Assume the value starts immediately and goes until the quote character is
+ * reached again. Do not read the character after the terminating quote.
+ */
+static char*
+read_quoted_value(PInfo pi) {
+ char *value = 0;
+
+ if ('"' == *pi->s || ('\'' == *pi->s && StrictEffort != pi->effort)) {
+ char term = *pi->s;
+
+ pi->s++; // skip quote character
+ value = pi->s;
+ for (; *pi->s != term; pi->s++) {
+ if ('\0' == *pi->s) {
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
+ }
+ }
+ *pi->s = '\0'; // terminate value
+ pi->s++; // move past quote
+ } else if (StrictEffort == pi->effort) {
+ raise_error("invalid format, expected a quote character", pi->str, pi->s);
+ } else {
+ value = pi->s;
+ next_white(pi);
+ if ('\0' == *pi->s) {
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
+ }
+ *pi->s++ = '\0'; // terminate value
+ }
+ return value;
+}
+
+static int
+read_coded_char(PInfo pi) {
+ char *b, buf[8];
+ char *end = buf + sizeof(buf);
+ char *s;
+ int c;
+
+ for (b = buf, s = pi->s; b < end; b++, s++) {
+ if (';' == *s) {
+ *b = '\0';
+ s++;
+ break;
+ }
+ *b = *s;
+ }
+ if (b > end) {
+ return *pi->s;
+ }
+ if ('#' == *buf) {
+ c = (int)strtol(buf + 1, &end, 10);
+ if (0 >= c || '\0' != *end) {
+ return *pi->s;
+ }
+ pi->s = s;
+
+ return c;
+ }
+ if (0 == strcasecmp(buf, "nbsp")) {
+ pi->s = s;
+ return ' ';
+ } else if (0 == strcasecmp(buf, "lt")) {
+ pi->s = s;
+ return '<';
+ } else if (0 == strcasecmp(buf, "gt")) {
+ pi->s = s;
+ return '>';
+ } else if (0 == strcasecmp(buf, "amp")) {
+ pi->s = s;
+ return '&';
+ } else if (0 == strcasecmp(buf, "quot")) {
+ pi->s = s;
+ return '"';
+ } else if (0 == strcasecmp(buf, "apos")) {
+ pi->s = s;
+ return '\'';
+ }
+ return *pi->s;
+}
+
+static int
+collapse_special(char *str) {
+ char *s = str;
+ char *b = str;
+
+ while ('\0' != *s) {
+ if ('&' == *s) {
+ int c;
+ char *end;
+
+ s++;
+ if ('#' == *s) {
+ c = (int)strtol(s, &end, 10);
+ if (';' != *end) {
+ return EDOM;
+ }
+ s = end + 1;
+ } else if (0 == strncasecmp(s, "lt;", 3)) {
+ c = '<';
+ s += 3;
+ } else if (0 == strncasecmp(s, "gt;", 3)) {
+ c = '>';
+ s += 3;
+ } else if (0 == strncasecmp(s, "amp;", 4)) {
+ c = '&';
+ s += 4;
+ } else if (0 == strncasecmp(s, "quot;", 5)) {
+ c = '"';
+ s += 5;
+ } else if (0 == strncasecmp(s, "apos;", 5)) {
+ c = '\'';
+ s += 5;
+ } else {
+ c = '?';
+ while (';' != *s++) {
+ if ('\0' == *s) {
+ return EDOM;
+ }
+ }
+ s++;
+ }
+ *b++ = (char)c;
+ } else {
+ *b++ = *s++;
+ }
+ }
+ *b = '\0';
+
+ return 0;
+}
+#endif
View
36 lib/oj.rb
@@ -0,0 +1,36 @@
+# Copyright (c) 2011, Peter Ohler<br>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# - Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# - Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# - Neither the name of Peter Ohler nor the names of its contributors may be
+# used to endorse or promote products derived from this software without
+# specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Ruby side of the Oj module; the load functions are added by the C extension.
+module Oj
+ private
+ # NOTE(review): not referenced anywhere in this commit; presumably meant to
+ # hold references so objects are not garbage collected - confirm.
+ @@keep = []
+end
+
+#require 'ox/version'
+
+require 'oj/oj' # C extension
View
11 notes
@@ -0,0 +1,11 @@
+;; -*- mode: outline; outline-regexp: " *[-\+]"; indent-tabs-mode: nil -*-
+
+^c^d hide subtree
+^c^s show subtree
+
+- json object format
+
+- yajl-ruby is fastest out there
+
+
+- write parser with callbacks
View
45 test/foo.rb
@@ -0,0 +1,45 @@
+#!/usr/bin/env ruby -wW1
+# encoding: UTF-8
+
+$: << File.join(File.dirname(__FILE__), "../lib")
+$: << File.join(File.dirname(__FILE__), "../ext")
+
+#require 'test/unit'
+require 'optparse'
+require 'yajl'
+require 'oj'
+
+# NOTE(review): $indent is not used anywhere in this script.
+$indent = 2
+
+opts = OptionParser.new
+opts.on("-h", "--help", "Show this display") { puts opts; Process.exit!(0) }
+# NOTE(review): files is not used after this point.
+files = opts.parse(ARGV)
+
+# Smoke test: load each sample document and print the result with its class.
+[ 'true',
+ 'false',
+ 'null',
+ '[true, false, null]',
+ '[true, [true, false], null]',
+].each do |s|
+ x = Oj.load(s)
+ puts ">>> #{x}(#{x.class})"
+end
+
+# Benchmark: parse the same nested array iter times with Oj and then with Yajl.
+iter = 1000000
+s = %{
+[ true, [false, [true, null], null, [true, false], null]]
+}
+
+start = Time.now
+iter.times do
+ Oj.load(s)
+end
+dt = Time.now - start
+puts "#{iter} Oj.load()s in #{dt} seconds or #{iter/dt} loads/second"
+
+start = Time.now
+iter.times do
+ Yajl::Parser.parse(s)
+end
+dt = Time.now - start
+puts "#{iter} Yajl::Parser.parse()s in #{dt} seconds or #{iter/dt} parses/second"
View
19 test/sample_obj.json
@@ -0,0 +1,19 @@
+
+{
+ "attr1": 12345,
+ "attr2": "a string",
+ "attr3": {
+ "class": "Time",
+ "val": 1234567.7777
+ }
+}
+
+
+{
+ "attr1": 12345,
+ "attr2": "a string",
+ "attr3/Time": 1234567.7777,
+ "attr4/Foo::Bar": {
+ "a": "xyz"
+ }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.