Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

dump and load are working at 2X faster than Yajl. Need work on encodi…

…ng though.
  • Loading branch information...
commit 7bf61c31eb53e7bc8ffe0ff491d07f1ce0561cb2 1 parent b8f3b5a
Peter Ohler authored
View
479 ext/oj/dump.c
@@ -0,0 +1,479 @@
+/* dump.c
+ * Copyright (c) 2012, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "ruby.h"
+#include "oj.h"
+
+typedef unsigned long ulong;
+
+typedef struct _Str {
+ const char *str; // character data; NOTE(review): not read or written by any function visible here
+ size_t len; // presumably the byte length of str -- TODO confirm, no visible code uses it
+} *Str;
+
+typedef struct _Element {
+ struct _Str clas; // NOTE(review): only referenced through the w_start/w_end signatures in _Out
+ struct _Str attr; // not used by any function visible here
+ unsigned long id; // not used by any function visible here
+ int indent; // < 0 indicates no \n
+ int closed; // not used by any function visible here
+ char type; // not used by any function visible here
+} *Element;
+
+typedef struct _Out {
+ void (*w_start)(struct _Out *out, Element e); // not assigned or called in the code visible here
+ void (*w_end)(struct _Out *out, Element e); // not assigned or called in the code visible here
+ void (*w_time)(struct _Out *out, VALUE obj); // not assigned or called in the code visible here
+ char *buf; // start of the heap output buffer (malloc in dump_obj_to_json, realloc in grow)
+ char *end; // soft limit used by the capacity checks; the allocation extends a few bytes past it
+ char *cur; // next byte to be written
+// Cache8 circ_cache;
+// unsigned long circ_cnt;
+ int indent; // spaces per nesting level, multiplied by the depth in fill_indent
+ int depth; // nesting depth handed from dump_hash to hash_cb through rb_hash_foreach
+ Options opts; // options pointer stored by dump_obj_to_json
+ VALUE obj; // top level object stored by dump_obj_to_json
+} *Out;
+
+static void dump_obj_to_json(VALUE obj, Options copts, Out out);
+static void dump_val(VALUE obj, int depth, Out out);
+static void dump_nil(Out out);
+static void dump_true(Out out);
+static void dump_false(Out out);
+static void dump_fixnum(VALUE obj, Out out);
+static void dump_float(VALUE obj, Out out);
+static void dump_cstr(const char *str, int cnt, Out out);
+static void dump_hex(u_char c, Out out);
+static void dump_str(VALUE obj, Out out);
+static void dump_sym(VALUE obj, Out out);
+static void dump_array(VALUE obj, int depth, Out out);
+static void dump_hash(VALUE obj, int depth, Out out);
+
+static void grow(Out out, size_t len);
+static int is_json_friendly(const u_char *str, int len);
+static int json_friendly_size(const u_char *str, int len);
+
+
+static char json_friendly_chars[256] = "\
+uuuuuuuuxxxuxxuuuuuuuuuuuuuuuuuu\
+ooxooooooooooooxoooooooooooooooo\
+ooooooooooooooooooooooooooooxooo\
+ooooooooooooooooooooooooooooooou\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu"; // per byte class: 'o' copied as is (1 char), 'x' backslash escape (2 chars), 'u' \u00XX escape (6 chars)
+
+/* Reports whether each of the first len bytes of str is classified 'o' in
+ * json_friendly_chars, meaning the text needs no escaping.
+ *
+ * Returns 1 when all bytes qualify (also for len <= 0), otherwise 0.
+ */
+inline static int
+is_json_friendly(const u_char *str, int len) {
+    const u_char *stop;
+
+    if (len <= 0) {
+        return 1;
+    }
+    for (stop = str + len; str < stop; str++) {
+        if ('o' != json_friendly_chars[*str]) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Computes how many characters the first len bytes of str occupy once
+ * escaped for JSON, not counting the surrounding quotes. Each byte adds 1,
+ * 2 or 6 depending on its class ('o', 'x' or 'u') in json_friendly_chars.
+ */
+inline static int
+json_friendly_size(const u_char *str, int len) {
+    int total = 0;
+    int i;
+
+    for (i = 0; i < len; i++) {
+        char kind = json_friendly_chars[str[i]];
+
+        if ('o' == kind) {
+            total += 1;
+        } else if ('x' == kind) {
+            total += 2;
+        } else if ('u' == kind) {
+            total += 6;
+        }
+    }
+    return total;
+}
+
+/* Writes a newline followed by cnt * out->indent spaces at out->cur. When the
+ * product is negative nothing at all is written. The caller is responsible
+ * for having made room in the buffer.
+ */
+inline static void
+fill_indent(Out out, int cnt) {
+    int spaces = cnt * out->indent;
+
+    if (spaces < 0) {
+        return;
+    }
+    *out->cur++ = '\n';
+    while (0 < spaces) {
+        *out->cur++ = ' ';
+        spaces--;
+    }
+}
+
+/* Enlarges the output buffer so that at least len more bytes fit.
+ *
+ * The capacity is doubled and, when that still is not comfortably more than
+ * what is needed, len is added on top. buf, end and cur are updated to point
+ * into the new allocation; the write position is preserved.
+ *
+ * out - output state whose buffer is reallocated
+ * len - number of bytes the caller is about to append
+ *
+ * Raises NoMemError if the reallocation fails. The old buffer is released
+ * first because rb_raise() does not return and nobody else would free it.
+ */
+static void
+grow(Out out, size_t len) {
+    size_t size = out->end - out->buf;
+    long pos = out->cur - out->buf;
+    char *buf;
+
+    size *= 2;
+    if (size <= len * 2 + pos) {
+        size += len;
+    }
+    // 10 extra bytes: room for the terminator plus slack for unchecked single character writes
+    if (0 == (buf = (char*)realloc(out->buf, size + 10))) {
+        free(out->buf); // realloc left the old block untouched, release it before unwinding
+        out->buf = 0;
+        out->end = 0;
+        out->cur = 0;
+        rb_raise(rb_eNoMemError, "Failed to create string. [%d:%s]\n", ENOMEM, strerror(ENOMEM));
+    }
+    out->buf = buf;
+    out->end = buf + size;
+    out->cur = out->buf + pos;
+}
+
+/* Appends the two lowercase hexadecimal digits of byte c at out->cur, high
+ * nibble first. The caller must already have reserved the two bytes.
+ *
+ * The previous version masked the high nibble with 0xF0 without shifting it
+ * down, so any byte >= 0x10 produced a character outside 0-9a-f.
+ */
+inline static void
+dump_hex(u_char c, Out out) {
+    static const char digits[] = "0123456789abcdef";
+
+    *out->cur++ = digits[(c >> 4) & 0x0F];
+    *out->cur++ = digits[c & 0x0F];
+}
+
+/* Appends the JSON literal null to out, growing the buffer when needed, and
+ * leaves a '\0' at the new write position.
+ */
+static void
+dump_nil(Out out) {
+    static const char text[] = "null";
+    const size_t len = sizeof(text) - 1;
+
+    if (out->end - out->cur <= (long)len) {
+        grow(out, len);
+    }
+    memcpy(out->cur, text, sizeof(text)); // the terminator is copied as well
+    out->cur += len;
+}
+
+/* Appends the JSON literal true to out, growing the buffer when needed, and
+ * leaves a '\0' at the new write position.
+ */
+static void
+dump_true(Out out) {
+    static const char text[] = "true";
+    const size_t len = sizeof(text) - 1;
+
+    if (out->end - out->cur <= (long)len) {
+        grow(out, len);
+    }
+    memcpy(out->cur, text, sizeof(text)); // the terminator is copied as well
+    out->cur += len;
+}
+
+/* Appends the JSON literal false to out, growing the buffer when needed, and
+ * leaves a '\0' at the new write position.
+ */
+static void
+dump_false(Out out) {
+    static const char text[] = "false";
+    const size_t len = sizeof(text) - 1;
+
+    if (out->end - out->cur <= (long)len) {
+        grow(out, len);
+    }
+    memcpy(out->cur, text, sizeof(text)); // the terminator is copied as well
+    out->cur += len;
+}
+
+/* Appends the decimal text of the integer held by obj to out and leaves a
+ * '\0' at the new write position.
+ *
+ * The digits are produced right to left in a local scratch array and then
+ * copied to the output in one go.
+ */
+static void
+dump_fixnum(VALUE obj, Out out) {
+    char digits[32];
+    char *head = digits + sizeof(digits) - 1;
+    long value = NUM2LONG(obj);
+    int negative = (0 > value);
+    size_t need;
+
+    if (negative) {
+        value = -value;
+    }
+    *head = '\0';
+    if (0 < value) {
+        while (0 < value) {
+            *--head = (char)((value % 10) + '0');
+            value /= 10;
+        }
+        if (negative) {
+            *--head = '-';
+        }
+    } else {
+        *--head = '0';
+    }
+    need = sizeof(digits) - (head - digits); // text length including the terminator
+    if (out->end - out->cur <= (long)need) {
+        grow(out, need);
+    }
+    memcpy(out->cur, head, need);
+    out->cur += need - 1; // stay on the terminator so the next write overwrites it
+}
+
+/* Appends the float held by obj to out, formatted with "%0.16g", and leaves
+ * a '\0' at the new write position.
+ */
+static void
+dump_float(VALUE obj, Out out) {
+    char text[64];
+    int cnt = sprintf(text, "%0.16g", RFLOAT_VALUE(obj)); // used sprintf due to bug in snprintf
+    size_t len;
+
+    if (out->end - out->cur <= (long)cnt) {
+        grow(out, cnt);
+    }
+    len = strlen(text);
+    memcpy(out->cur, text, len + 1);
+    out->cur += len;
+}
+
+/* Appends the cnt bytes starting at str to out as a double quoted JSON
+ * string and leaves a '\0' at the new write position.
+ *
+ * str - bytes to write; does not have to be free of '\0' bytes
+ * cnt - number of bytes at str
+ * out - output state, grown as needed
+ *
+ * Bytes are classified through json_friendly_chars: 'o' bytes are copied,
+ * 'x' bytes become a two character backslash escape and 'u' bytes become
+ * \u00XX. Both copy loops are bounded by cnt rather than by a '\0' so that
+ * the amount written always matches the size that was reserved and strings
+ * with embedded '\0' bytes are not cut short.
+ */
+static void
+dump_cstr(const char *str, int cnt, Out out) {
+    const char *stop = str + cnt;
+    int size = json_friendly_size((u_char*)str, cnt);
+
+    if (cnt == size) {
+        // every byte is class 'o', the text can be copied verbatim
+        cnt += 2;
+        if (out->end - out->cur <= (long)cnt) {
+            grow(out, cnt);
+        }
+        *out->cur++ = '"';
+        for (; str < stop; str++) {
+            *out->cur++ = *str;
+        }
+        *out->cur++ = '"';
+    } else {
+        // TBD maybe use ruby to generate string
+        size += 2;
+        if (out->end - out->cur <= (long)size) {
+            grow(out, size);
+        }
+        *out->cur++ = '"';
+        for (; str < stop; str++) {
+            switch (json_friendly_chars[(u_char)*str]) {
+            case 'o':
+                *out->cur++ = *str;
+                break;
+            case 'x':
+                *out->cur++ = '\\';
+                switch (*str) {
+                case '\b': *out->cur++ = 'b'; break;
+                case '\t': *out->cur++ = 't'; break;
+                case '\n': *out->cur++ = 'n'; break;
+                case '\f': *out->cur++ = 'f'; break;
+                case '\r': *out->cur++ = 'r'; break;
+                default: *out->cur++ = *str; break; // quote, backslash and slash escape to themselves
+                }
+                break;
+            case 'u':
+                // TBD bytes above 0x7F are written one by one as \u00XX, multi byte sequences are not decoded
+                *out->cur++ = '\\';
+                *out->cur++ = 'u';
+                *out->cur++ = '0';
+                *out->cur++ = '0';
+                dump_hex((u_char)*str, out);
+                break;
+            default:
+                // TBD raise
+                break;
+            }
+        }
+        *out->cur++ = '"';
+    }
+    *out->cur = '\0';
+}
+
+/* Appends the Ruby String obj to out as a quoted JSON string. */
+static void
+dump_str(VALUE obj, Out out) {
+    const char *text = StringValuePtr(obj);
+    int len = (int)RSTRING_LEN(obj);
+
+    dump_cstr(text, len, out);
+}
+
+/* Appends the name of the Ruby Symbol obj to out as a quoted JSON string. */
+static void
+dump_sym(VALUE obj, Out out) {
+    ID id = SYM2ID(obj);
+    const char *name = rb_id2name(id);
+    size_t len = strlen(name);
+
+    dump_cstr(name, (int)len, out);
+}
+
+/* Appends the Ruby Array a to out as a JSON array and leaves a '\0' at the
+ * new write position.
+ *
+ * An empty array is written as []. Otherwise every element goes on its own
+ * line indented for depth + 1, elements are separated by commas and the
+ * closing bracket is placed on a line indented for depth.
+ */
+static void
+dump_array(VALUE a, int depth, Out out) {
+    VALUE *items = RARRAY_PTR(a);
+    int total = (int)RARRAY_LEN(a);
+    int inner = depth + 1;
+    size_t need = 2;
+    int i;
+
+    if (out->end - out->cur <= (long)need) {
+        grow(out, need);
+    }
+    *out->cur++ = '[';
+    if (0 == total) {
+        *out->cur++ = ']';
+        *out->cur = '\0';
+        return;
+    }
+    need = inner * out->indent + 2;
+    for (i = 0; i < total; i++) {
+        if (out->end - out->cur <= (long)need) {
+            grow(out, need);
+        }
+        fill_indent(out, inner);
+        dump_val(items[i], inner, out);
+        if (i < total - 1) {
+            // TBD check size?
+            *out->cur++ = ',';
+        }
+    }
+    need = depth * out->indent + 1;
+    if (out->end - out->cur <= (long)need) {
+        grow(out, need);
+    }
+    fill_indent(out, depth);
+    *out->cur++ = ']';
+    *out->cur = '\0';
+}
+
+/* rb_hash_foreach() callback used by dump_hash. Writes one indented
+ * key:value pair followed by a comma; dump_hash removes the final comma.
+ *
+ * key   - hash key; Symbols are written by name, anything else goes through
+ *         dump_str (which converts with StringValuePtr and raises TypeError
+ *         for objects that can not be converted)
+ * value - hash value, written with dump_val
+ * out   - output state; out->depth holds the nesting depth for this pair
+ *
+ * Always returns ST_CONTINUE.
+ *
+ * Symbol keys used to be passed to dump_str as well, which made every hash
+ * with Symbol keys fail.
+ */
+static int
+hash_cb(VALUE key, VALUE value, Out out) {
+    int depth = out->depth;
+    size_t size = depth * out->indent + 1;
+
+    if (out->end - out->cur <= (long)size) {
+        grow(out, size);
+    }
+    fill_indent(out, depth);
+    if (T_SYMBOL == rb_type(key)) {
+        dump_sym(key, out);
+    } else {
+        dump_str(key, out);
+    }
+    *out->cur++ = ':';
+    dump_val(value, depth, out);
+    out->depth = depth; // a nested hash changed out->depth, restore it for the next pair
+    *out->cur++ = ',';
+
+    return ST_CONTINUE;
+}
+
+/* Appends the Ruby Hash obj to out as a JSON object and leaves a '\0' at the
+ * new write position.
+ *
+ * An empty hash is written as {}. Otherwise hash_cb writes each pair on its
+ * own line at depth + 1 and the closing brace goes on a line indented for
+ * depth.
+ *
+ * Room for the opening brace (and the closing one of an empty hash) is now
+ * reserved up front; before, those bytes were written without any capacity
+ * check and relied on the slack past out->end.
+ */
+static void
+dump_hash(VALUE obj, int depth, Out out) {
+    int cnt = (int)RHASH_SIZE(obj);
+    size_t size = 2;
+
+    if (out->end - out->cur <= (long)size) {
+        grow(out, size);
+    }
+    *out->cur++ = '{';
+    if (0 == cnt) {
+        *out->cur++ = '}';
+    } else {
+        size = depth * out->indent + 2;
+        out->depth = depth + 1; // hash_cb reads the depth from out, it has no depth argument
+        rb_hash_foreach(obj, hash_cb, (VALUE)out);
+        out->cur--; // backup to overwrite last comma
+        if (out->end - out->cur <= (long)size) {
+            grow(out, size);
+        }
+        fill_indent(out, depth);
+        *out->cur++ = '}';
+    }
+    *out->cur = '\0';
+}
+
+/* Appends obj to out as JSON by dispatching on its Ruby type.
+ *
+ * obj   - value to write
+ * depth - current nesting depth, passed on to arrays and hashes
+ * out   - output state
+ *
+ * Types without a writer (Bignum, arbitrary objects, ...) are written as
+ * null. They used to write nothing, which produced invalid JSON such as
+ * "[1,,2]" inside containers and left the buffer unterminated when such a
+ * value was the top level object.
+ */
+static void
+dump_val(VALUE obj, int depth, Out out) {
+    switch (rb_type(obj)) {
+    case T_NIL: dump_nil(out); break;
+    case T_TRUE: dump_true(out); break;
+    case T_FALSE: dump_false(out); break;
+    case T_FIXNUM: dump_fixnum(obj, out); break;
+    case T_FLOAT: dump_float(obj, out); break;
+    // BIGNUM
+    case T_STRING: dump_str(obj, out); break;
+    case T_SYMBOL: dump_sym(obj, out); break;
+    case T_ARRAY: dump_array(obj, depth, out); break;
+    case T_HASH: dump_hash(obj, depth, out); break;
+    default:
+        // TBD raise, call json, or get all variables; null keeps the output well formed until then
+        dump_nil(out);
+        break;
+    }
+}
+
+/* Initializes out with a freshly allocated buffer and dumps obj into it.
+ *
+ * obj   - top level value to write
+ * copts - options, stored in out->opts
+ * out   - caller provided state; on return out->buf holds the '\0'
+ *         terminated JSON text and out->cur points at the terminator. The
+ *         caller owns out->buf and must free() it.
+ *
+ * Raises NoMemError when the initial buffer can not be allocated. The
+ * allocation result was not checked before, so a failed malloc ended in a
+ * write through a null pointer. The buffer is also terminated up front so
+ * it is a valid empty string even if nothing gets written.
+ */
+static void
+dump_obj_to_json(VALUE obj, Options copts, Out out) {
+    out->buf = (char*)malloc(65336);
+    if (0 == out->buf) {
+        rb_raise(rb_eNoMemError, "Failed to create string. [%d:%s]\n", ENOMEM, strerror(ENOMEM));
+    }
+    out->end = out->buf + 65325; // 1 less than end plus extra for possible errors
+    out->cur = out->buf;
+    *out->cur = '\0';
+    out->opts = copts;
+    out->obj = obj;
+    // TBD circular reference cache (copts->circular) is not implemented yet
+    out->indent = copts->indent;
+    out->indent = 2; // TBD the option is overridden until dump options are parsed
+    dump_val(obj, 0, out);
+}
+
+/* Dumps obj as JSON and returns the heap allocated, '\0' terminated text
+ * produced by dump_obj_to_json. The buffer is handed to the caller as is.
+ */
+char*
+write_obj_to_str(VALUE obj, Options copts) {
+    struct _Out state;
+    Out out = &state;
+
+    dump_obj_to_json(obj, copts, out);
+
+    return out->buf;
+}
+
+/* Dumps obj as JSON and writes the text to the file at path, replacing any
+ * previous content.
+ *
+ * obj   - value to write
+ * path  - file to create or truncate
+ * copts - dump options
+ *
+ * Raises IOError when the file can not be opened, written or closed. The
+ * JSON buffer and the FILE are released before raising because rb_raise()
+ * does not return; previously both leaked on the error paths. errno is
+ * captured right after the failing call: ferror() only returns a flag and is
+ * not a value strerror() understands.
+ */
+void
+write_obj_to_file(VALUE obj, const char *path, Options copts) {
+    struct _Out out;
+    size_t size;
+    FILE *f;
+    int err;
+
+    dump_obj_to_json(obj, copts, &out);
+    size = out.cur - out.buf;
+    if (0 == (f = fopen(path, "w"))) {
+        err = errno;
+        free(out.buf);
+        rb_raise(rb_eIOError, "%s\n", strerror(err));
+    }
+    if (size != fwrite(out.buf, 1, size, f)) {
+        err = errno;
+        free(out.buf);
+        fclose(f);
+        rb_raise(rb_eIOError, "Write failed. [%d:%s]\n", err, strerror(err));
+    }
+    free(out.buf);
+    if (0 != fclose(f)) { // buffered data is flushed here, so a failure means lost output
+        err = errno;
+        rb_raise(rb_eIOError, "Write failed. [%d:%s]\n", err, strerror(err));
+    }
+}
View
156 ext/oj/gen_load.c
@@ -1,156 +0,0 @@
-/* gen_load.c
- * Copyright (c) 2011, Peter Ohler
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * - Neither the name of Peter Ohler nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-
-#include "ruby.h"
-#include "oj.h"
-
-static void add_obj(PInfo pi);
-static void end_obj(PInfo pi);
-static void add_array(PInfo pi);
-static void end_array(PInfo pi);
-static void add_key(PInfo pi, char *text);
-static void add_str(PInfo pi, char *text);
-static void add_int(PInfo pi, int64_t val);
-static void add_dub(PInfo pi, double val);
-static void add_true(PInfo pi);
-static void add_false(PInfo pi);
-static void add_nil(PInfo pi);
-
-struct _ParseCallbacks _oj_gen_callbacks = {
- add_obj,
- end_obj,
- add_array,
- end_array,
- add_key,
- add_str,
- add_int,
- add_dub,
- add_nil,
- add_true,
- add_false
-};
-
-ParseCallbacks oj_gen_callbacks = &_oj_gen_callbacks;
-
-static inline void
-add_val(PInfo pi, VALUE val) {
- if (0 == pi->h) {
- pi->obj = val;
- } else if (ArrayCode == pi->h->type) {
- rb_ary_push(pi->h->obj, val);
- } else if (ObjectCode == pi->h->type) {
- // TBD
- } else {
- raise_error("expected to be in an Array or Hash", pi->str, pi->s);
- }
-}
-
-static void
-add_obj(PInfo pi) {
- printf("*** add_obj\n");
-}
-
-static void
-end_obj(PInfo pi) {
- printf("*** end_obj\n");
-}
-
-static void
-add_array(PInfo pi) {
- VALUE a = rb_ary_new();
-
- if (0 == pi->h) {
- pi->h = pi->helpers;
- pi->h->obj = a;
- pi->h->type = ArrayCode;
- pi->obj = a;
- } else if (ArrayCode == pi->h->type) {
- rb_ary_push(pi->h->obj, a);
- pi->h++;
- pi->h->obj = a;
- pi->h->type = ArrayCode;
- } else if (ObjectCode == pi->h->type) {
- // TBD
- } else {
- raise_error("expected to be in an Array or Hash", pi->str, pi->s);
- }
-}
-
-static void
-end_array(PInfo pi) {
- if (0 == pi->h) {
- // TBD error
- } else if (pi->helpers < pi->h) {
- pi->h--;
- } else {
- pi->h = 0;
- }
-}
-
-static void
-add_key(PInfo pi, char *text) {
- printf("*** add_key %s\n", text);
-}
-
-static void
-add_str(PInfo pi, char *text) {
- printf("*** add_str %s\n", text);
-}
-
-static void
-add_int(PInfo pi, int64_t val) {
- printf("*** add_int %lld\n", val);
-}
-
-static void
-add_dub(PInfo pi, double val) {
- printf("*** add_dub %f\n", val);
-}
-
-static void
-add_true(PInfo pi) {
- add_val(pi, Qtrue);
-}
-
-static void
-add_false(PInfo pi) {
- add_val(pi, Qfalse);
-}
-
-static void
-add_nil(PInfo pi) {
- add_val(pi, Qnil);
-}
View
426 ext/oj/load.c
@@ -0,0 +1,426 @@
+/* load.c
+ * Copyright (c) 2012, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "ruby.h"
+#include "oj.h"
+
+typedef struct _ParseInfo {
+ char *str; /* buffer being read from */
+ char *s; /* current position in buffer */
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding *encoding; /* set to 0 by parse(); read_str associates it with strings when non-zero */
+#else
+ void *encoding; /* set to 0 by parse(); otherwise unused in this configuration */
+#endif
+ int trace; /* copied from the trace argument of parse(), which prints the JSON text when it is non-zero */
+} *ParseInfo;
+
+static VALUE read_next(ParseInfo pi);
+static VALUE read_obj(ParseInfo pi);
+static VALUE read_array(ParseInfo pi);
+static VALUE read_str(ParseInfo pi);
+static VALUE read_num(ParseInfo pi);
+static VALUE read_true(ParseInfo pi);
+static VALUE read_false(ParseInfo pi);
+static VALUE read_nil(ParseInfo pi);
+static void next_non_white(ParseInfo pi);
+static char* read_quoted_value(ParseInfo pi);
+
+
+/* This JSON parser is a single pass, destructive parser. It is a single pass
+ * parser since it makes only one pass over the characters in the JSON
+ * document string. It is destructive because it re-uses the content of the
+ * string for values and places \0 characters at various places to mark the
+ * end of strings. Unlike the SAX style callback parser it was derived from,
+ * each read function returns the Ruby object it parsed instead of invoking a
+ * callback when a document element is encountered.
+ *
+ * Note: the earlier description of parsing as very tolerant (missing headers
+ * and misspelled element endings being passed over) applied to XML input.
+ * The functions below report unexpected characters through raise_error().
+ */
+
+/* Advances pi->s past spaces, tabs, form feeds, newlines and carriage
+ * returns. Stops on any other character, including the terminating '\0'.
+ */
+inline static void
+next_non_white(ParseInfo pi) {
+    char c = *pi->s;
+
+    while (' ' == c || '\t' == c || '\f' == c || '\n' == c || '\r' == c) {
+        pi->s++;
+        c = *pi->s;
+    }
+}
+
+/* Advances pi->s until it points at a space, tab, form feed, newline,
+ * carriage return or the terminating '\0'.
+ */
+inline static void
+next_white(ParseInfo pi) {
+    for (;;) {
+        char c = *pi->s;
+
+        if (' ' == c || '\t' == c || '\f' == c || '\n' == c || '\r' == c || '\0' == c) {
+            return;
+        }
+        pi->s++;
+    }
+}
+
+/* Parses the JSON document in json and returns the Ruby object it
+ * describes.
+ *
+ * json  - '\0' terminated, writable JSON text; it is modified while parsing
+ * trace - when non-zero the text is printed before parsing starts
+ *
+ * Raises through raise_error() when json is null, when no value can be read
+ * or when anything other than white space follows the value. The null
+ * argument message used to talk about an "xml string".
+ */
+VALUE
+parse(char *json, int trace) {
+    VALUE obj;
+    struct _ParseInfo pi;
+
+    if (0 == json) {
+        raise_error("Invalid arg, JSON string can not be null", json, 0);
+    }
+    if (trace) {
+        printf("Parsing JSON:\n%s\n", json);
+    }
+    /* initialize parse info */
+    pi.str = json;
+    pi.s = json;
+    pi.encoding = 0;
+    pi.trace = trace;
+    if (Qundef == (obj = read_next(&pi))) {
+        raise_error("no object read", pi.str, pi.s);
+    }
+    next_non_white(&pi); // skip white space
+    if ('\0' != *pi.s) {
+        raise_error("invalid format, extra characters", pi.str, pi.s);
+    }
+    return obj;
+}
+
+/* Skips white space and reads the next JSON value, choosing the reader from
+ * the first character of the value.
+ *
+ * Returns the parsed object, or Qundef when the input ends or the character
+ * can not start a value.
+ */
+static VALUE
+read_next(ParseInfo pi) {
+    char c;
+
+    next_non_white(pi); // skip white space
+    c = *pi->s;
+    if ('{' == c) {
+        return read_obj(pi);
+    }
+    if ('[' == c) {
+        return read_array(pi);
+    }
+    if ('"' == c) {
+        return read_str(pi);
+    }
+    if ('+' == c || '-' == c || ('0' <= c && c <= '9')) {
+        return read_num(pi);
+    }
+    if ('t' == c) {
+        return read_true(pi);
+    }
+    if ('f' == c) {
+        return read_false(pi);
+    }
+    if ('n' == c) {
+        return read_nil(pi);
+    }
+    // end of input and any other character alike
+    return Qundef;
+}
+
+/* Reads a JSON object into a Ruby Hash. pi->s must point at the opening
+ * brace; on return it points just past the closing brace.
+ *
+ * Raises through raise_error() when a key is not a quoted string, a colon is
+ * missing, a value can not be read or a pair is not followed by , or }.
+ *
+ * An empty object ({}) is accepted. It used to be rejected with "unexpected
+ * character" because a key was required right after the opening brace.
+ */
+static VALUE
+read_obj(ParseInfo pi) {
+    VALUE obj = rb_hash_new();
+    VALUE key = Qundef;
+    VALUE val = Qundef;
+
+    pi->s++; // skip the opening brace
+    next_non_white(pi);
+    if ('}' == *pi->s) {
+        pi->s++;
+        return obj;
+    }
+    while (1) {
+        next_non_white(pi);
+        if ('"' != *pi->s || Qundef == (key = read_str(pi))) {
+            raise_error("unexpected character", pi->str, pi->s);
+        }
+        next_non_white(pi);
+        if (':' == *pi->s) {
+            pi->s++;
+        } else {
+            raise_error("invalid format, expected :", pi->str, pi->s);
+        }
+        if (Qundef == (val = read_next(pi))) {
+            raise_error("unexpected character", pi->str, pi->s);
+        }
+        rb_hash_aset(obj, key, val);
+        next_non_white(pi);
+        if ('}' == *pi->s) {
+            pi->s++;
+            break;
+        } else if (',' == *pi->s) {
+            pi->s++;
+        } else {
+            raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
+        }
+    }
+    return obj;
+}
+
+/* Reads a JSON array into a Ruby Array. pi->s must point at the opening
+ * bracket; on return it points just past the closing bracket.
+ *
+ * Raises through raise_error() when an element can not be read or an
+ * element is not followed by , or ].
+ *
+ * An empty array ([]) is accepted. It used to be rejected with "unexpected
+ * character" because an element was required right after the bracket.
+ */
+static VALUE
+read_array(ParseInfo pi) {
+    VALUE a = rb_ary_new();
+    VALUE e;
+
+    pi->s++; // skip the opening bracket
+    next_non_white(pi);
+    if (']' == *pi->s) {
+        pi->s++;
+        return a;
+    }
+    while (1) {
+        if (Qundef == (e = read_next(pi))) {
+            raise_error("unexpected character", pi->str, pi->s);
+        }
+        rb_ary_push(a, e);
+        next_non_white(pi); // skip white space
+        if (',' == *pi->s) {
+            pi->s++;
+        } else if (']' == *pi->s) {
+            pi->s++;
+            break;
+        } else {
+            raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
+        }
+    }
+    return a;
+}
+
+/* Reads the quoted string at pi->s and returns it as a new Ruby String. When
+ * encoding support is compiled in and pi->encoding is set, the encoding is
+ * associated with the result.
+ */
+static VALUE
+read_str(ParseInfo pi) {
+    VALUE s = rb_str_new2(read_quoted_value(pi));
+
+#ifdef HAVE_RUBY_ENCODING_H
+    if (0 == pi->encoding) {
+        return s;
+    }
+    rb_enc_associate(s, pi->encoding);
+#endif
+    return s;
+}
+
+/* Reads a number starting at pi->s and leaves pi->s on the first character
+ * that is not part of it.
+ *
+ * Returns an Integer when the text has no fraction digits and no non-zero
+ * exponent, otherwise a Float.
+ *
+ * The sign is applied to the complete magnitude. Before, the integer part
+ * was negated first and the fraction added afterwards, so -1.5 came out as
+ * -0.5 and -0.5 as 0.5. Fraction digits beyond the 18th are skipped: they
+ * are below the precision of a double and would overflow the accumulators.
+ */
+static VALUE
+read_num(ParseInfo pi) {
+    int64_t n = 0;
+    int64_t a = 0; // fraction digits read as an integer
+    int64_t div = 1; // 10 to the number of fraction digits kept in a
+    long e = 0;
+    int neg = 0;
+    int eneg = 0;
+    int has_frac = 0;
+    int kept = 0;
+
+    if ('-' == *pi->s) {
+        pi->s++;
+        neg = 1;
+    } else if ('+' == *pi->s) {
+        pi->s++;
+    }
+    for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
+        n = n * 10 + (*pi->s - '0');
+    }
+    if ('.' == *pi->s) {
+        pi->s++;
+        for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
+            has_frac = 1;
+            if (kept < 18) {
+                a = a * 10 + (*pi->s - '0');
+                div *= 10;
+                kept++;
+            }
+        }
+    }
+    if ('e' == *pi->s || 'E' == *pi->s) {
+        pi->s++;
+        if ('-' == *pi->s) {
+            pi->s++;
+            eneg = 1;
+        } else if ('+' == *pi->s) {
+            pi->s++;
+        }
+        for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
+            e = e * 10 + (*pi->s - '0');
+        }
+    }
+    if (0 == e && !has_frac) {
+        if (neg) {
+            n = -n;
+        }
+        return LONG2NUM(n);
+    } else {
+        double d = (double)n + (double)a / (double)div;
+
+        if (0 != e) {
+            if (eneg) {
+                e = -e;
+            }
+            d *= pow(10.0, e);
+        }
+        if (neg) {
+            d = -d;
+        }
+        return DBL2NUM(d);
+    }
+}
+
+/* Consumes the literal true, the leading t of which is at pi->s, and returns
+ * Qtrue. Raises through raise_error() when the remaining letters differ.
+ */
+static VALUE
+read_true(ParseInfo pi) {
+    pi->s++; // the t was already matched by the caller
+    if (0 != strncmp("rue", pi->s, 3)) {
+        raise_error("invalid format, expected 'true'", pi->str, pi->s);
+    }
+    pi->s += 3;
+
+    return Qtrue;
+}
+
+/* Consumes the literal false, the leading f of which is at pi->s, and
+ * returns Qfalse. Raises through raise_error() when the remaining letters
+ * differ.
+ */
+static VALUE
+read_false(ParseInfo pi) {
+    pi->s++; // the f was already matched by the caller
+    if (0 != strncmp("alse", pi->s, 4)) {
+        raise_error("invalid format, expected 'false'", pi->str, pi->s);
+    }
+    pi->s += 4;
+
+    return Qfalse;
+}
+
+/* Consumes the literal null, the leading n of which is at pi->s, and returns
+ * Qnil. Raises through raise_error() when the remaining letters differ.
+ *
+ * The error message now names the token that is actually checked for, null;
+ * it used to say 'nil'.
+ */
+static VALUE
+read_nil(ParseInfo pi) {
+    pi->s++;
+    if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
+        raise_error("invalid format, expected 'null'", pi->str, pi->s);
+    }
+    pi->s += 3;
+
+    return Qnil;
+}
+
+/* Converts the two hexadecimal digits at h (either case) into one byte, the
+ * first digit being the high nibble.
+ *
+ * On a character that is not a hex digit pi->s is set to that character and
+ * an error is raised through raise_error().
+ */
+static char
+read_hex(ParseInfo pi, char *h) {
+    uint8_t b = 0;
+    int i;
+
+    for (i = 0; i < 2; i++, h++) {
+        uint8_t nibble = 0;
+
+        if ('0' <= *h && *h <= '9') {
+            nibble = *h - '0';
+        } else if ('A' <= *h && *h <= 'F') {
+            nibble = *h - 'A' + 10;
+        } else if ('a' <= *h && *h <= 'f') {
+            nibble = *h - 'a' + 10;
+        } else {
+            pi->s = h;
+            raise_error("invalid hex character", pi->str, pi->s);
+        }
+        b = (uint8_t)((b << 4) + nibble);
+    }
+    return (char)b;
+}
+
+/* Assume the value starts immediately and goes until the quote character is
+ * reached again. Do not read the character after the terminating quote.
+ *
+ * The string is unescaped in place inside the input buffer: h (head) is the
+ * read position and t (tail) the write position, and t never gets ahead of
+ * h. The result is terminated with a \0 written into the buffer.
+ *
+ * Returns a pointer to the first character after the opening quote. On
+ * return pi->s points just past the closing quote.
+ *
+ * Raises through raise_error() when the input ends before the closing quote,
+ * when a backslash is followed by an unsupported character or when a \u
+ * escape contains a character that is not a hex digit.
+ */
+static char*
+read_quoted_value(ParseInfo pi) {
+ char *value = 0;
+ char *h = pi->s; // head, the read position
+ char *t = h; // tail, the write position
+
+ h++; // skip quote character
+ t++;
+ value = h;
+ // TBD can whole string be read in and then eval-ed by ruby if there is a special character
+ for (; '"' != *h; h++, t++) {
+ if ('\0' == *h) {
+ pi->s = h;
+ raise_error("quoted string not terminated", pi->str, pi->s);
+ } else if ('\\' == *h) {
+ h++; // move to the character that selects the escape
+ switch (*h) {
+ case 'n': *t = '\n'; break;
+ case 'r': *t = '\r'; break;
+ case 't': *t = '\t'; break;
+ case 'f': *t = '\f'; break;
+ case 'b': *t = '\b'; break;
+ case '"': *t = '"'; break;
+ case '/': *t = '/'; break;
+ case '\\': *t = '\\'; break;
+ case 'u':
+ // the four hex digits become two raw bytes; a zero first byte is dropped and no UTF-8 encoding is done
+ h++;
+ *t = read_hex(pi, h);
+ h += 2;
+ if ('\0' != *t) {
+ t++;
+ }
+ *t = read_hex(pi, h);
+ h++; // leaves h on the last hex digit, the loop increment steps past it
+ break;
+ default:
+ pi->s = h;
+ raise_error("invalid escaped character", pi->str, pi->s);
+ break;
+ }
+ } else if (t != h) { // copy down only once an escape has made the output shorter than the input
+ *t = *h;
+ }
+ }
+ *t = '\0'; // terminate value
+ pi->s = h + 1;
+
+ return value;
+}
View
41 ext/oj/oj.c
@@ -36,18 +36,25 @@
#include "ruby.h"
#include "oj.h"
+struct _Options default_options = {
+ { '\0' }, // encoding, empty so that dump() skips rb_enc_associate
+ 2, // indent
+ 0, // trace
+ No, // circular
+ NoMode, // mode
+// StrictEffort, // effort, left out so the effort member is zero initialized
+};
+
void Init_oj();
VALUE Oj = Qnil;
-extern ParseCallbacks oj_gen_callbacks;
-
-
static VALUE
load(char *json, int argc, VALUE *argv, VALUE self) {
VALUE obj;
-
- obj = parse(json, oj_gen_callbacks, 0, 0);
+
+ // TBD other options like obj mode
+ obj = parse(json, 0);
free(json);
return obj;
@@ -101,12 +108,36 @@ load_file(int argc, VALUE *argv, VALUE self) {
return load(json, argc - 1, argv + 1, self);
}
+/* Implementation of Oj.dump. Converts the first argument to JSON text and
+ * returns it as a Ruby String.
+ *
+ * argc/argv - Ruby arguments; argv[0] is the object to dump. A second
+ *             argument is accepted but not interpreted yet.
+ * self      - receiver, unused
+ *
+ * Raises ArgumentError when called without arguments (argv[0] used to be
+ * read regardless of argc) and NoMemError when no JSON buffer is returned.
+ */
+static VALUE
+dump(int argc, VALUE *argv, VALUE self) {
+    char *json;
+    struct _Options copts = default_options;
+    VALUE rstr;
+
+    if (argc < 1) {
+        rb_raise(rb_eArgError, "Wrong number of arguments to dump().\n");
+    }
+    if (2 == argc) {
+        //parse_dump_options(argv[1], &copts);
+    }
+    if (0 == (json = write_obj_to_str(*argv, &copts))) {
+        rb_raise(rb_eNoMemError, "Not enough memory.\n");
+    }
+    rstr = rb_str_new2(json);
+#ifdef ENCODING_INLINE_MAX
+    if ('\0' != *copts.encoding) {
+        rb_enc_associate(rstr, rb_enc_find(copts.encoding));
+    }
+#endif
+    free(json); // rb_str_new2 copied the text
+
+    return rstr;
+}
+
void Init_oj() {
Oj = rb_define_module("Oj"); // the module object is kept in the file level Oj variable
rb_define_module_function(Oj, "load", load_str, -1);
rb_define_module_function(Oj, "load_file", load_file, -1);
+ rb_define_module_function(Oj, "dump", dump, -1); // registers dump(), which takes its arguments as argc/argv
}
void
View
88 ext/oj/oj.h
@@ -43,7 +43,6 @@ extern "C" {
// HAVE_RUBY_ENCODING_H defined for Ruby 1.9
#include "ruby/encoding.h"
#endif
-#include "cache.h"
#ifdef JRUBY
#define NO_RSTRUCT 1
@@ -61,75 +60,30 @@ extern "C" {
#define raise_error(msg, xml, current) _raise_error(msg, xml, current, __FILE__, __LINE__)
-#define MAX_TEXT_LEN 4096
-#define MAX_DEPTH 1024
-
typedef enum {
- NoCode = 0,
- ArrayCode = 'a',
- String64Code = 'b', // base64 encoded String
- ClassCode = 'c',
- Symbol64Code = 'd', // base64 encoded Symbol
- FloatCode = 'f',
- RegexpCode = 'g',
- HashCode = 'h',
- FixnumCode = 'i',
- BignumCode = 'j',
- KeyCode = 'k', // indicates the value is a hash key, kind of a hack
- RationalCode = 'l',
- SymbolCode = 'm',
- FalseClassCode = 'n',
- ObjectCode = 'o',
- RefCode = 'p',
- RangeCode = 'r',
- StringCode = 's',
- TimeCode = 't',
- StructCode = 'u',
- ComplexCode = 'v',
- RawCode = 'x',
- TrueClassCode = 'y',
- NilClassCode = 'z',
-} Type;
-
-typedef struct _Helper {
- ID var; /* Object var ID */
- VALUE obj; /* object created or Qundef if not appropriate */
- Type type;
-} *Helper;
-
-typedef struct _PInfo *PInfo;
-
-typedef struct _ParseCallbacks {
- void (*add_obj)(PInfo pi);
- void (*end_obj)(PInfo pi);
- void (*add_array)(PInfo pi);
- void (*end_array)(PInfo pi);
- void (*add_key)(PInfo pi, char *text);
- void (*add_str)(PInfo pi, char *text);
- void (*add_int)(PInfo pi, int64_t val);
- void (*add_dub)(PInfo pi, double val);
- void (*add_nil)(PInfo pi);
- void (*add_true)(PInfo pi);
- void (*add_false)(PInfo pi);
-} *ParseCallbacks;
+ Yes = 'y',
+ No = 'n',
+ NotSet = 0
+} YesNo;
-/* parse information structure */
-struct _PInfo {
- struct _Helper helpers[MAX_DEPTH];
- Helper h; /* current helper or 0 if not set */
- char *str; /* buffer being read from */
- char *s; /* current position in buffer */
- ParseCallbacks pcb;
- VALUE obj;
-#ifdef HAVE_RUBY_ENCODING_H
- rb_encoding *encoding;
-#else
- void *encoding;
-#endif
- int trace;
-};
+typedef enum {
+ ObjMode = 'o', // not referenced in the code visible in this diff
+ GenMode = 'g', // not referenced in the code visible in this diff
+ NoMode = 0 // value given to mode in default_options
+} LoadMode;
+
+typedef struct _Options {
+ char encoding[64]; // encoding name, stored in the option to avoid GC invalidation in default values
+ int indent; // indentation for dump, default 2
+ int trace; // trace level
+ char circular; // YesNo
+ char mode; // LoadMode
+ char effort; // Effort -- NOTE(review): no Effort type is visible in this diff, confirm where it is declared
+} *Options;
+
+extern VALUE parse(char *json, int trace);
+extern char* write_obj_to_str(VALUE obj, Options copts);
-extern VALUE parse(char *json, ParseCallbacks pcb, char **endp, int trace);
extern void _raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
View
780 ext/oj/parse.c
@@ -1,780 +0,0 @@
-/* parse.c
- * Copyright (c) 2011, Peter Ohler
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * - Neither the name of Peter Ohler nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "ruby.h"
-#include "oj.h"
-
-//static void read_instruction(PInfo pi);
-//static void read_doctype(PInfo pi);
-//static void read_comment(PInfo pi);
-//static void read_element(PInfo pi);
-//static void read_text(PInfo pi);
-//static void read_cdata(PInfo pi);
-//static char* read_name_token(PInfo pi);
-//static char* read_quoted_value(PInfo pi);
-//static int read_coded_char(PInfo pi);
-//static void next_non_white(PInfo pi);
-//static int collapse_special(char *str);
-
-static void read_next(PInfo pi);
-static void read_obj(PInfo pi);
-static void read_array(PInfo pi);
-static void read_str(PInfo pi);
-static void read_num(PInfo pi);
-static void read_true(PInfo pi);
-static void read_false(PInfo pi);
-static void read_nil(PInfo pi);
-
-
-/* This JSON parser is a single pass, destructive, callback parser. It is a
- * single pass parser since it only makes one pass over the characters in the
- * JSON document string. It is destructive because it re-uses the content of
- * the string for values in the callback and places \0 characters at various
- * places to mark the end of tokens and strings. It is a callback parser like
- * a SAX parser because it uses callbacks when document elements are
- * encountered.
- *
- * Parsing is very tolerant. Lack of headers and even misspelled element
- * endings are passed over without raising an error. A best attempt is made in
- * all cases to parse the string.
- */
-
-inline static void
-next_non_white(PInfo pi) {
- for (; 1; pi->s++) {
- switch(*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- break;
- default:
- return;
- }
- }
-}
-
-inline static void
-next_white(PInfo pi) {
- for (; 1; pi->s++) {
- switch(*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- case '\0':
- return;
- default:
- break;
- }
- }
-}
-
-VALUE
-parse(char *json, ParseCallbacks pcb, char **endp, int trace) {
- struct _PInfo pi;
-
- if (0 == json) {
- raise_error("Invalid arg, xml string can not be null", json, 0);
- }
- if (trace) {
- printf("Parsing JSON:\n%s\n", json);
- }
- /* initialize parse info */
- pi.str = json;
- pi.s = json;
- pi.h = 0;
- pi.pcb = pcb;
- pi.obj = Qnil;
- pi.encoding = 0;
- pi.trace = trace;
- read_next(&pi);
- next_non_white(&pi); // skip white space
- if ('\0' != *pi.s) {
- raise_error("invalid format, extra characters", pi.str, pi.s);
- }
- return pi.obj;
-}
-
-static void
-read_next(PInfo pi) {
- next_non_white(pi); // skip white space
- switch (*pi->s) {
- case '{':
- pi->s++;
- read_obj(pi);
- break;
- case '[':
- pi->s++;
- read_array(pi);
- break;
- case '"':
- pi->s++;
- read_str(pi);
- break;
- case '+':
- case '-':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- read_num(pi);
- break;
- case 't':
- read_true(pi);
- break;
- case 'f':
- read_false(pi);
- break;
- case 'n':
- read_nil(pi);
- break;
- case '\0':
- break;
- default:
- break;
- }
-}
-
-static void
-read_obj(PInfo pi) {
-}
-
-static void
-read_array(PInfo pi) {
- if (0 != pi->pcb->add_array) {
- pi->pcb->add_array(pi);
- }
- while (1) {
- read_next(pi);
- next_non_white(pi); // skip white space
- if (',' == *pi->s) {
- pi->s++;
- } else if (']' == *pi->s) {
- pi->s++;
- break;
- } else {
- raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
- }
- }
- if (0 != pi->pcb->end_array) {
- pi->pcb->end_array(pi);
- }
-}
-
-static void
-read_str(PInfo pi) {
-}
-
-static void
-read_num(PInfo pi) {
-}
-
-static void
-read_true(PInfo pi) {
- pi->s++;
- if ('r' != *pi->s || 'u' != *(pi->s + 1) || 'e' != *(pi->s + 2)) {
- raise_error("invalid format, expected 'true'", pi->str, pi->s);
- }
- pi->s += 3;
- if (0 != pi->pcb->add_true) {
- pi->pcb->add_true(pi);
- }
-}
-
-static void
-read_false(PInfo pi) {
- pi->s++;
- if ('a' != *pi->s || 'l' != *(pi->s + 1) || 's' != *(pi->s + 2) || 'e' != *(pi->s + 3)) {
- raise_error("invalid format, expected 'false'", pi->str, pi->s);
- }
- pi->s += 4;
- if (0 != pi->pcb->add_false) {
- pi->pcb->add_false(pi);
- }
-}
-
-static void
-read_nil(PInfo pi) {
- pi->s++;
- if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
- raise_error("invalid format, expected 'nil'", pi->str, pi->s);
- }
- pi->s += 3;
- if (0 != pi->pcb->add_nil) {
- pi->pcb->add_nil(pi);
- }
-}
-
-
-#if 0
-/* Entered after the "<?" sequence. Ready to read the rest.
- */
-static void
-read_instruction(PInfo pi) {
- struct _Attr attrs[MAX_ATTRS + 1];
- Attr a = attrs;
- char *target;
- char *end;
- char c;
-
- memset(attrs, 0, sizeof(attrs));
- target = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- c = *pi->s;
- *end = '\0'; // terminate name
- if ('?' != c) {
- while ('?' != *pi->s) {
- if ('\0' == *pi->s) {
- raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
- }
- next_non_white(pi);
- a->name = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- if ('=' != *pi->s++) {
- raise_error("invalid format, no attribute value", pi->str, pi->s);
- }
- *end = '\0'; // terminate name
- // read value
- next_non_white(pi);
- a->value = read_quoted_value(pi);
- a++;
- if (MAX_ATTRS <= (a - attrs)) {
- raise_error("too many attributes", pi->str, pi->s);
- }
- }
- if ('?' == *pi->s) {
- pi->s++;
- }
- } else {
- pi->s++;
- }
- if ('>' != *pi->s++) {
- raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
- }
- if (0 != pi->pcb->instruct) {
- pi->pcb->instruct(pi, target, attrs);
- }
-}
-
-/* Entered after the "<!DOCTYPE" sequence plus the first character after
- * that. Ready to read the rest. Returns error code.
- */
-static void
-read_doctype(PInfo pi) {
- char *docType;
- int depth = 1;
- char c;
-
- next_non_white(pi);
- docType = pi->s;
- while (1) {
- c = *pi->s++;
- if ('\0' == c) {
- raise_error("invalid format, prolog not terminated", pi->str, pi->s);
- } else if ('<' == c) {
- depth++;
- } else if ('>' == c) {
- depth--;
- if (0 == depth) { /* done, at the end */
- pi->s--;
- break;
- }
- }
- }
- *pi->s = '\0';
- pi->s++;
- if (0 != pi->pcb->add_doctype) {
- pi->pcb->add_doctype(pi, docType);
- }
-}
-
-/* Entered after "<!--". Returns error code.
- */
-static void
-read_comment(PInfo pi) {
- char *end;
- char *s;
- char *comment;
- int done = 0;
-
- next_non_white(pi);
- comment = pi->s;
- end = strstr(pi->s, "-->");
- if (0 == end) {
- raise_error("invalid format, comment not terminated", pi->str, pi->s);
- }
- for (s = end - 1; pi->s < s && !done; s--) {
- switch(*s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- break;
- default:
- *(s + 1) = '\0';
- done = 1;
- break;
- }
- }
- *end = '\0'; // in case the comment was blank
- pi->s = end + 3;
- if (0 != pi->pcb->add_comment) {
- pi->pcb->add_comment(pi, comment);
- }
-}
-
-/* Entered after the '<' and the first character after that. Returns status
- * code.
- */
-static void
-read_element(PInfo pi) {
- struct _Attr attrs[MAX_ATTRS];
- Attr ap = attrs;
- char *name;
- char *ename;
- char *end;
- char c;
- long elen;
- int hasChildren = 0;
- int done = 0;
-
- ename = read_name_token(pi);
- end = pi->s;
- elen = end - ename;
- next_non_white(pi);
- c = *pi->s;
- *end = '\0';
- if ('/' == c) {
- /* empty element, no attributes and no children */
- pi->s++;
- if ('>' != *pi->s) {
- //printf("*** '%s' ***\n", pi->s);
- raise_error("invalid format, element not closed", pi->str, pi->s);
- }
- pi->s++; /* past > */
- ap->name = 0;
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
- pi->pcb->end_element(pi, ename);
-
- return;
- }
- /* read attribute names until the close (/ or >) is reached */
- while (!done) {
- if ('\0' == c) {
- next_non_white(pi);
- c = *pi->s;
- }
- switch (c) {
- case '\0':
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- case '/':
- // Element with just attributes.
- pi->s++;
- if ('>' != *pi->s) {
- raise_error("invalid format, element not closed", pi->str, pi->s);
- }
- pi->s++;
- ap->name = 0;
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
- pi->pcb->end_element(pi, ename);
-
- return;
- case '>':
- // has either children or a value
- pi->s++;
- hasChildren = 1;
- done = 1;
- ap->name = 0;
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
- break;
- default:
- // Attribute name so it's an element and the attribute will be
- // added to it.
- ap->name = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- if ('=' != *pi->s++) {
- raise_error("invalid format, no attribute value", pi->str, pi->s);
- }
- *end = '\0'; // terminate name
- // read value
- next_non_white(pi);
- ap->value = read_quoted_value(pi);
- if (0 != strchr(ap->value, '&')) {
- if (0 != collapse_special((char*)ap->value)) {
- raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
- }
- }
- ap++;
- if (MAX_ATTRS <= (ap - attrs)) {
- raise_error("too many attributes", pi->str, pi->s);
- }
- break;
- }
- c = '\0';
- }
- if (hasChildren) {
- char *start;
-
- done = 0;
- // read children
- while (!done) {
- start = pi->s;
- next_non_white(pi);
- c = *pi->s++;
- if ('\0' == c) {
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- }
- if ('<' == c) {
- switch (*pi->s) {
- case '!': /* better be a comment or CDATA */
- pi->s++;
- if ('-' == *pi->s && '-' == *(pi->s + 1)) {
- pi->s += 2;
- read_comment(pi);
- } else if (0 == strncmp("[CDATA[", pi->s, 7)) {
- pi->s += 7;
- read_cdata(pi);
- } else {
- raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
- }
- break;
- case '/':
- pi->s++;
- name = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- c = *pi->s;
- *end = '\0';
- if (0 != strcmp(name, ename)) {
- raise_error("invalid format, elements overlap", pi->str, pi->s);
- }
- if ('>' != c) {
- raise_error("invalid format, element not closed", pi->str, pi->s);
- }
- pi->s++;
- pi->pcb->end_element(pi, ename);
- return;
- case '\0':
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- default:
- // a child element
- read_element(pi);
- break;
- }
- } else { // read as TEXT
- pi->s = start;
- //pi->s--;
- read_text(pi);
- //read_reduced_text(pi);
-
- // to exit read_text with no errors the next character must be <
- if ('/' == *(pi->s + 1) &&
- 0 == strncmp(ename, pi->s + 2, elen) &&
- '>' == *(pi->s + elen + 2)) {
- // close tag after text so treat as a value
- pi->s += elen + 3;
- pi->pcb->end_element(pi, ename);
- return;
- }
- }
- }
- }
-}
-
-static void
-read_text(PInfo pi) {
- char buf[MAX_TEXT_LEN];
- char *b = buf;
- char *alloc_buf = 0;
- char *end = b + sizeof(buf) - 2;
- char c;
- int done = 0;
-
- while (!done) {
- c = *pi->s++;
- switch(c) {
- case '<':
- done = 1;
- pi->s--;
- break;
- case '\0':
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- default:
- if ('&' == c) {
- c = read_coded_char(pi);
- }
- if (end <= b) {
- unsigned long size;
-
- if (0 == alloc_buf) {
- size = sizeof(buf) * 2;
- if (0 == (alloc_buf = (char*)malloc(size))) {
- raise_error("text too long", pi->str, pi->s);
- }
- memcpy(alloc_buf, buf, b - buf);
- b = alloc_buf + (b - buf);
- } else {
- unsigned long pos = b - alloc_buf;
-
- size = (end - alloc_buf) * 2;
- if (0 == (alloc_buf = (char*)realloc(alloc_buf, size))) {
- raise_error("text too long", pi->str, pi->s);
- }
- b = alloc_buf + pos;
- }
- end = alloc_buf + size - 2;
- }
- *b++ = c;
- break;
- }
- }
- *b = '\0';
- if (0 != alloc_buf) {
- pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
- free(alloc_buf);
- } else {
- pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
- }
-}
-
-static char*
-read_name_token(PInfo pi) {
- char *start;
-
- next_non_white(pi);
- start = pi->s;
- for (; 1; pi->s++) {
- switch (*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '?':
- case '=':
- case '/':
- case '>':
- case '\n':
- case '\r':
- return start;
- case '\0':
- // documents never terminate after a name token
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- break; // to avoid warnings
- default:
- break;
- }
- }
- return start;
-}
-
-static void
-read_cdata(PInfo pi) {
- char *start;
- char *end;
-
- start = pi->s;
- end = strstr(pi->s, "]]>");
- if (end == 0) {
- raise_error("invalid format, CDATA not terminated", pi->str, pi->s);
- }
- *end = '\0';
- pi->s = end + 3;
- if (0 != pi->pcb->add_cdata) {
- pi->pcb->add_cdata(pi, start, end - start);
- }
-}
-
-inline static void
-next_non_token(PInfo pi) {
- for (; 1; pi->s++) {
- switch(*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- case '/':
- case '>':
- return;
- default:
- break;
- }
- }
-}
-
-/* Assume the value starts immediately and goes until the quote character is
- * reached again. Do not read the character after the terminating quote.
- */
-static char*
-read_quoted_value(PInfo pi) {
- char *value = 0;
-
- if ('"' == *pi->s || ('\'' == *pi->s && StrictEffort != pi->effort)) {
- char term = *pi->s;
-
- pi->s++; // skip quote character
- value = pi->s;
- for (; *pi->s != term; pi->s++) {
- if ('\0' == *pi->s) {
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- }
- }
- *pi->s = '\0'; // terminate value
- pi->s++; // move past quote
- } else if (StrictEffort == pi->effort) {
- raise_error("invalid format, expected a quote character", pi->str, pi->s);
- } else {
- value = pi->s;
- next_white(pi);
- if ('\0' == *pi->s) {
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- }
- *pi->s++ = '\0'; // terminate value
- }
- return value;
-}
-
-static int
-read_coded_char(PInfo pi) {
- char *b, buf[8];
- char *end = buf + sizeof(buf);
- char *s;
- int c;
-
- for (b = buf, s = pi->s; b < end; b++, s++) {
- if (';' == *s) {
- *b = '\0';
- s++;
- break;
- }
- *b = *s;
- }
- if (b > end) {
- return *pi->s;
- }
- if ('#' == *buf) {
- c = (int)strtol(buf + 1, &end, 10);
- if (0 >= c || '\0' != *end) {
- return *pi->s;
- }
- pi->s = s;
-
- return c;
- }
- if (0 == strcasecmp(buf, "nbsp")) {
- pi->s = s;
- return ' ';
- } else if (0 == strcasecmp(buf, "lt")) {
- pi->s = s;
- return '<';
- } else if (0 == strcasecmp(buf, "gt")) {
- pi->s = s;
- return '>';
- } else if (0 == strcasecmp(buf, "amp")) {
- pi->s = s;
- return '&';
- } else if (0 == strcasecmp(buf, "quot")) {
- pi->s = s;
- return '"';
- } else if (0 == strcasecmp(buf, "apos")) {
- pi->s = s;
- return '\'';
- }
- return *pi->s;
-}
-
-static int
-collapse_special(char *str) {
- char *s = str;
- char *b = str;
-
- while ('\0' != *s) {
- if ('&' == *s) {
- int c;
- char *end;
-
- s++;
- if ('#' == *s) {
- c = (int)strtol(s, &end, 10);
- if (';' != *end) {
- return EDOM;
- }
- s = end + 1;
- } else if (0 == strncasecmp(s, "lt;", 3)) {
- c = '<';
- s += 3;
- } else if (0 == strncasecmp(s, "gt;", 3)) {
- c = '>';
- s += 3;
- } else if (0 == strncasecmp(s, "amp;", 4)) {
- c = '&';
- s += 4;
- } else if (0 == strncasecmp(s, "quot;", 5)) {
- c = '"';
- s += 5;
- } else if (0 == strncasecmp(s, "apos;", 5)) {
- c = '\'';
- s += 5;
- } else {
- c = '?';
- while (';' != *s++) {
- if ('\0' == *s) {
- return EDOM;
- }
- }
- s++;
- }
- *b++ = (char)c;
- } else {
- *b++ = *s++;
- }
- }
- *b = '\0';
-
- return 0;
-}
-#endif
View
10 notes
@@ -7,5 +7,13 @@
- yajl-ruby is fastest out there
+- load
+ - options
+ - encoding
+ - object or raw/simple
-- write parser with callbacks
+- dump
+ - options
+ - indent
+ - object or simple (needed for Hash)
+ - call json on objects, skip, or raise
View
28 test/foo.rb
@@ -25,6 +25,7 @@
'12345',
'12345.6789',
'12345.6789e-30',
+ '{ "x":-33}',
].each do |s|
x = Oj.load(s)
puts ">>> #{x}(#{x.class})"
@@ -32,7 +33,10 @@
iter = 100000
s = %{
-[ true, [false, [12345, null], 3.967, ["something", false], null]]
+{ "class": "Foo::Bar",
+ "attr1": [ true, [false, [12345, null], 3.967, ["something", false], null]],
+ "attr2": { "one": 1 }
+}
}
start = Time.now
@@ -47,6 +51,26 @@
Yajl::Parser.parse(s)
end
yajl_dt = Time.now - start
-puts "%d Yajl::Parser.parse()s in %0.3f seconds or %0.1f parsed/msec" % [iter, yajl_dt, iter/yajl_dt/1000.0]
+puts "%d Yajl::Parser.parse()s in %0.3f seconds or %0.1f parses/msec" % [iter, yajl_dt, iter/yajl_dt/1000.0]
+
+puts "Oj is %0.1f times faster than YAJL" % [yajl_dt / oj_dt]
+
+
+obj = Oj.load(s)
+
+start = Time.now
+iter.times do
+ Oj.dump(obj)
+end
+oj_dt = Time.now - start
+puts "%d Oj.dump()s in %0.3f seconds or %0.1f dumps/msec" % [iter, oj_dt, iter/oj_dt/1000.0]
+
+start = Time.now
+iter.times do
+ Yajl::Encoder.encode(obj)
+end
+yajl_dt = Time.now - start
+puts "%d Yajl::Encoder.encode()s in %0.3f seconds or %0.1f encodes/msec" % [iter, yajl_dt, iter/yajl_dt/1000.0]
puts "Oj is %0.1f times faster than YAJL" % [yajl_dt / oj_dt]
+
Please sign in to comment.
Something went wrong with that request. Please try again.