Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

dump and load are working at 2X faster than Yajl. Need work on encodi…

…ng though.
  • Loading branch information...
commit 7bf61c31eb53e7bc8ffe0ff491d07f1ce0561cb2 1 parent b8f3b5a
Peter Ohler authored
View
479 ext/oj/dump.c
@@ -0,0 +1,479 @@
+/* dump.c
+ * Copyright (c) 2012, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <time.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "ruby.h"
+#include "oj.h"
+
+typedef unsigned long ulong;
+
+/* Pointer/length pair describing a run of characters.
+ * NOTE(review): only used as a member of struct _Element in the code visible
+ * here; ownership of str is not established by this file - confirm.
+ */
+typedef struct _Str {
+ const char *str; // start of the characters
+ size_t len; // presumably the number of bytes at str - TODO confirm
+} *Str;
+
+/* Description of an element handed to the w_start/w_end hooks of struct _Out.
+ * NOTE(review): no code visible here creates or reads an Element; the field
+ * meanings other than the existing indent note are unknown - confirm before
+ * relying on them.
+ */
+typedef struct _Element {
+ struct _Str clas;
+ struct _Str attr;
+ unsigned long id;
+ int indent; // < 0 indicates no \n
+ int closed;
+ char type;
+} *Element;
+
+/* Output state shared by the dump_* functions while one JSON text is built.
+ * buf/cur/end describe a heap buffer that grow() reallocates on demand.
+ */
+typedef struct _Out {
+ // NOTE(review): the three w_* hooks are never assigned or called in the code
+ // visible here - confirm whether they are still needed.
+ void (*w_start)(struct _Out *out, Element e);
+ void (*w_end)(struct _Out *out, Element e);
+ void (*w_time)(struct _Out *out, VALUE obj);
+ char *buf; // start of the heap allocated output buffer
+ char *end; // limit used by the capacity checks; the allocation extends a few bytes past it
+ char *cur; // next position to write
+// Cache8 circ_cache;
+// unsigned long circ_cnt;
+ int indent; // spaces per nesting level, multiplied by the depth in fill_indent()
+ int depth; // set by dump_hash() so that hash_cb() knows the nesting depth of the entries
+ Options opts; // options passed to dump_obj_to_json()
+ VALUE obj; // top level object being dumped
+} *Out;
+
+static void dump_obj_to_json(VALUE obj, Options copts, Out out);
+static void dump_val(VALUE obj, int depth, Out out);
+static void dump_nil(Out out);
+static void dump_true(Out out);
+static void dump_false(Out out);
+static void dump_fixnum(VALUE obj, Out out);
+static void dump_float(VALUE obj, Out out);
+static void dump_cstr(const char *str, int cnt, Out out);
+static void dump_hex(u_char c, Out out);
+static void dump_str(VALUE obj, Out out);
+static void dump_sym(VALUE obj, Out out);
+static void dump_array(VALUE obj, int depth, Out out);
+static void dump_hash(VALUE obj, int depth, Out out);
+
+static void grow(Out out, size_t len);
+static int is_json_friendly(const u_char *str, int len);
+static int json_friendly_size(const u_char *str, int len);
+
+
+/* Classification of every byte value for JSON string output, indexed by the
+ * byte as an unsigned char:
+ *   'o' - written unchanged (one output character)
+ *   'x' - written as a two character backslash escape such as \n or \"
+ *   'u' - written as a six character \u00XX escape
+ * The literal is exactly 256 characters long so the array holds no
+ * terminating \0.
+ */
+static char json_friendly_chars[256] = "\
+uuuuuuuuxxxuxxuuuuuuuuuuuuuuuuuu\
+ooxooooooooooooxoooooooooooooooo\
+ooooooooooooooooooooooooooooxooo\
+ooooooooooooooooooooooooooooooou\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu\
+uuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuu";
+
+/* Returns 1 when every one of the first len bytes of str can be written to a
+ * JSON string without escaping, otherwise 0. A len of zero or less yields 1.
+ */
+inline static int
+is_json_friendly(const u_char *str, int len) {
+    int i;
+
+    for (i = 0; i < len; i++) {
+        if ('o' != json_friendly_chars[str[i]]) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Returns the number of characters needed to write the first len bytes of
+ * str as the body of a JSON string (quotes not included): 1 for a plain
+ * byte, 2 for a backslash escape and 6 for a \u00XX escape.
+ */
+inline static int
+json_friendly_size(const u_char *str, int len) {
+    const u_char *p = str;
+    int total = 0;
+    int left;
+
+    for (left = len; 0 < left; left--, p++) {
+        char kind = json_friendly_chars[*p];
+
+        if ('o' == kind) {
+            total += 1;
+        } else if ('x' == kind) {
+            total += 2;
+        } else if ('u' == kind) {
+            total += 6;
+        }
+    }
+    return total;
+}
+
+/* Writes a newline followed by cnt * out->indent spaces at the current
+ * output position. Nothing is written when that product is negative. The
+ * caller must already have made room in the buffer.
+ */
+inline static void
+fill_indent(Out out, int cnt) {
+    int spaces = cnt * out->indent;
+
+    if (spaces < 0) {
+        return;
+    }
+    *out->cur++ = '\n';
+    while (0 < spaces) {
+        *out->cur++ = ' ';
+        spaces--;
+    }
+}
+
+/* Enlarges the output buffer so that at least len more bytes fit after the
+ * current position. The capacity is doubled and, if that is still not
+ * clearly enough, extended by len as well. buf, end and cur are updated to
+ * point into the new allocation.
+ *
+ * Raises NoMemError if the reallocation fails. The old buffer is released
+ * first because rb_raise() does not return and nothing else would free it.
+ */
+static void
+grow(Out out, size_t len) {
+    size_t size = out->end - out->buf;
+    long pos = out->cur - out->buf;
+    char *buf;
+
+    size *= 2;
+    if (size <= len * 2 + pos) {
+        size += len;
+    }
+    // 10 extra bytes: 1 for the terminator character plus slack (paranoid)
+    if (0 == (buf = (char*)realloc(out->buf, size + 10))) {
+        free(out->buf); // realloc leaves the original block allocated on failure
+        out->buf = 0;
+        out->end = 0;
+        out->cur = 0;
+        rb_raise(rb_eNoMemError, "Failed to create string. [%d:%s]\n", ENOMEM, strerror(ENOMEM));
+    }
+    out->buf = buf;
+    out->end = buf + size;
+    out->cur = out->buf + pos;
+}
+
+/* Writes the two lower case hexadecimal digits of c at the current output
+ * position, most significant digit first. The caller must already have made
+ * room for two characters.
+ */
+inline static void
+dump_hex(u_char c, Out out) {
+    static const char digits[] = "0123456789abcdef";
+
+    // The high nibble has to be shifted down; masking alone leaves a value
+    // of 16 or more that does not map to a single hex digit.
+    *out->cur++ = digits[(c >> 4) & 0x0F];
+    *out->cur++ = digits[c & 0x0F];
+}
+
+/* Appends the JSON literal null to the output and terminates the buffer. */
+static void
+dump_nil(Out out) {
+    static const char text[] = "null";
+    size_t size = sizeof(text) - 1;
+
+    if (out->end - out->cur <= (long)size) {
+        grow(out, size);
+    }
+    memcpy(out->cur, text, size);
+    out->cur += size;
+    *out->cur = '\0';
+}
+
+/* Appends the JSON literal true to the output and terminates the buffer. */
+static void
+dump_true(Out out) {
+    static const char text[] = "true";
+    size_t size = sizeof(text) - 1;
+
+    if (out->end - out->cur <= (long)size) {
+        grow(out, size);
+    }
+    memcpy(out->cur, text, size);
+    out->cur += size;
+    *out->cur = '\0';
+}
+
+/* Appends the JSON literal false to the output and terminates the buffer. */
+static void
+dump_false(Out out) {
+    static const char text[] = "false";
+    size_t size = sizeof(text) - 1;
+
+    if (out->end - out->cur <= (long)size) {
+        grow(out, size);
+    }
+    memcpy(out->cur, text, size);
+    out->cur += size;
+    *out->cur = '\0';
+}
+
+/* Appends the decimal representation of the integer obj to the output and
+ * terminates the buffer. The digits are produced right to left in a local
+ * scratch buffer and then copied in one step.
+ */
+static void
+dump_fixnum(VALUE obj, Out out) {
+    char digits[32];
+    char *head = digits + sizeof(digits) - 1;
+    long value = NUM2LONG(obj);
+    int negative = (value < 0);
+    size_t need;
+
+    if (negative) {
+        value = -value;
+    }
+    *head = '\0';
+    do {
+        *--head = (char)('0' + (value % 10));
+        value /= 10;
+    } while (0 < value);
+    if (negative) {
+        *--head = '-';
+    }
+    // number of characters plus one for the terminator
+    need = (size_t)(digits + sizeof(digits) - head);
+    if (out->end - out->cur <= (long)need) {
+        grow(out, need);
+    }
+    memcpy(out->cur, head, need - 1);
+    out->cur += need - 1;
+    *out->cur = '\0';
+}
+
+/* Appends the float obj to the output, formatted with "%0.16g", and
+ * terminates the buffer.
+ */
+static void
+dump_float(VALUE obj, Out out) {
+    char text[64];
+    int cnt = sprintf(text, "%0.16g", RFLOAT_VALUE(obj)); // used sprintf due to bug in snprintf
+    size_t len;
+
+    if (out->end - out->cur <= (long)cnt) {
+        grow(out, cnt);
+    }
+    len = strlen(text);
+    memcpy(out->cur, text, len);
+    out->cur += len;
+    *out->cur = '\0';
+}
+
+/* Appends the cnt bytes at str to the output as a quoted JSON string and
+ * terminates the buffer.
+ *
+ * Bytes are classified with json_friendly_chars: plain bytes are copied,
+ * the usual control characters, the quote, the slash and the backslash get a
+ * two character backslash escape, and everything else is written as \u00XX.
+ *
+ * Exactly cnt bytes are written, so a string that contains an embedded \0 is
+ * no longer cut short at that byte.
+ */
+static void
+dump_cstr(const char *str, int cnt, Out out) {
+    const char *stop = str + cnt;
+    int size = json_friendly_size((u_char*)str, cnt);
+
+    if (cnt == size) {
+        // nothing needs escaping, copy the bytes as they are
+        if (out->end - out->cur <= (long)(cnt + 2)) {
+            grow(out, cnt + 2);
+        }
+        *out->cur++ = '"';
+        if (0 < cnt) {
+            memcpy(out->cur, str, cnt);
+            out->cur += cnt;
+        }
+        *out->cur++ = '"';
+    } else {
+        // TBD maybe use ruby to generate string
+        size += 2;
+        if (out->end - out->cur <= (long)size) {
+            grow(out, size);
+        }
+        *out->cur++ = '"';
+        for (; str < stop; str++) {
+            switch (json_friendly_chars[(u_char)*str]) {
+            case 'o':
+                *out->cur++ = *str;
+                break;
+            case 'x':
+                *out->cur++ = '\\';
+                switch (*str) {
+                case '\b': *out->cur++ = 'b'; break;
+                case '\t': *out->cur++ = 't'; break;
+                case '\n': *out->cur++ = 'n'; break;
+                case '\f': *out->cur++ = 'f'; break;
+                case '\r': *out->cur++ = 'r'; break;
+                default:   *out->cur++ = *str; break; // quote, slash and backslash escape to themselves
+                }
+                break;
+            case 'u':
+                // TBD bytes above 0x7F are written one by one as \u00XX,
+                // multi-byte sequences are not combined into a code point
+                *out->cur++ = '\\';
+                *out->cur++ = 'u';
+                *out->cur++ = '0';
+                *out->cur++ = '0';
+                dump_hex((u_char)*str, out);
+                break;
+            default:
+                // TBD raise
+                break;
+            }
+        }
+        *out->cur++ = '"';
+    }
+    *out->cur = '\0';
+}
+
+/* Appends the Ruby String obj to the output as a quoted JSON string. */
+static void
+dump_str(VALUE obj, Out out) {
+    const char *bytes = StringValuePtr(obj);
+    int len = (int)RSTRING_LEN(obj);
+
+    dump_cstr(bytes, len, out);
+}
+
+/* Appends the name of the Ruby Symbol obj to the output as a quoted JSON
+ * string.
+ */
+static void
+dump_sym(VALUE obj, Out out) {
+    ID id = SYM2ID(obj);
+    const char *name = rb_id2name(id);
+    int len = (int)strlen(name);
+
+    dump_cstr(name, len, out);
+}
+
+/* Appends the Ruby Array a to the output as a JSON array and terminates the
+ * buffer. An empty array is written as []. Otherwise every element goes on
+ * its own line, indented one level deeper than depth, and the closing
+ * bracket is placed on a line indented to depth.
+ */
+static void
+dump_array(VALUE a, int depth, Out out) {
+    VALUE *items = RARRAY_PTR(a);
+    int total = (int)RARRAY_LEN(a);
+    int inner = depth + 1;
+    size_t need = 2;
+    int i;
+
+    if (out->end - out->cur <= (long)need) {
+        grow(out, need);
+    }
+    *out->cur++ = '[';
+    if (0 == total) {
+        *out->cur++ = ']';
+        *out->cur = '\0';
+        return;
+    }
+    need = inner * out->indent + 2;
+    for (i = 0; i < total; i++) {
+        if (out->end - out->cur <= (long)need) {
+            grow(out, need);
+        }
+        fill_indent(out, inner);
+        dump_val(items[i], inner, out);
+        if (i + 1 < total) {
+            // TBD check size?
+            *out->cur++ = ',';
+        }
+    }
+    need = depth * out->indent + 1;
+    if (out->end - out->cur <= (long)need) {
+        grow(out, need);
+    }
+    fill_indent(out, depth);
+    *out->cur++ = ']';
+    *out->cur = '\0';
+}
+
+/* rb_hash_foreach() callback that writes one key/value pair of a Hash.
+ *
+ * The pair is written on its own line at the depth stored in out->depth,
+ * as key, colon, value and a trailing comma. dump_hash() removes the comma
+ * that follows the last pair.
+ *
+ * Symbol keys are written by name. Any other key is handed to dump_str(),
+ * which accepts Strings and raises TypeError for objects that can not be
+ * converted to a String.
+ *
+ * Always returns ST_CONTINUE so that every pair is visited.
+ */
+static int
+hash_cb(VALUE key, VALUE value, Out out) {
+    int depth = out->depth;
+    size_t size = depth * out->indent + 1;
+
+    if (out->end - out->cur <= (long)size) {
+        grow(out, size);
+    }
+    fill_indent(out, depth);
+    if (T_SYMBOL == rb_type(key)) {
+        // Symbols are the most common Hash keys in Ruby and are not Strings
+        dump_sym(key, out);
+    } else {
+        dump_str(key, out);
+    }
+    *out->cur++ = ':';
+    dump_val(value, depth, out);
+    out->depth = depth; // a nested Hash changes out->depth, restore it for the next pair
+    *out->cur++ = ',';
+
+    return ST_CONTINUE;
+}
+
+/* Appends the Ruby Hash obj to the output as a JSON object and terminates
+ * the buffer. An empty Hash is written as {}. Otherwise hash_cb() writes
+ * each pair one level deeper than depth and the closing brace is placed on
+ * a line indented to depth.
+ */
+static void
+dump_hash(VALUE obj, int depth, Out out) {
+    int cnt = (int)RHASH_SIZE(obj);
+    size_t size = 2;
+
+    // make room for the opening brace and, for an empty Hash, the closing
+    // brace, the same way dump_array() does for its brackets
+    if (out->end - out->cur <= (long)size) {
+        grow(out, size);
+    }
+    *out->cur++ = '{';
+    if (0 == cnt) {
+        *out->cur++ = '}';
+    } else {
+        size = depth * out->indent + 2;
+        out->depth = depth + 1;
+        rb_hash_foreach(obj, hash_cb, (VALUE)out);
+        out->cur--; // backup to overwrite last comma
+        if (out->end - out->cur <= (long)size) {
+            grow(out, size);
+        }
+        fill_indent(out, depth);
+        *out->cur++ = '}';
+    }
+    // TBD
+    *out->cur = '\0';
+}
+
+/* Writes obj to the output by dispatching on its Ruby type. depth is the
+ * nesting level and is only used by the Array and Hash writers. Types
+ * without a writer produce no output at all.
+ */
+static void
+dump_val(VALUE obj, int depth, Out out) {
+    int type = rb_type(obj);
+
+    switch (type) {
+    case T_NIL:
+        dump_nil(out);
+        break;
+    case T_TRUE:
+        dump_true(out);
+        break;
+    case T_FALSE:
+        dump_false(out);
+        break;
+    case T_FIXNUM:
+        dump_fixnum(obj, out);
+        break;
+    case T_FLOAT:
+        dump_float(obj, out);
+        break;
+    // BIGNUM
+    case T_STRING:
+        dump_str(obj, out);
+        break;
+    case T_SYMBOL:
+        dump_sym(obj, out);
+        break;
+    case T_ARRAY:
+        dump_array(obj, depth, out);
+        break;
+    case T_HASH:
+        dump_hash(obj, depth, out);
+        break;
+    default:
+        // TBD raise, call json, or leave as nil, or get all variables
+        break;
+    }
+}
+
+/* Initializes out with a fresh heap buffer and writes obj into it as JSON.
+ * On return out->buf holds the \0 terminated text and out->cur points at the
+ * terminator. The caller owns out->buf and must free() it.
+ *
+ * Raises NoMemError if the initial buffer can not be allocated.
+ */
+static void
+dump_obj_to_json(VALUE obj, Options copts, Out out) {
+    out->buf = (char*)malloc(65336);
+    if (0 == out->buf) {
+        rb_raise(rb_eNoMemError, "Failed to create string. [%d:%s]\n", ENOMEM, strerror(ENOMEM));
+    }
+    out->end = out->buf + 65325; // 1 less than end plus extra for possible errors
+    out->cur = out->buf;
+    // dump_val() writes nothing for unsupported types, so terminate the
+    // buffer up front to keep callers from reading uninitialized memory
+    *out->cur = '\0';
+    out->depth = 0;
+//    out->circ_cache = 0;
+//    out->circ_cnt = 0;
+    out->opts = copts;
+    out->obj = obj;
+/*    if (Yes == copts->circular) {
+        ox_cache8_new(&out->circ_cache);
+    }*/
+    out->indent = copts->indent;
+    out->indent = 2; // TBD
+    dump_val(obj, 0, out);
+
+/*    if (Yes == copts->circular) {
+        ox_cache8_delete(out->circ_cache);
+    }*/
+}
+
+/* Dumps obj as JSON and returns the heap allocated, \0 terminated text. The
+ * caller is responsible for passing the result to free().
+ */
+char*
+write_obj_to_str(VALUE obj, Options copts) {
+    struct _Out state;
+    char *json;
+
+    dump_obj_to_json(obj, copts, &state);
+    json = state.buf;
+
+    return json;
+}
+
+/* Dumps obj as JSON and writes the text to the file at path, replacing any
+ * previous content.
+ *
+ * Raises IOError if the file can not be opened, written or closed. The
+ * output buffer and the file handle are released before raising because
+ * rb_raise() does not return.
+ */
+void
+write_obj_to_file(VALUE obj, const char *path, Options copts) {
+    struct _Out out;
+    size_t size;
+    FILE *f;
+    int err;
+
+    dump_obj_to_json(obj, copts, &out);
+    size = out.cur - out.buf;
+    if (0 == (f = fopen(path, "w"))) {
+        err = errno; // save before free() can change it
+        free(out.buf);
+        rb_raise(rb_eIOError, "%s\n", strerror(err));
+    }
+    if (size != fwrite(out.buf, 1, size, f)) {
+        err = errno; // ferror() only returns a flag, errno holds the reason
+        fclose(f);
+        free(out.buf);
+        rb_raise(rb_eIOError, "Write failed. [%d:%s]\n", err, strerror(err));
+    }
+    free(out.buf);
+    // buffered data is flushed by fclose() so a failure here is a failed write
+    if (0 != fclose(f)) {
+        err = errno;
+        rb_raise(rb_eIOError, "Write failed. [%d:%s]\n", err, strerror(err));
+    }
+}
View
156 ext/oj/gen_load.c
@@ -1,156 +0,0 @@
-/* gen_load.c
- * Copyright (c) 2011, Peter Ohler
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * - Neither the name of Peter Ohler nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-
-#include "ruby.h"
-#include "oj.h"
-
-static void add_obj(PInfo pi);
-static void end_obj(PInfo pi);
-static void add_array(PInfo pi);
-static void end_array(PInfo pi);
-static void add_key(PInfo pi, char *text);
-static void add_str(PInfo pi, char *text);
-static void add_int(PInfo pi, int64_t val);
-static void add_dub(PInfo pi, double val);
-static void add_true(PInfo pi);
-static void add_false(PInfo pi);
-static void add_nil(PInfo pi);
-
-struct _ParseCallbacks _oj_gen_callbacks = {
- add_obj,
- end_obj,
- add_array,
- end_array,
- add_key,
- add_str,
- add_int,
- add_dub,
- add_nil,
- add_true,
- add_false
-};
-
-ParseCallbacks oj_gen_callbacks = &_oj_gen_callbacks;
-
-static inline void
-add_val(PInfo pi, VALUE val) {
- if (0 == pi->h) {
- pi->obj = val;
- } else if (ArrayCode == pi->h->type) {
- rb_ary_push(pi->h->obj, val);
- } else if (ObjectCode == pi->h->type) {
- // TBD
- } else {
- raise_error("expected to be in an Array or Hash", pi->str, pi->s);
- }
-}
-
-static void
-add_obj(PInfo pi) {
- printf("*** add_obj\n");
-}
-
-static void
-end_obj(PInfo pi) {
- printf("*** end_obj\n");
-}
-
-static void
-add_array(PInfo pi) {
- VALUE a = rb_ary_new();
-
- if (0 == pi->h) {
- pi->h = pi->helpers;
- pi->h->obj = a;
- pi->h->type = ArrayCode;
- pi->obj = a;
- } else if (ArrayCode == pi->h->type) {
- rb_ary_push(pi->h->obj, a);
- pi->h++;
- pi->h->obj = a;
- pi->h->type = ArrayCode;
- } else if (ObjectCode == pi->h->type) {
- // TBD
- } else {
- raise_error("expected to be in an Array or Hash", pi->str, pi->s);
- }
-}
-
-static void
-end_array(PInfo pi) {
- if (0 == pi->h) {
- // TBD error
- } else if (pi->helpers < pi->h) {
- pi->h--;
- } else {
- pi->h = 0;
- }
-}
-
-static void
-add_key(PInfo pi, char *text) {
- printf("*** add_key %s\n", text);
-}
-
-static void
-add_str(PInfo pi, char *text) {
- printf("*** add_str %s\n", text);
-}
-
-static void
-add_int(PInfo pi, int64_t val) {
- printf("*** add_int %lld\n", val);
-}
-
-static void
-add_dub(PInfo pi, double val) {
- printf("*** add_dub %f\n", val);
-}
-
-static void
-add_true(PInfo pi) {
- add_val(pi, Qtrue);
-}
-
-static void
-add_false(PInfo pi) {
- add_val(pi, Qfalse);
-}
-
-static void
-add_nil(PInfo pi) {
- add_val(pi, Qnil);
-}
View
426 ext/oj/load.c
@@ -0,0 +1,426 @@
+/* load.c
+ * Copyright (c) 2012, Peter Ohler
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
+ * used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include "ruby.h"
+#include "oj.h"
+
+/* State of one parse() call: the text being parsed and the read position in
+ * it. The read_* functions advance s as they consume characters.
+ */
+typedef struct _ParseInfo {
+ char *str; /* buffer being read from */
+ char *s; /* current position in buffer */
+#ifdef HAVE_RUBY_ENCODING_H
+ rb_encoding *encoding; /* associated with parsed strings when not 0; parse() sets it to 0 */
+#else
+ void *encoding;
+#endif
+ int trace; /* copied from the trace argument of parse() */
+} *ParseInfo;
+
+static VALUE read_next(ParseInfo pi);
+static VALUE read_obj(ParseInfo pi);
+static VALUE read_array(ParseInfo pi);
+static VALUE read_str(ParseInfo pi);
+static VALUE read_num(ParseInfo pi);
+static VALUE read_true(ParseInfo pi);
+static VALUE read_false(ParseInfo pi);
+static VALUE read_nil(ParseInfo pi);
+static void next_non_white(ParseInfo pi);
+static char* read_quoted_value(ParseInfo pi);
+
+
+/* This JSON parser is a single pass, destructive parser. It is single pass
+ * because it makes only one pass over the characters in the JSON document
+ * string. It is destructive because it re-uses the content of the string for
+ * values, un-escaping quoted strings in place and placing \0 characters to
+ * mark the end of strings. Unlike a SAX style callback parser, it builds the
+ * Ruby values directly as document elements are encountered.
+ *
+ * Parsing is strict about structure. An unexpected character, a quoted
+ * string that is not terminated, or extra characters after the top level
+ * value all raise an error.
+ */
+
+/* Advances pi->s past spaces, tabs, form feeds, newlines and carriage
+ * returns. Stops on any other character, including the terminating \0.
+ */
+inline static void
+next_non_white(ParseInfo pi) {
+    while (' ' == *pi->s ||
+           '\t' == *pi->s ||
+           '\f' == *pi->s ||
+           '\n' == *pi->s ||
+           '\r' == *pi->s) {
+        pi->s++;
+    }
+}
+
+/* Advances pi->s until it points at a space, tab, form feed, newline,
+ * carriage return or the terminating \0.
+ */
+inline static void
+next_white(ParseInfo pi) {
+    for (;;) {
+        char c = *pi->s;
+
+        if (' ' == c || '\t' == c || '\f' == c || '\n' == c || '\r' == c || '\0' == c) {
+            return;
+        }
+        pi->s++;
+    }
+}
+
+/* Parses the \0 terminated JSON text in json and returns the Ruby value it
+ * describes. The text is modified while it is parsed. When trace is not
+ * zero the text is printed before parsing starts.
+ *
+ * Raises an error through raise_error() when json is null, when no value
+ * can be read, or when anything other than white space follows the value.
+ */
+VALUE
+parse(char *json, int trace) {
+    VALUE obj;
+    struct _ParseInfo pi;
+
+    if (0 == json) {
+        raise_error("Invalid arg, json string can not be null", json, 0);
+    }
+    if (trace) {
+        printf("Parsing JSON:\n%s\n", json);
+    }
+    /* initialize parse info */
+    pi.str = json;
+    pi.s = json;
+    pi.encoding = 0;
+    pi.trace = trace;
+    if (Qundef == (obj = read_next(&pi))) {
+        raise_error("no object read", pi.str, pi.s);
+    }
+    next_non_white(&pi); // skip white space
+    if ('\0' != *pi.s) {
+        raise_error("invalid format, extra characters", pi.str, pi.s);
+    }
+    return obj;
+}
+
+/* Skips white space and reads the value that starts at the current
+ * position, choosing the reader from the first character. Returns Qundef
+ * when the character does not start a value, which includes the end of the
+ * text.
+ */
+static VALUE
+read_next(ParseInfo pi) {
+    next_non_white(pi); // skip white space
+    switch (*pi->s) {
+    case '{':
+        return read_obj(pi);
+    case '[':
+        return read_array(pi);
+    case '"':
+        return read_str(pi);
+    case '+': case '-':
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9':
+        return read_num(pi);
+    case 't':
+        return read_true(pi);
+    case 'f':
+        return read_false(pi);
+    case 'n':
+        return read_nil(pi);
+    default:
+        // end of text or a character that can not start a value
+        break;
+    }
+    return Qundef;
+}
+
+/* Reads a JSON object starting at the { under pi->s and returns it as a
+ * Ruby Hash with String keys. pi->s is left just after the closing }.
+ * An object without members, {}, yields an empty Hash.
+ *
+ * Raises an error through raise_error() when a key is not a quoted string,
+ * when the colon is missing, when a value can not be read, or when a pair
+ * is not followed by a comma or the closing brace.
+ */
+static VALUE
+read_obj(ParseInfo pi) {
+    VALUE obj = rb_hash_new();
+    VALUE key = Qundef;
+    VALUE val = Qundef;
+
+    pi->s++; // skip {
+    next_non_white(pi);
+    if ('}' == *pi->s) {
+        // empty object
+        pi->s++;
+        return obj;
+    }
+    while (1) {
+        next_non_white(pi);
+        if ('"' != *pi->s || Qundef == (key = read_str(pi))) {
+            raise_error("unexpected character", pi->str, pi->s);
+        }
+        next_non_white(pi);
+        if (':' == *pi->s) {
+            pi->s++;
+        } else {
+            raise_error("invalid format, expected :", pi->str, pi->s);
+        }
+        if (Qundef == (val = read_next(pi))) {
+            raise_error("unexpected character", pi->str, pi->s);
+        }
+        rb_hash_aset(obj, key, val);
+        next_non_white(pi);
+        if ('}' == *pi->s) {
+            pi->s++;
+            break;
+        } else if (',' == *pi->s) {
+            pi->s++;
+        } else {
+            raise_error("invalid format, expected , or } while in an object", pi->str, pi->s);
+        }
+    }
+    return obj;
+}
+
+/* Reads a JSON array starting at the [ under pi->s and returns it as a Ruby
+ * Array. pi->s is left just after the closing ]. An array without elements,
+ * [], yields an empty Array.
+ *
+ * Raises an error through raise_error() when an element can not be read or
+ * when an element is not followed by a comma or the closing bracket.
+ */
+static VALUE
+read_array(ParseInfo pi) {
+    VALUE a = rb_ary_new();
+    VALUE e;
+
+    pi->s++; // skip [
+    next_non_white(pi);
+    if (']' == *pi->s) {
+        // empty array
+        pi->s++;
+        return a;
+    }
+    while (1) {
+        if (Qundef == (e = read_next(pi))) {
+            raise_error("unexpected character", pi->str, pi->s);
+        }
+        rb_ary_push(a, e);
+        next_non_white(pi); // skip white space
+        if (',' == *pi->s) {
+            pi->s++;
+        } else if (']' == *pi->s) {
+            pi->s++;
+            break;
+        } else {
+            raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
+        }
+    }
+    return a;
+}
+
+/* Reads the quoted string at pi->s and returns it as a new Ruby String. The
+ * String is associated with pi->encoding when an encoding is set.
+ */
+static VALUE
+read_str(ParseInfo pi) {
+    char *unescaped = read_quoted_value(pi);
+    VALUE rstr = rb_str_new2(unescaped);
+
+#ifdef HAVE_RUBY_ENCODING_H
+    if (0 != pi->encoding) {
+        rb_enc_associate(rstr, pi->encoding);
+    }
+#endif
+    return rstr;
+}
+
+/* Reads a number at pi->s: an optional sign, integer digits, an optional
+ * fraction and an optional exponent. pi->s is left on the first character
+ * that is not part of the number.
+ *
+ * Returns an Integer when there is neither a fraction nor a non zero
+ * exponent, otherwise a Float.
+ *
+ * The sign is applied to the complete magnitude. Negating only the integer
+ * part before the fraction is added turns "-1.5" into -0.5.
+ */
+static VALUE
+read_num(ParseInfo pi) {
+    int64_t n = 0;
+    long a = 0;
+    long div = 1;
+    long e = 0;
+    int neg = 0;
+    int eneg = 0;
+
+    if ('-' == *pi->s) {
+        pi->s++;
+        neg = 1;
+    } else if ('+' == *pi->s) {
+        pi->s++;
+    }
+    for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
+        n = n * 10 + (*pi->s - '0');
+    }
+    if ('.' == *pi->s) {
+        pi->s++;
+        // a holds the fraction digits as an integer, div the matching power of ten
+        for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
+            a = a * 10 + (*pi->s - '0');
+            div *= 10;
+        }
+    }
+    if ('e' == *pi->s || 'E' == *pi->s) {
+        pi->s++;
+        if ('-' == *pi->s) {
+            pi->s++;
+            eneg = 1;
+        } else if ('+' == *pi->s) {
+            pi->s++;
+        }
+        for (; '0' <= *pi->s && *pi->s <= '9'; pi->s++) {
+            e = e * 10 + (*pi->s - '0');
+        }
+    }
+    if (0 == e && 0 == a && 1 == div) {
+        if (neg) {
+            n = -n;
+        }
+        // NOTE(review): n is an int64_t while LONG2NUM takes a long - confirm on platforms with a 32 bit long
+        return LONG2NUM(n);
+    } else {
+        double d = (double)n + (double)a / (double)div;
+
+        if (neg) {
+            d = -d;
+        }
+        if (0 != e) {
+            if (eneg) {
+                e = -e;
+            }
+            d *= pow(10.0, e);
+        }
+        return DBL2NUM(d);
+    }
+}
+
+/* Reads the literal true, whose first character is under pi->s, and returns
+ * Qtrue. pi->s is left just after the literal. Raises an error through
+ * raise_error() when the remaining characters are not "rue".
+ */
+static VALUE
+read_true(ParseInfo pi) {
+    pi->s++; // the t was already matched by the caller
+    if (0 != strncmp(pi->s, "rue", 3)) {
+        raise_error("invalid format, expected 'true'", pi->str, pi->s);
+    }
+    pi->s += 3;
+
+    return Qtrue;
+}
+
+/* Reads the literal false, whose first character is under pi->s, and
+ * returns Qfalse. pi->s is left just after the literal. Raises an error
+ * through raise_error() when the remaining characters are not "alse".
+ */
+static VALUE
+read_false(ParseInfo pi) {
+    pi->s++; // the f was already matched by the caller
+    if (0 != strncmp(pi->s, "alse", 4)) {
+        raise_error("invalid format, expected 'false'", pi->str, pi->s);
+    }
+    pi->s += 4;
+
+    return Qfalse;
+}
+
+/* Reads the literal null, whose first character is under pi->s, and returns
+ * Qnil. pi->s is left just after the literal. Raises an error through
+ * raise_error() when the remaining characters are not "ull".
+ */
+static VALUE
+read_nil(ParseInfo pi) {
+    pi->s++;
+    if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
+        // the JSON literal is null, nil is only the Ruby value it maps to
+        raise_error("invalid format, expected 'null'", pi->str, pi->s);
+    }
+    pi->s += 3;
+
+    return Qnil;
+}
+
+/* Converts the two hexadecimal digits at h, upper or lower case, into one
+ * byte and returns it. When a character is not a hex digit pi->s is set to
+ * that character and an error is raised through raise_error().
+ */
+static char
+read_hex(ParseInfo pi, char *h) {
+    uint8_t byte = 0;
+    int i;
+
+    for (i = 0; i < 2; i++, h++) {
+        uint8_t nibble = 0;
+        char c = *h;
+
+        if ('0' <= c && c <= '9') {
+            nibble = c - '0';
+        } else if ('A' <= c && c <= 'F') {
+            nibble = c - 'A' + 10;
+        } else if ('a' <= c && c <= 'f') {
+            nibble = c - 'a' + 10;
+        } else {
+            pi->s = h;
+            raise_error("invalid hex character", pi->str, pi->s);
+        }
+        byte = (uint8_t)((byte << 4) + nibble);
+    }
+    return (char)byte;
+}
+
+/* Assume the value starts immediately and goes until the quote character is
+ * reached again. Do not read the character after the terminating quote.
+ */
+/* Un-escapes the quoted string that starts at the quote under pi->s, in
+ * place, and returns a pointer to the first character after that quote. The
+ * result is terminated with \0 and pi->s is left just after the closing
+ * quote.
+ *
+ * Two cursors walk the text: h reads and t writes. They stay equal until the
+ * first escape is seen; after that every character is copied down to t.
+ *
+ * Raises an error through raise_error() for an unterminated string, an
+ * unknown escape character or a bad hex digit in a \u escape.
+ */
+static char*
+read_quoted_value(ParseInfo pi) {
+ char *value = 0;
+ char *h = pi->s; // head
+ char *t = h; // tail
+
+ h++; // skip quote character
+ t++;
+ value = h;
+ // TBD can whole string be read in and then eval-ed by ruby if there is a special character
+ for (; '"' != *h; h++, t++) {
+ if ('\0' == *h) {
+ pi->s = h;
+ raise_error("quoted string not terminated", pi->str, pi->s);
+ } else if ('\\' == *h) {
+ h++; // move to the character that follows the backslash
+ switch (*h) {
+ case 'n': *t = '\n'; break;
+ case 'r': *t = '\r'; break;
+ case 't': *t = '\t'; break;
+ case 'f': *t = '\f'; break;
+ case 'b': *t = '\b'; break;
+ case '"': *t = '"'; break;
+ case '/': *t = '/'; break;
+ case '\\': *t = '\\'; break;
+ case 'u':
+ // The four hex digits are read as two raw bytes. A high byte of
+ // zero is dropped, otherwise both bytes are stored. The bytes are
+ // not converted to UTF-8.
+ // TBD if first character is 00 then skip it
+ h++;
+ *t = read_hex(pi, h);
+ h += 2;
+ if ('\0' != *t) {
+ t++;
+ }
+ *t = read_hex(pi, h);
+ h++; // the loop increment steps over the last hex digit
+ break;
+ default:
+ pi->s = h;
+ raise_error("invalid escaped character", pi->str, pi->s);
+ break;
+ }
+ } else if (t != h) {
+ *t = *h; // an earlier escape shortened the text, copy down
+ }
+ }
+ *t = '\0'; // terminate value
+ pi->s = h + 1;
+
+ return value;
+}
View
41 ext/oj/oj.c
@@ -36,18 +36,25 @@
#include "ruby.h"
#include "oj.h"
+/* Options that dump() copies as its starting point. The initializers are
+ * positional and follow the field order of struct _Options. The trailing
+ * effort field has no initializer and is therefore zero.
+ */
+struct _Options default_options = {
+ { '\0' }, // encoding
+ 2, // indent
+ 0, // trace
+ No, // circular
+ NoMode, // mode
+// StrictEffort, // effort
+};
+
void Init_oj();
VALUE Oj = Qnil;
-extern ParseCallbacks oj_gen_callbacks;
-
-
static VALUE
load(char *json, int argc, VALUE *argv, VALUE self) {
VALUE obj;
-
- obj = parse(json, oj_gen_callbacks, 0, 0);
+
+ // TBD other options like obj mode
+ obj = parse(json, 0);
free(json);
return obj;
@@ -101,12 +108,36 @@ load_file(int argc, VALUE *argv, VALUE self) {
return load(json, argc - 1, argv + 1, self);
}
+/* Implements Oj.dump. The first argument is the object to dump; a second,
+ * optional argument is reserved for options that are not parsed yet.
+ * Returns the JSON text as a new Ruby String, associated with the encoding
+ * named in the options when one is set.
+ *
+ * Raises ArgumentError when called without arguments and NoMemError when
+ * no text could be produced.
+ */
+static VALUE
+dump(int argc, VALUE *argv, VALUE self) {
+    char *json;
+    struct _Options copts = default_options;
+    VALUE rstr;
+
+    // registered with an arity of -1 so the argument count is not checked by Ruby
+    if (1 > argc) {
+        rb_raise(rb_eArgError, "Wrong number of arguments to dump.\n");
+    }
+    if (2 == argc) {
+        //parse_dump_options(argv[1], &copts);
+    }
+    if (0 == (json = write_obj_to_str(*argv, &copts))) {
+        rb_raise(rb_eNoMemError, "Not enough memory.\n");
+    }
+    rstr = rb_str_new2(json);
+#ifdef ENCODING_INLINE_MAX
+    if ('\0' != *copts.encoding) {
+        rb_enc_associate(rstr, rb_enc_find(copts.encoding));
+    }
+#endif
+    free(json);
+
+    return rstr;
+}
+
void Init_oj() {
Oj = rb_define_module("Oj");
rb_define_module_function(Oj, "load", load_str, -1);
rb_define_module_function(Oj, "load_file", load_file, -1);
+ rb_define_module_function(Oj, "dump", dump, -1);
}
void
View
88 ext/oj/oj.h
@@ -43,7 +43,6 @@ extern "C" {
// HAVE_RUBY_ENCODING_H defined for Ruby 1.9
#include "ruby/encoding.h"
#endif
-#include "cache.h"
#ifdef JRUBY
#define NO_RSTRUCT 1
@@ -61,75 +60,30 @@ extern "C" {
#define raise_error(msg, xml, current) _raise_error(msg, xml, current, __FILE__, __LINE__)
-#define MAX_TEXT_LEN 4096
-#define MAX_DEPTH 1024
-
typedef enum {
- NoCode = 0,
- ArrayCode = 'a',
- String64Code = 'b', // base64 encoded String
- ClassCode = 'c',
- Symbol64Code = 'd', // base64 encoded Symbol
- FloatCode = 'f',
- RegexpCode = 'g',
- HashCode = 'h',
- FixnumCode = 'i',
- BignumCode = 'j',
- KeyCode = 'k', // indicates the value is a hash key, kind of a hack
- RationalCode = 'l',
- SymbolCode = 'm',
- FalseClassCode = 'n',
- ObjectCode = 'o',
- RefCode = 'p',
- RangeCode = 'r',
- StringCode = 's',
- TimeCode = 't',
- StructCode = 'u',
- ComplexCode = 'v',
- RawCode = 'x',
- TrueClassCode = 'y',
- NilClassCode = 'z',
-} Type;
-
-typedef struct _Helper {
- ID var; /* Object var ID */
- VALUE obj; /* object created or Qundef if not appropriate */
- Type type;
-} *Helper;
-
-typedef struct _PInfo *PInfo;
-
-typedef struct _ParseCallbacks {
- void (*add_obj)(PInfo pi);
- void (*end_obj)(PInfo pi);
- void (*add_array)(PInfo pi);
- void (*end_array)(PInfo pi);
- void (*add_key)(PInfo pi, char *text);
- void (*add_str)(PInfo pi, char *text);
- void (*add_int)(PInfo pi, int64_t val);
- void (*add_dub)(PInfo pi, double val);
- void (*add_nil)(PInfo pi);
- void (*add_true)(PInfo pi);
- void (*add_false)(PInfo pi);
-} *ParseCallbacks;
+ Yes = 'y',
+ No = 'n',
+ NotSet = 0
+} YesNo;
-/* parse information structure */
-struct _PInfo {
- struct _Helper helpers[MAX_DEPTH];
- Helper h; /* current helper or 0 if not set */
- char *str; /* buffer being read from */
- char *s; /* current position in buffer */
- ParseCallbacks pcb;
- VALUE obj;
-#ifdef HAVE_RUBY_ENCODING_H
- rb_encoding *encoding;
-#else
- void *encoding;
-#endif
- int trace;
-};
+/* Values for the mode field of struct _Options. Each non zero value is the
+ * character code of a letter; NoMode is 0.
+ * NOTE(review): no code visible here acts on the mode - the meaning of
+ * ObjMode and GenMode should be confirmed against the loader.
+ */
+typedef enum {
+ ObjMode = 'o',
+ GenMode = 'g',
+ NoMode = 0
+} LoadMode;
+
+/* Settings for dumping and loading. default_options in oj.c provides the
+ * initial values and dump() works on a copy of it.
+ */
+typedef struct _Options {
+ char encoding[64]; // encoding, stored in the option to avoid GC invalidation in default values
+ int indent; // indention for dump, default 2
+ int trace; // trace level
+ char circular; // YesNo
+ char mode; // LoadMode
+ char effort; // Effort; NOTE(review): no Effort type is declared in the code visible here
+} *Options;
+
+extern VALUE parse(char *json, int trace);
+extern char* write_obj_to_str(VALUE obj, Options copts);
-extern VALUE parse(char *json, ParseCallbacks pcb, char **endp, int trace);
extern void _raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
View
780 ext/oj/parse.c
@@ -1,780 +0,0 @@
-/* parse.c
- * Copyright (c) 2011, Peter Ohler
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * - Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
- *
- * - Neither the name of Peter Ohler nor the names of its contributors may be
- * used to endorse or promote products derived from this software without
- * specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdlib.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "ruby.h"
-#include "oj.h"
-
-//static void read_instruction(PInfo pi);
-//static void read_doctype(PInfo pi);
-//static void read_comment(PInfo pi);
-//static void read_element(PInfo pi);
-//static void read_text(PInfo pi);
-//static void read_cdata(PInfo pi);
-//static char* read_name_token(PInfo pi);
-//static char* read_quoted_value(PInfo pi);
-//static int read_coded_char(PInfo pi);
-//static void next_non_white(PInfo pi);
-//static int collapse_special(char *str);
-
-static void read_next(PInfo pi);
-static void read_obj(PInfo pi);
-static void read_array(PInfo pi);
-static void read_str(PInfo pi);
-static void read_num(PInfo pi);
-static void read_true(PInfo pi);
-static void read_false(PInfo pi);
-static void read_nil(PInfo pi);
-
-
-/* This XML parser is a single pass, destructive, callback parser. It is a
- * single pass parser since it only makes one pass over the characters in the
- * XML document string. It is destructive because it re-uses the content of
- * the string for values in the callback and places \0 characters at various
- * places to mark the end of tokens and strings. It is a callback parser like
- * a SAX parser because it uses callbacks when document elements are
- * encountered.
- *
- * Parsing is very tolerant. Lack of headers and even misspelled element
- * endings are passed over without raising an error. A best attempt is made in
- * all cases to parse the string.
- */
-
-inline static void
-next_non_white(PInfo pi) {
- for (; 1; pi->s++) {
- switch(*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- break;
- default:
- return;
- }
- }
-}
-
-inline static void
-next_white(PInfo pi) {
- for (; 1; pi->s++) {
- switch(*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- case '\0':
- return;
- default:
- break;
- }
- }
-}
-
-VALUE
-parse(char *json, ParseCallbacks pcb, char **endp, int trace) {
- struct _PInfo pi;
-
- if (0 == json) {
- raise_error("Invalid arg, xml string can not be null", json, 0);
- }
- if (trace) {
- printf("Parsing JSON:\n%s\n", json);
- }
- /* initialize parse info */
- pi.str = json;
- pi.s = json;
- pi.h = 0;
- pi.pcb = pcb;
- pi.obj = Qnil;
- pi.encoding = 0;
- pi.trace = trace;
- read_next(&pi);
- next_non_white(&pi); // skip white space
- if ('\0' != *pi.s) {
- raise_error("invalid format, extra characters", pi.str, pi.s);
- }
- return pi.obj;
-}
-
-static void
-read_next(PInfo pi) {
- next_non_white(pi); // skip white space
- switch (*pi->s) {
- case '{':
- pi->s++;
- read_obj(pi);
- break;
- case '[':
- pi->s++;
- read_array(pi);
- break;
- case '"':
- pi->s++;
- read_str(pi);
- break;
- case '+':
- case '-':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- read_num(pi);
- break;
- case 't':
- read_true(pi);
- break;
- case 'f':
- read_false(pi);
- break;
- case 'n':
- read_nil(pi);
- break;
- case '\0':
- break;
- default:
- break;
- }
-}
-
-static void
-read_obj(PInfo pi) {
-}
-
-static void
-read_array(PInfo pi) {
- if (0 != pi->pcb->add_array) {
- pi->pcb->add_array(pi);
- }
- while (1) {
- read_next(pi);
- next_non_white(pi); // skip white space
- if (',' == *pi->s) {
- pi->s++;
- } else if (']' == *pi->s) {
- pi->s++;
- break;
- } else {
- raise_error("invalid format, expected , or ] while in an array", pi->str, pi->s);
- }
- }
- if (0 != pi->pcb->end_array) {
- pi->pcb->end_array(pi);
- }
-}
-
-static void
-read_str(PInfo pi) {
-}
-
-static void
-read_num(PInfo pi) {
-}
-
-static void
-read_true(PInfo pi) {
- pi->s++;
- if ('r' != *pi->s || 'u' != *(pi->s + 1) || 'e' != *(pi->s + 2)) {
- raise_error("invalid format, expected 'true'", pi->str, pi->s);
- }
- pi->s += 3;
- if (0 != pi->pcb->add_true) {
- pi->pcb->add_true(pi);
- }
-}
-
-static void
-read_false(PInfo pi) {
- pi->s++;
- if ('a' != *pi->s || 'l' != *(pi->s + 1) || 's' != *(pi->s + 2) || 'e' != *(pi->s + 3)) {
- raise_error("invalid format, expected 'false'", pi->str, pi->s);
- }
- pi->s += 4;
- if (0 != pi->pcb->add_false) {
- pi->pcb->add_false(pi);
- }
-}
-
-static void
-read_nil(PInfo pi) {
- pi->s++;
- if ('u' != *pi->s || 'l' != *(pi->s + 1) || 'l' != *(pi->s + 2)) {
- raise_error("invalid format, expected 'nil'", pi->str, pi->s);
- }
- pi->s += 3;
- if (0 != pi->pcb->add_nil) {
- pi->pcb->add_nil(pi);
- }
-}
-
-
-#if 0
-/* Entered after the "<?" sequence. Ready to read the rest.
- */
-static void
-read_instruction(PInfo pi) {
- struct _Attr attrs[MAX_ATTRS + 1];
- Attr a = attrs;
- char *target;
- char *end;
- char c;
-
- memset(attrs, 0, sizeof(attrs));
- target = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- c = *pi->s;
- *end = '\0'; // terminate name
- if ('?' != c) {
- while ('?' != *pi->s) {
- if ('\0' == *pi->s) {
- raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
- }
- next_non_white(pi);
- a->name = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- if ('=' != *pi->s++) {
- raise_error("invalid format, no attribute value", pi->str, pi->s);
- }
- *end = '\0'; // terminate name
- // read value
- next_non_white(pi);
- a->value = read_quoted_value(pi);
- a++;
- if (MAX_ATTRS <= (a - attrs)) {
- raise_error("too many attributes", pi->str, pi->s);
- }
- }
- if ('?' == *pi->s) {
- pi->s++;
- }
- } else {
- pi->s++;
- }
- if ('>' != *pi->s++) {
- raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
- }
- if (0 != pi->pcb->instruct) {
- pi->pcb->instruct(pi, target, attrs);
- }
-}
-
-/* Entered after the "<!DOCTYPE" sequence plus the first character after
- * that. Ready to read the rest. Errors are reported through raise_error().
- */
-static void
-read_doctype(PInfo pi) {
- char *docType;
- int depth = 1;
- char c;
-
- next_non_white(pi);
- docType = pi->s;
- while (1) {
- c = *pi->s++;
- if ('\0' == c) {
- raise_error("invalid format, prolog not terminated", pi->str, pi->s);
- } else if ('<' == c) {
- depth++;
- } else if ('>' == c) {
- depth--;
- if (0 == depth) { /* done, at the end */
- pi->s--;
- break;
- }
- }
- }
- *pi->s = '\0';
- pi->s++;
- if (0 != pi->pcb->add_doctype) {
- pi->pcb->add_doctype(pi, docType);
- }
-}
-
-/* Entered after "<!--". Errors are reported through raise_error().
- */
-static void
-read_comment(PInfo pi) {
- char *end;
- char *s;
- char *comment;
- int done = 0;
-
- next_non_white(pi);
- comment = pi->s;
- end = strstr(pi->s, "-->");
- if (0 == end) {
- raise_error("invalid format, comment not terminated", pi->str, pi->s);
- }
- for (s = end - 1; pi->s < s && !done; s--) {
- switch(*s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- break;
- default:
- *(s + 1) = '\0';
- done = 1;
- break;
- }
- }
- *end = '\0'; // in case the comment was blank
- pi->s = end + 3;
- if (0 != pi->pcb->add_comment) {
- pi->pcb->add_comment(pi, comment);
- }
-}
-
-/* Entered after the '<' and the first character after that. Errors are
- * reported through raise_error().
- */
-static void
-read_element(PInfo pi) {
- struct _Attr attrs[MAX_ATTRS];
- Attr ap = attrs;
- char *name;
- char *ename;
- char *end;
- char c;
- long elen;
- int hasChildren = 0;
- int done = 0;
-
- ename = read_name_token(pi);
- end = pi->s;
- elen = end - ename;
- next_non_white(pi);
- c = *pi->s;
- *end = '\0';
- if ('/' == c) {
- /* empty element, no attributes and no children */
- pi->s++;
- if ('>' != *pi->s) {
- //printf("*** '%s' ***\n", pi->s);
- raise_error("invalid format, element not closed", pi->str, pi->s);
- }
- pi->s++; /* past > */
- ap->name = 0;
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
- pi->pcb->end_element(pi, ename);
-
- return;
- }
- /* read attribute names until the close (/ or >) is reached */
- while (!done) {
- if ('\0' == c) {
- next_non_white(pi);
- c = *pi->s;
- }
- switch (c) {
- case '\0':
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- case '/':
- // Element with just attributes.
- pi->s++;
- if ('>' != *pi->s) {
- raise_error("invalid format, element not closed", pi->str, pi->s);
- }
- pi->s++;
- ap->name = 0;
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
- pi->pcb->end_element(pi, ename);
-
- return;
- case '>':
- // has either children or a value
- pi->s++;
- hasChildren = 1;
- done = 1;
- ap->name = 0;
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
- break;
- default:
- // Attribute name so it's an element and the attribute will be
- // added to it.
- ap->name = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- if ('=' != *pi->s++) {
- raise_error("invalid format, no attribute value", pi->str, pi->s);
- }
- *end = '\0'; // terminate name
- // read value
- next_non_white(pi);
- ap->value = read_quoted_value(pi);
- if (0 != strchr(ap->value, '&')) {
- if (0 != collapse_special((char*)ap->value)) {
- raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
- }
- }
- ap++;
- if (MAX_ATTRS <= (ap - attrs)) {
- raise_error("too many attributes", pi->str, pi->s);
- }
- break;
- }
- c = '\0';
- }
- if (hasChildren) {
- char *start;
-
- done = 0;
- // read children
- while (!done) {
- start = pi->s;
- next_non_white(pi);
- c = *pi->s++;
- if ('\0' == c) {
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- }
- if ('<' == c) {
- switch (*pi->s) {
- case '!': /* better be a comment or CDATA */
- pi->s++;
- if ('-' == *pi->s && '-' == *(pi->s + 1)) {
- pi->s += 2;
- read_comment(pi);
- } else if (0 == strncmp("[CDATA[", pi->s, 7)) {
- pi->s += 7;
- read_cdata(pi);
- } else {
- raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
- }
- break;
- case '/':
- pi->s++;
- name = read_name_token(pi);
- end = pi->s;
- next_non_white(pi);
- c = *pi->s;
- *end = '\0';
- if (0 != strcmp(name, ename)) {
- raise_error("invalid format, elements overlap", pi->str, pi->s);
- }
- if ('>' != c) {
- raise_error("invalid format, element not closed", pi->str, pi->s);
- }
- pi->s++;
- pi->pcb->end_element(pi, ename);
- return;
- case '\0':
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- default:
- // a child element
- read_element(pi);
- break;
- }
- } else { // read as TEXT
- pi->s = start;
- //pi->s--;
- read_text(pi);
- //read_reduced_text(pi);
-
- // to exit read_text with no errors the next character must be <
- if ('/' == *(pi->s + 1) &&
- 0 == strncmp(ename, pi->s + 2, elen) &&
- '>' == *(pi->s + elen + 2)) {
- // close tag after text so treat as a value
- pi->s += elen + 3;
- pi->pcb->end_element(pi, ename);
- return;
- }
- }
- }
- }
-}
-
-static void
-read_text(PInfo pi) {
- char buf[MAX_TEXT_LEN];
- char *b = buf;
- char *alloc_buf = 0;
- char *end = b + sizeof(buf) - 2;
- char c;
- int done = 0;
-
- while (!done) {
- c = *pi->s++;
- switch(c) {
- case '<':
- done = 1;
- pi->s--;
- break;
- case '\0':
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- default:
- if ('&' == c) {
- c = read_coded_char(pi);
- }
- if (end <= b) {
- unsigned long size;
-
- if (0 == alloc_buf) {
- size = sizeof(buf) * 2;
- if (0 == (alloc_buf = (char*)malloc(size))) {
- raise_error("text too long", pi->str, pi->s);
- }
- memcpy(alloc_buf, buf, b - buf);
- b = alloc_buf + (b - buf);
- } else {
- unsigned long pos = b - alloc_buf;
-
- size = (end - alloc_buf) * 2;
- if (0 == (alloc_buf = (char*)realloc(alloc_buf, size))) {
- raise_error("text too long", pi->str, pi->s);
- }
- b = alloc_buf + pos;
- }
- end = alloc_buf + size - 2;
- }
- *b++ = c;
- break;
- }
- }
- *b = '\0';
- if (0 != alloc_buf) {
- pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
- free(alloc_buf);
- } else {
- pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
- }
-}
-
-static char*
-read_name_token(PInfo pi) {
- char *start;
-
- next_non_white(pi);
- start = pi->s;
- for (; 1; pi->s++) {
- switch (*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '?':
- case '=':
- case '/':
- case '>':
- case '\n':
- case '\r':
- return start;
- case '\0':
- // documents never terminate after a name token
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- break; // to avoid warnings
- default:
- break;
- }
- }
- return start;
-}
-
-static void
-read_cdata(PInfo pi) {
- char *start;
- char *end;
-
- start = pi->s;
- end = strstr(pi->s, "]]>");
- if (end == 0) {
- raise_error("invalid format, CDATA not terminated", pi->str, pi->s);
- }
- *end = '\0';
- pi->s = end + 3;
- if (0 != pi->pcb->add_cdata) {
- pi->pcb->add_cdata(pi, start, end - start);
- }
-}
-
-inline static void
-next_non_token(PInfo pi) {
- for (; 1; pi->s++) {
- switch(*pi->s) {
- case ' ':
- case '\t':
- case '\f':
- case '\n':
- case '\r':
- case '/':
- case '>':
- return;
- default:
- break;
- }
- }
-}
-
-/* Assume the value starts immediately and goes until the quote character is
- * reached again. Do not read the character after the terminating quote.
- */
-static char*
-read_quoted_value(PInfo pi) {
- char *value = 0;
-
- if ('"' == *pi->s || ('\'' == *pi->s && StrictEffort != pi->effort)) {
- char term = *pi->s;
-
- pi->s++; // skip quote character
- value = pi->s;
- for (; *pi->s != term; pi->s++) {
- if ('\0' == *pi->s) {
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- }
- }
- *pi->s = '\0'; // terminate value
- pi->s++; // move past quote
- } else if (StrictEffort == pi->effort) {
- raise_error("invalid format, expected a quote character", pi->str, pi->s);
- } else {
- value = pi->s;
- next_white(pi);
- if ('\0' == *pi->s) {
- raise_error("invalid format, document not terminated", pi->str, pi->s);
- }
- *pi->s++ = '\0'; // terminate value
- }
- return value;
-}
-
-static int
-read_coded_char(PInfo pi) {
- char *b, buf[8];
- char *end = buf + sizeof(buf);
- char *s;
- int c;
-
- for (b = buf, s = pi->s; b < end; b++, s++) {
- if (';' == *s) {
- *b = '\0';
- s++;
- break;
- }
- *b = *s;
- }
- if (b > end) {
- return *pi->s;
- }
- if ('#' == *buf) {
- c = (int)strtol(buf + 1, &end, 10);
- if (0 >= c || '\0' != *end) {
- return *pi->s;
- }
- pi->s = s;
-
- return c;
- }
- if (0 == strcasecmp(buf, "nbsp")) {
- pi->s = s;
- return ' ';
- } else if (0 == strcasecmp(buf, "lt")) {
- pi->s = s;
- return '<';
- } else if (0 == strcasecmp(buf, "gt")) {
- pi->s = s;
- return '>';
- } else if (0 == strcasecmp(buf, "amp")) {
- pi->s = s;
- return '&';
- } else if (0 == strcasecmp(buf, "quot")) {
- pi->s = s;
- return '"';
- } else if (0 == strcasecmp(buf, "apos")) {
- pi->s = s;
- return '\'';
- }
- return *pi->s;
-}
-
-static int
-collapse_special(char *str) {
- char *s = str;
- char *b = str;
-
- while ('\0' != *s) {
- if ('&' == *s) {
- int c;
- char *end;
-
- s++;
- if ('#' == *s) {
- c = (int)strtol(s, &end, 10);
- if (';' != *end) {
- return EDOM;
- }
- s = end + 1;
- } else if (0 == strncasecmp(s, "lt;", 3)) {
- c = '<';
- s += 3;
- } else if (0 == strncasecmp(s, "gt;", 3)) {
- c = '>';
- s += 3;
- } else if (0 == strncasecmp(s, "amp;", 4)) {
- c = '&';
- s += 4;
- } else if (0 == strncasecmp(s, "quot;", 5)) {
- c = '"';
- s += 5;
- } else if (0 == strncasecmp(s, "apos;", 5)) {
- c = '\'';
- s += 5;
- } else {
- c = '?';
- while (';' != *s++) {
- if ('\0' == *s) {
- return EDOM;
- }
- }
- s++;
- }
- *b++ = (char)c;
- } else {
- *b++ = *s++;
- }
- }
- *b = '\0';
-
- return 0;
-}
-#endif
View
10 notes
@@ -7,5 +7,13 @@
- yajl-ruby is fastest out there
+- load
+ - options
+ - encoding
+ - object or raw/simple
-- write parser with callbacks
+- dump
+ - options
+ - indent
+ - object or simple (needed for Hash)
+ - call json on objects, skip, or raise
View
28 test/foo.rb
@@ -25,6 +25,7 @@
'12345',
'12345.6789',
'12345.6789e-30',
+ '{ "x":-33}',
].each do |s|
x = Oj.load(s)
puts ">>> #{x}(#{x.class})"
@@ -32,7 +33,10 @@
iter = 100000
s = %{
-[ true, [false, [12345, null], 3.967, ["something", false], null]]
+{ "class": "Foo::Bar",
+ "attr1": [ true, [false, [12345, null], 3.967, ["something", false], null]],
+ "attr2": { "one": 1 }
+}
}
start = Time.now
@@ -47,6 +51,26 @@
Yajl::Parser.parse(s)
end
yajl_dt = Time.now - start
-puts "%d Yajl::Parser.parse()s in %0.3f seconds or %0.1f parsed/msec" % [iter, yajl_dt, iter/yajl_dt/1000.0]
+puts "%d Yajl::Parser.parse()s in %0.3f seconds or %0.1f parses/msec" % [iter, yajl_dt, iter/yajl_dt/1000.0]
+
+puts "Oj is %0.1f times faster than YAJL" % [yajl_dt / oj_dt]
+
+
+obj = Oj.load(s)
+
+start = Time.now
+iter.times do
+ Oj.dump(obj)
+end
+oj_dt = Time.now - start
+puts "%d Oj.dump()s in %0.3f seconds or %0.1f dumps/msec" % [iter, oj_dt, iter/oj_dt/1000.0]
+
+start = Time.now
+iter.times do
+ Yajl::Encoder.encode(obj)
+end
+yajl_dt = Time.now - start
+puts "%d Yajl::Encoder.encode()s in %0.3f seconds or %0.1f encodes/msec" % [iter, yajl_dt, iter/yajl_dt/1000.0]
puts "Oj is %0.1f times faster than YAJL" % [yajl_dt / oj_dt]
+
Please sign in to comment.
Something went wrong with that request. Please try again.