Permalink
Browse files

Properly handle multiple unicode keys inside of a dictionary

Previously, py-yajl used `yajl_gen_number()` in order to print a non-escaped
buffer to the output stream. The ENSURE_NOT_KEY macro inside of yajl_gen.c
would cause entries to be dropped and an unchecked `yajl_gen_keys_must_be_strings`
status would be returned.

To work around this, a portion of code has been lifted from yajl_gen.c into
yajl_hacks.c, which adds the function `yajl_gen_raw_string()`. It prints the
buffer, unescaped, to the output stream without calling ENSURE_NOT_KEY.

http://github.com/rtyler/py-yajl/issues/#issue/12

Change-Id: I75a71573c4949d04ad4d532c27f2b64486db906e
  • Loading branch information...
1 parent c619aea commit 85f3aa53e030f7a2fc51b41db580431d98948990 @rtyler committed Apr 7, 2010
Showing with 142 additions and 5 deletions.
  1. +7 −5 encoder.c
  2. +1 −0 setup.py
  3. +2 −0 tests/python2.py
  4. +9 −0 tests/unit.py
  5. +123 −0 yajl_hacks.c
View
@@ -40,6 +40,10 @@
static const char *hexdigit = "0123456789abcdef";
+/* Defined in yajl_hacks.c: writes str to the generator wrapped in double quotes, without escaping it */
+extern yajl_gen_status yajl_gen_raw_string(yajl_gen g,
+ const unsigned char * str, unsigned int len);
+
static yajl_gen_status ProcessObject(_YajlEncoder *self, PyObject *object)
{
yajl_gen handle = (yajl_gen)(self->_generator);
@@ -62,10 +66,9 @@ static yajl_gen_status ProcessObject(_YajlEncoder *self, PyObject *object)
* Create a buffer with enough space for code-points and a null
* termination character; the preceding and following quotes are not
* stored in the buffer
*/
- char *buffer = (char *)(malloc(sizeof(char) * (3 + length * 6)));
+ char *buffer = (char *)(malloc(sizeof(char) * (1 + length * 6)));
unsigned int offset = 0;
- buffer[offset++] = '\"';
while (length-- > 0) {
Py_UNICODE ch = *raw_unicode++;
@@ -138,9 +141,8 @@ static yajl_gen_status ProcessObject(_YajlEncoder *self, PyObject *object)
continue;
}
}
- buffer[offset++] = '\"';
- buffer[offset + 1] = '\0';
- return yajl_gen_number(handle, (const char *)(buffer), (unsigned int)(offset));
+ buffer[offset] = '\0';
+ return yajl_gen_raw_string(handle, (const unsigned char *)(buffer), (unsigned int)(offset));
}
#ifdef IS_PYTHON3
if (PyBytes_Check(object)) {
View
@@ -16,6 +16,7 @@
'yajl.c',
'encoder.c',
'decoder.c',
+ 'yajl_hacks.c',
'yajl/src/yajl_alloc.c',
'yajl/src/yajl_buf.c',
'yajl/src/yajl.c',
View
@@ -9,3 +9,5 @@
IssueSevenTest_latin1_char = u'f\xe9in'
# u'早安, 爸爸' # Good morning!
IssueSevenTest_chinese_char = u'\u65e9\u5b89, \u7238\u7238'
+
+IssueTwelveTest_dict = {u'a' : u'b', u'c' : u'd'}
View
@@ -285,6 +285,15 @@ def testLong(self):
result = yajl.loads(yajl.dumps(data))
self.assertEquals({'1': 2}, result)
+class IssueTwelveTest(unittest.TestCase):  # checks that dumps() of a two-key dict emits both entries
+ def runTest(self):
+ normal = {'a' : 'b', 'c' : 'd'}
+ self.assertEquals(yajl.dumps(normal), '{"a":"b","c":"d"}')  # NOTE(review): compares exact text, so it presumably depends on dict iteration order
+
+ if not is_python3():  # the u'' fixture is kept in tests/python2.py and only imported on Python 2
+ from tests import python2
+ self.assertEquals(yajl.dumps(python2.IssueTwelveTest_dict), '{"a":"b","c":"d"}')  # same dict, but with unicode keys and values
+
if __name__ == '__main__':
verbosity = '-v' in sys.argv and 2 or 1
runner = unittest.TextTestRunner(verbosity=verbosity)
View
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2010, R. Tyler Ballance <tyler@monkeypox.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. Neither the name of R. Tyler Ballance nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <string.h>
+
+#include <yajl_encode.h>
+
+
+/*
+ * This code was yanked largely from yajl_gen.c
+ * it is merely a set of hacks
+ */
+
+typedef enum { /* generator state for one nesting level (one entry of yajl_gen_t.state[]) */
+ yajl_gen_start, /* next atom is written with no separator and moves the state to complete */
+ yajl_gen_map_start, /* next atom is written with no separator and moves the state to map_val */
+ yajl_gen_map_key, /* next atom is preceded by "," and moves the state to map_val */
+ yajl_gen_map_val, /* next atom is preceded by ":" and moves the state to map_key */
+ yajl_gen_array_start, /* next atom is written with no separator and moves the state to in_array */
+ yajl_gen_in_array, /* next atom is preceded by ","; the state is left unchanged */
+ yajl_gen_complete, /* ENSURE_VALID_STATE returns yajl_gen_generation_complete */
+ yajl_gen_error /* ENSURE_VALID_STATE returns yajl_gen_in_error_state */
+} yajl_gen_state;
+
+struct yajl_gen_t /* Generator object that yajl_gen_raw_string() reaches through its yajl_gen argument.
+ * NOTE(review): this file states its code was yanked from yajl_gen.c, so this
+ * definition presumably has to match the struct there field-for-field;
+ * confirm whenever the bundled yajl is upgraded.
+ */
+{
+ unsigned int depth; /* index of the current entry in state[] */
+ unsigned int pretty; /* non-zero: newlines, indentation and a space after ":" are emitted */
+ const char * indentString; /* printed once per depth level when pretty is set */
+ yajl_gen_state state[YAJL_MAX_DEPTH]; /* one state per nesting level, indexed by depth */
+ yajl_print_t print; /* output callback, invoked as print(ctx, bytes, length) */
+ void * ctx; /* yajl_buf */
+ /* memory allocation routines */
+ yajl_alloc_funcs alloc;
+};
+
+#define INSERT_SEP \
+ if (g->state[g->depth] == yajl_gen_map_key || \
+ g->state[g->depth] == yajl_gen_in_array) { \
+ g->print(g->ctx, ",", 1); \
+ if (g->pretty) g->print(g->ctx, "\n", 1); \
+ } else if (g->state[g->depth] == yajl_gen_map_val) { \
+ g->print(g->ctx, ":", 1); \
+ if (g->pretty) g->print(g->ctx, " ", 1); \
+ }
+
+#define INSERT_WHITESPACE \
+ if (g->pretty) { \
+ if (g->state[g->depth] != yajl_gen_map_val) { \
+ unsigned int _i; \
+ for (_i=0;_i<g->depth;_i++) \
+ g->print(g->ctx, g->indentString, \
+ strlen(g->indentString)); \
+ } \
+ }
+/* check that we're not complete, or in error state. in a valid state
+ * to be generating */
+#define ENSURE_VALID_STATE \
+ if (g->state[g->depth] == yajl_gen_error) { \
+ return yajl_gen_in_error_state;\
+ } else if (g->state[g->depth] == yajl_gen_complete) { \
+ return yajl_gen_generation_complete; \
+ }
+
+#define APPENDED_ATOM \
+ switch (g->state[g->depth]) { \
+ case yajl_gen_start: \
+ g->state[g->depth] = yajl_gen_complete; \
+ break; \
+ case yajl_gen_map_start: \
+ case yajl_gen_map_key: \
+ g->state[g->depth] = yajl_gen_map_val; \
+ break; \
+ case yajl_gen_array_start: \
+ g->state[g->depth] = yajl_gen_in_array; \
+ break; \
+ case yajl_gen_map_val: \
+ g->state[g->depth] = yajl_gen_map_key; \
+ break; \
+ default: \
+ break; \
+ } \
+
+#define FINAL_NEWLINE \
+ if (g->pretty && g->state[g->depth] == yajl_gen_complete) \
+ g->print(g->ctx, "\n", 1);
+
+/*
+ * Write `len` bytes of `str` to the generator's output as a JSON string:
+ * an opening double quote, the bytes exactly as given, and a closing double
+ * quote. No escaping is performed here, so the caller has to pass text that
+ * is already escaped.
+ *
+ * The pending separator ("," or ":"), pretty-print indentation and the state
+ * transition for one appended atom are applied around the string. Nothing
+ * here checks whether the generator currently expects a map key, so the
+ * string is accepted both as a key and as a value.
+ *
+ * Returns yajl_gen_in_error_state or yajl_gen_generation_complete, without
+ * writing anything, when the generator is in the error or complete state;
+ * otherwise yajl_gen_status_ok.
+ */
+yajl_gen_status yajl_gen_raw_string(yajl_gen g, const unsigned char * str, unsigned int len)
+{
+    ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, "\"", 1);
+    /* the print callback takes const char *; convert explicitly rather than
+     * relying on an implicit pointer-sign conversion */
+    g->print(g->ctx, (const char *) str, len);
+    g->print(g->ctx, "\"", 1);
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}

0 comments on commit 85f3aa5

Please sign in to comment.