Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Request: support to simplified json #88

Open
wants to merge 3 commits into from

1 participant

@fungos

Added support for parsing a simplified json format that doesn't require redundant symbols such as double quotes around strings and commas.

To be sure that this change wouldn't touch the normal parser, I have duplicated the two string helper functions yajl_string_scan and yajl_lex_string as yajl_unquoted_string_scan and yajl_lex_unquoted_string. I know that it is ugly and we can merge these, but I felt it was safer this way.

In yajl_lex_lex I changed the cases f, t, n to jump to an invalid block that is the same as default, but this means we've lost the error yajl_lex_invalid_string.

I have tested all my json files with this parser and it does the job correctly.
Also, I haven't added any test cases for this yet; maybe I will later, not sure :( .

fungos added some commits
@fungos fungos Added support to parse a simplified json format
To enable sloppy json parsing toggle yajl_allow_sloppy_format config on
by using: "yajl_config(h, yajl_allow_sloppy_format, 1);"
42cbdfa
@fungos fungos Added missing string terminators
Added \t \f \v \r as possible string terminators for unquoted strings.
8f18567
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Nov 25, 2012
  1. @fungos

    Added support to parse a simplified json format

    fungos authored
    To enable sloppy json parsing toggle yajl_allow_sloppy_format config on
    by using: "yajl_config(h, yajl_allow_sloppy_format, 1);"
  2. @fungos

    Added missing string terminators

    fungos authored
    Added \t \f \v \r as possible string terminators for unquoted strings.
Commits on Aug 16, 2014
  1. @fungos
This page is out of date. Refresh to see the latest.
View
75 include/yajl/yajl_common.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef __YAJL_COMMON_H__
+#define __YAJL_COMMON_H__
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define YAJL_MAX_DEPTH 128
+
+/* msft dll export gunk. To build a DLL on windows, you
+ * must define WIN32, YAJL_SHARED, and YAJL_BUILD. To use a shared
+ * DLL, you must define YAJL_SHARED and WIN32 */
+#if (defined(_WIN32) || defined(WIN32)) && defined(YAJL_SHARED)
+# ifdef YAJL_BUILD
+# define YAJL_API __declspec(dllexport)
+# else
+# define YAJL_API __declspec(dllimport)
+# endif
+#else
+# if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303
+# define YAJL_API __attribute__ ((visibility("default")))
+# else
+# define YAJL_API
+# endif
+#endif
+
+/** pointer to a malloc function, supporting client overriding memory
+ * allocation routines */
+typedef void * (*yajl_malloc_func)(void *ctx, size_t sz);
+
+/** pointer to a free function, supporting client overriding memory
+ * allocation routines */
+typedef void (*yajl_free_func)(void *ctx, void * ptr);
+
+/** pointer to a realloc function which can resize an allocation. */
+typedef void * (*yajl_realloc_func)(void *ctx, void * ptr, size_t sz);
+
+/** A structure which can be passed to yajl_*_alloc routines to allow the
+ * client to specify memory allocation functions to be used. */
+typedef struct
+{
+ /** pointer to a function that can allocate uninitialized memory */
+ yajl_malloc_func malloc;
+ /** pointer to a function that can resize memory allocations */
+ yajl_realloc_func realloc;
+ /** pointer to a function that can free memory allocated using
+ * the realloc or malloc function above */
+ yajl_free_func free;
+ /** a context pointer that will be passed to above allocation routines */
+ void * ctx;
+} yajl_alloc_funcs;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
View
157 include/yajl/yajl_gen.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/**
+ * \file yajl_gen.h
+ * Interface to YAJL's JSON generation facilities.
+ */
+
+#include <yajl/yajl_common.h>
+
+#ifndef __YAJL_GEN_H__
+#define __YAJL_GEN_H__
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ /** generator status codes */
+ typedef enum {
+ /** no error */
+ yajl_gen_status_ok = 0,
+ /** at a point where a map key is generated, a function other than
+ * yajl_gen_string was called */
+ yajl_gen_keys_must_be_strings,
+ /** YAJL's maximum generation depth was exceeded. see
+ * YAJL_MAX_DEPTH */
+ yajl_max_depth_exceeded,
+ /** A generator function (yajl_gen_XXX) was called while in an error
+ * state */
+ yajl_gen_in_error_state,
+ /** A complete JSON document has been generated */
+ yajl_gen_generation_complete,
+ /** yajl_gen_double was passed an invalid floating point value
+ * (infinity or NaN). */
+ yajl_gen_invalid_number,
+ /** A print callback was passed in, so there is no internal
+ * buffer to get from */
+ yajl_gen_no_buf,
+ /** returned from yajl_gen_string() when the yajl_gen_validate_utf8
+ * option is enabled and an invalid UTF8 string was passed by client code.
+ */
+ yajl_gen_invalid_string
+ } yajl_gen_status;
+
+ /** an opaque handle to a generator */
+ typedef struct yajl_gen_t * yajl_gen;
+
+ /** a callback used for "printing" the results. */
+ typedef void (*yajl_print_t)(void * ctx,
+ const char * str,
+ size_t len);
+
+ /** configuration parameters for the generator; these may be passed to
+ * yajl_gen_config() along with option specific argument(s). In general,
+ * all configuration parameters default to *off*. */
+ typedef enum {
+ /** generate indented (beautiful) output */
+ yajl_gen_beautify = 0x01,
+ /**
+ * Set an indent string which is used when yajl_gen_beautify
+ * is enabled. Maybe something like \\t or some number of
+ * spaces. The default is four spaces ' '.
+ */
+ yajl_gen_indent_string = 0x02,
+ /**
+ * Set a function and context argument that should be used to
+ * output generated json. the function should conform to the
+ * yajl_print_t prototype while the context argument is a
+ * void * of your choosing.
+ *
+ * example:
+ * yajl_gen_config(g, yajl_gen_print_callback, myFunc, myVoidPtr);
+ */
+ yajl_gen_print_callback = 0x04,
+ /**
+ * Normally the generator does not validate that strings you
+ * pass to it via yajl_gen_string() are valid UTF8. Enabling
+ * this option will cause it to do so.
+ */
+ yajl_gen_validate_utf8 = 0x08,
+ /**
+ * the forward solidus (slash or '/' in human) is not required to be
+ * escaped in json text. By default, YAJL will not escape it in the
+ * interest of saving bytes. Setting this flag will cause YAJL to
+ * always escape '/' in generated JSON strings.
+ */
+ yajl_gen_escape_solidus = 0x10
+ } yajl_gen_option;
+
+ /** allow the modification of generator options subsequent to handle
+ * allocation (via yajl_gen_alloc)
+ * \returns zero in case of errors, non-zero otherwise
+ */
+ YAJL_API int yajl_gen_config(yajl_gen g, yajl_gen_option opt, ...);
+
+ /** allocate a generator handle
+ * \param allocFuncs an optional pointer to a structure which allows
+ * the client to override the memory allocation
+ * used by yajl. May be NULL, in which case
+ * malloc/free/realloc will be used.
+ *
+ * \returns an allocated handle on success, NULL on failure (bad params)
+ */
+ YAJL_API yajl_gen yajl_gen_alloc(const yajl_alloc_funcs * allocFuncs);
+
+ /** free a generator handle */
+ YAJL_API void yajl_gen_free(yajl_gen handle);
+
+ YAJL_API yajl_gen_status yajl_gen_integer(yajl_gen hand, long long int number);
+ /** generate a floating point number. number may not be infinity or
+ * NaN, as these have no representation in JSON. In these cases the
+ * generator will return 'yajl_gen_invalid_number' */
+ YAJL_API yajl_gen_status yajl_gen_double(yajl_gen hand, double number);
+ YAJL_API yajl_gen_status yajl_gen_number(yajl_gen hand,
+ const char * num,
+ size_t len);
+ YAJL_API yajl_gen_status yajl_gen_string(yajl_gen hand,
+ const unsigned char * str,
+ size_t len);
+ YAJL_API yajl_gen_status yajl_gen_null(yajl_gen hand);
+ YAJL_API yajl_gen_status yajl_gen_bool(yajl_gen hand, int boolean);
+ YAJL_API yajl_gen_status yajl_gen_map_open(yajl_gen hand);
+ YAJL_API yajl_gen_status yajl_gen_map_close(yajl_gen hand);
+ YAJL_API yajl_gen_status yajl_gen_array_open(yajl_gen hand);
+ YAJL_API yajl_gen_status yajl_gen_array_close(yajl_gen hand);
+
+ /** access the null terminated generator buffer. If incrementally
+ * outputting JSON, one should call yajl_gen_clear to clear the
+ * buffer. This allows stream generation. */
+ YAJL_API yajl_gen_status yajl_gen_get_buf(yajl_gen hand,
+ const unsigned char ** buf,
+ size_t * len);
+
+ /** clear yajl's output buffer, but maintain all internal generation
+ * state. This function will not "reset" the generator state, and is
+ * intended to enable incremental JSON outputting. */
+ YAJL_API void yajl_gen_clear(yajl_gen hand);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
View
235 include/yajl/yajl_parse.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2007-2011, Lloyd Hilaiel <lloyd@hilaiel.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/**
+ * \file yajl_parse.h
+ * Interface to YAJL's JSON stream parsing facilities.
+ */
+
+#include <yajl/yajl_common.h>
+
+#ifndef __YAJL_PARSE_H__
+#define __YAJL_PARSE_H__
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ /** error codes returned from this interface */
+ typedef enum {
+ /** no error was encountered */
+ yajl_status_ok,
+ /** a client callback returned zero, stopping the parse */
+ yajl_status_client_canceled,
+ /** An error occurred during the parse. Call yajl_get_error for
+ * more information about the encountered error */
+ yajl_status_error
+ } yajl_status;
+
+ /** attain a human readable, english, string for an error */
+ YAJL_API const char * yajl_status_to_string(yajl_status code);
+
+ /** an opaque handle to a parser */
+ typedef struct yajl_handle_t * yajl_handle;
+
+ /** yajl is an event driven parser. this means as json elements are
+ * parsed, you are called back to do something with the data. The
+ * functions in this table indicate the various events for which
+ * you will be called back. Each callback accepts a "context"
+ * pointer, this is a void * that is passed into the yajl_parse
+ * function which the client code may use to pass around context.
+ *
+ * All callbacks return an integer. If non-zero, the parse will
+ * continue. If zero, the parse will be canceled and
+ * yajl_status_client_canceled will be returned from the parse.
+ *
+ * \attention {
+ * A note about the handling of numbers:
+ *
+ * yajl will only convert numbers that can be represented in a
+ * double or a 64 bit (long long) int. All other numbers will
+ * be passed to the client in string form using the yajl_number
+ * callback. Furthermore, if yajl_number is not NULL, it will
+ * always be used to return numbers, that is yajl_integer and
+ * yajl_double will be ignored. If yajl_number is NULL but one
+ * of yajl_integer or yajl_double are defined, parsing of a
+ * number larger than is representable in a double or 64 bit
+ * integer will result in a parse error.
+ * }
+ */
+ typedef struct {
+ int (* yajl_null)(void * ctx);
+ int (* yajl_boolean)(void * ctx, int boolVal);
+ int (* yajl_integer)(void * ctx, long long integerVal);
+ int (* yajl_double)(void * ctx, double doubleVal);
+ /** A callback which passes the string representation of the number
+ * back to the client. Will be used for all numbers when present */
+ int (* yajl_number)(void * ctx, const char * numberVal,
+ size_t numberLen);
+
+ /** strings are returned as pointers into the JSON text when
+ * possible; as a result, they are _not_ null padded */
+ int (* yajl_string)(void * ctx, const unsigned char * stringVal,
+ size_t stringLen);
+
+ int (* yajl_start_map)(void * ctx);
+ int (* yajl_map_key)(void * ctx, const unsigned char * key,
+ size_t stringLen);
+ int (* yajl_end_map)(void * ctx);
+
+ int (* yajl_start_array)(void * ctx);
+ int (* yajl_end_array)(void * ctx);
+ } yajl_callbacks;
+
+ /** allocate a parser handle
+ * \param callbacks a yajl callbacks structure specifying the
+ * functions to call when different JSON entities
+ * are encountered in the input text. May be NULL,
+ * which is only useful for validation.
+ * \param afs memory allocation functions, may be NULL to use
+ * C runtime library routines (malloc and friends)
+ * \param ctx a context pointer that will be passed to callbacks.
+ */
+ YAJL_API yajl_handle yajl_alloc(const yajl_callbacks * callbacks,
+ yajl_alloc_funcs * afs,
+ void * ctx);
+
+
+ /** configuration parameters for the parser, these may be passed to
+ * yajl_config() along with option specific argument(s). In general,
+ * all configuration parameters default to *off*. */
+ typedef enum {
+ /** Ignore javascript style comments present in
+ * JSON input. Non-standard, but rather fun
+ * arguments: toggled off with integer zero, on otherwise.
+ *
+ * example:
+ * yajl_config(h, yajl_allow_comments, 1); // turn comment support on
+ */
+ yajl_allow_comments = 0x01,
+ /**
+ * By default the parser verifies that all strings in JSON input are
+ * valid UTF8 and emits a parse error if this is not so. That check
+ * makes parsing slightly more expensive (~7% depending on processor
+ * and compiler in use); setting this option disables it.
+ *
+ * example:
+ * yajl_config(h, yajl_dont_validate_strings, 1); // disable utf8 checking
+ */
+ yajl_dont_validate_strings = 0x02,
+ /**
+ * By default, upon calls to yajl_complete_parse(), yajl will
+ * ensure the entire input text was consumed and will raise an error
+ * otherwise. Enabling this flag will cause yajl to disable this
+ * check. This can be useful when parsing json out of input that contains more
+ * than a single JSON document.
+ */
+ yajl_allow_trailing_garbage = 0x04,
+ /**
+ * Allow multiple values to be parsed by a single handle. The
+ * entire text must be valid JSON, and values can be separated
+ * by any kind of whitespace. This flag will change the
+ * behavior of the parser, and cause it to continue parsing after
+ * a value is parsed, rather than transitioning into a
+ * complete state. This option can be useful when parsing multiple
+ * values from an input stream.
+ */
+ yajl_allow_multiple_values = 0x08,
+ /**
+ * When yajl_complete_parse() is called the parser will
+ * check that the top level value was completely consumed. I.E.,
+ * if called whilst in the middle of parsing a value
+ * yajl will enter an error state (premature EOF). Setting this
+ * flag suppresses that check and the corresponding error.
+ */
+ yajl_allow_partial_values = 0x10,
+ /**
+ * Allow a simplified json format to be parsed; it doesn't require
+ * some redundant tokens, such as double quotes for strings and commas.
+ * arguments: toggled off with integer zero, on otherwise.
+ *
+ * example:
+ * yajl_config(h, yajl_allow_sloppy_format, 1);
+ */
+ yajl_allow_sloppy_format = 0x20
+ } yajl_option;
+
+ /** allow the modification of parser options subsequent to handle
+ * allocation (via yajl_alloc)
+ * \returns zero in case of errors, non-zero otherwise
+ */
+ YAJL_API int yajl_config(yajl_handle h, yajl_option opt, ...);
+
+ /** free a parser handle */
+ YAJL_API void yajl_free(yajl_handle handle);
+
+ /** Parse some json!
+ * \param hand - a handle to the json parser allocated with yajl_alloc
+ * \param jsonText - a pointer to the UTF8 json text to be parsed
+ * \param jsonTextLength - the length, in bytes, of input text
+ */
+ YAJL_API yajl_status yajl_parse(yajl_handle hand,
+ const unsigned char * jsonText,
+ size_t jsonTextLength);
+
+ /** Parse any remaining buffered json.
+ * Since yajl is a stream-based parser, without an explicit end of
+ * input, yajl sometimes can't decide if content at the end of the
+ * stream is valid or not. For example, if "1" has been fed in,
+ * yajl can't know whether another digit is next or some character
+ * that would terminate the integer token.
+ *
+ * \param hand - a handle to the json parser allocated with yajl_alloc
+ */
+ YAJL_API yajl_status yajl_complete_parse(yajl_handle hand);
+
+ /** get an error string describing the state of the
+ * parse.
+ *
+ * If verbose is non-zero, the message will include the JSON
+ * text where the error occurred, along with an arrow pointing to
+ * the specific char.
+ *
+ * \returns A dynamically allocated string will be returned which should
+ * be freed with yajl_free_error
+ */
+ YAJL_API unsigned char * yajl_get_error(yajl_handle hand, int verbose,
+ const unsigned char * jsonText,
+ size_t jsonTextLength);
+
+ /**
+ * get the amount of data consumed from the last chunk passed to YAJL.
+ *
+ * In the case of a successful parse this can help you understand if
+ * the entire buffer was consumed (which will allow you to handle
+ * "junk at end of input").
+ *
+ * In the event an error is encountered during parsing, this function
+ * affords the client a way to get the offset into the most recent
+ * chunk where the error occurred. 0 will be returned if no error
+ * was encountered.
+ */
+ YAJL_API size_t yajl_get_bytes_consumed(yajl_handle hand);
+
+ /** free an error returned from yajl_get_error */
+ YAJL_API void yajl_free_error(yajl_handle hand, unsigned char * str);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
View
185 include/yajl/yajl_tree.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2010-2011 Florian Forster <ff at octo.it>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/**
+ * \file yajl_tree.h
+ *
+ * Parses JSON data and returns the data in tree form.
+ *
+ * \author Florian Forster
+ * \date August 2010
+ *
+ * This interface makes quick parsing and extraction of
+ * smallish JSON docs trivial:
+ *
+ * \include example/parse_config.c
+ */
+
+#ifndef YAJL_TREE_H
+#define YAJL_TREE_H 1
+
+#include <yajl/yajl_common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** possible data types that a yajl_val_s can hold */
+typedef enum {
+ yajl_t_string = 1,
+ yajl_t_number = 2,
+ yajl_t_object = 3,
+ yajl_t_array = 4,
+ yajl_t_true = 5,
+ yajl_t_false = 6,
+ yajl_t_null = 7,
+ /** The any type isn't valid for yajl_val_s.type, but can be
+ * used as an argument to routines like yajl_tree_get().
+ */
+ yajl_t_any = 8
+} yajl_type;
+
+#define YAJL_NUMBER_INT_VALID 0x01
+#define YAJL_NUMBER_DOUBLE_VALID 0x02
+
+/** A pointer to a node in the parse tree */
+typedef struct yajl_val_s * yajl_val;
+
+/**
+ * A JSON value representation capable of holding one of the seven
+ * types above. For "string", "number", "object", and "array"
+ * additional data is available in the union. The "YAJL_IS_*"
+ * and "YAJL_GET_*" macros below allow type checking and convenient
+ * value extraction.
+ */
+struct yajl_val_s
+{
+ /** Type of the value contained. Use the "YAJL_IS_*" macros to check for a
+ * specific type. */
+ yajl_type type;
+ /** Type-specific data. You may use the "YAJL_GET_*" macros to access these
+ * members. */
+ union
+ {
+ char * string;
+ struct {
+ long long i; /*< integer value, if representable. */
+ double d; /*< double value, if representable. */
+ char *r; /*< unparsed number in string form. */
+ /** Signals whether the \em i and \em d members are
+ * valid. See \c YAJL_NUMBER_INT_VALID and
+ * \c YAJL_NUMBER_DOUBLE_VALID. */
+ unsigned int flags;
+ } number;
+ struct {
+ const char **keys; /*< Array of keys */
+ yajl_val *values; /*< Array of values. */
+ size_t len; /*< Number of key-value-pairs. */
+ } object;
+ struct {
+ yajl_val *values; /*< Array of elements. */
+ size_t len; /*< Number of elements. */
+ } array;
+ } u;
+};
+
+/**
+ * Parse a string.
+ *
+ * Parses a null-terminated string containing JSON data and returns a pointer
+ * to the top-level value (root of the parse tree).
+ *
+ * \param input Pointer to a null-terminated utf8 string containing
+ * JSON data.
+ * \param error_buffer Pointer to a buffer in which an error message will
+ * be stored if \em yajl_tree_parse fails, or
+ * \c NULL. The buffer will be initialized before
+ * parsing, so its content will be destroyed even if
+ * \em yajl_tree_parse succeeds.
+ * \param error_buffer_size Size of the memory area pointed to by
+ * \em error_buffer. If \em error_buffer is
+ * \c NULL, this argument is ignored.
+ *
+ * \returns Pointer to the top-level value or \c NULL on error. The memory
+ * pointed to must be freed using \em yajl_tree_free. In case of an error, a
+ * null terminated message describing the error in more detail is stored in
+ * \em error_buffer if it is not \c NULL.
+ */
+YAJL_API yajl_val yajl_tree_parse (const char *input,
+ char *error_buffer, size_t error_buffer_size);
+
+/**
+ * Free a parse tree returned by "yajl_tree_parse".
+ *
+ * \param v Pointer to a JSON value returned by "yajl_tree_parse". Passing NULL
+ * is valid and results in a no-op.
+ */
+YAJL_API void yajl_tree_free (yajl_val v);
+
+/**
+ * Access a nested value inside a tree.
+ *
+ * \param parent the node under which you'd like to extract values.
+ * \param path A null terminated array of strings, each the name of an object key
+ * \param type the yajl_type of the object you seek, or yajl_t_any if any will do.
+ *
+ * \returns a pointer to the found value, or NULL if we came up empty.
+ *
+ * Future Ideas: it'd be nice to move path to a string and implement support for
+ * a teeny tiny micro language here, so you can extract array elements, do things
+ * like .first and .last, even .length. Inspiration from JSONPath and css selectors?
+ * No it wouldn't be fast, but that's not what this API is about.
+ */
+YAJL_API yajl_val yajl_tree_get(yajl_val parent, const char ** path, yajl_type type);
+
+/* Various convenience macros to check the type of a `yajl_val` */
+#define YAJL_IS_STRING(v) (((v) != NULL) && ((v)->type == yajl_t_string))
+#define YAJL_IS_NUMBER(v) (((v) != NULL) && ((v)->type == yajl_t_number))
+#define YAJL_IS_INTEGER(v) (YAJL_IS_NUMBER(v) && ((v)->u.number.flags & YAJL_NUMBER_INT_VALID))
+#define YAJL_IS_DOUBLE(v) (YAJL_IS_NUMBER(v) && ((v)->u.number.flags & YAJL_NUMBER_DOUBLE_VALID))
+#define YAJL_IS_OBJECT(v) (((v) != NULL) && ((v)->type == yajl_t_object))
+#define YAJL_IS_ARRAY(v) (((v) != NULL) && ((v)->type == yajl_t_array ))
+#define YAJL_IS_TRUE(v) (((v) != NULL) && ((v)->type == yajl_t_true ))
+#define YAJL_IS_FALSE(v) (((v) != NULL) && ((v)->type == yajl_t_false ))
+#define YAJL_IS_NULL(v) (((v) != NULL) && ((v)->type == yajl_t_null ))
+
+/** Given a yajl_val_string return a ptr to the bare string it contains,
+ * or NULL if the value is not a string. */
+#define YAJL_GET_STRING(v) (YAJL_IS_STRING(v) ? (v)->u.string : NULL)
+
+/** Get the string representation of a number. You should check type first,
+ * perhaps using YAJL_IS_NUMBER */
+#define YAJL_GET_NUMBER(v) ((v)->u.number.r)
+
+/** Get the double representation of a number. You should check type first,
+ * perhaps using YAJL_IS_DOUBLE */
+#define YAJL_GET_DOUBLE(v) ((v)->u.number.d)
+
+/** Get the 64bit (long long) integer representation of a number. You should
+ * check type first, perhaps using YAJL_IS_INTEGER */
+#define YAJL_GET_INTEGER(v) ((v)->u.number.i)
+
+/** Get a pointer to a yajl_val_object or NULL if the value is not an object. */
+#define YAJL_GET_OBJECT(v) (YAJL_IS_OBJECT(v) ? &(v)->u.object : NULL)
+
+/** Get a pointer to a yajl_val_array or NULL if the value is not an array. */
+#define YAJL_GET_ARRAY(v) (YAJL_IS_ARRAY(v) ? &(v)->u.array : NULL)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* YAJL_TREE_H */
View
23 include/yajl/yajl_version.h
@@ -0,0 +1,23 @@
+#ifndef YAJL_VERSION_H_
+#define YAJL_VERSION_H_
+
+#include <yajl/yajl_common.h>
+
+#define YAJL_MAJOR 2
+#define YAJL_MINOR 0
+#define YAJL_MICRO 5
+
+#define YAJL_VERSION ((YAJL_MAJOR * 10000) + (YAJL_MINOR * 100) + YAJL_MICRO)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int YAJL_API yajl_version(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* YAJL_VERSION_H_ */
+
View
11 src/api/yajl_parse.h
@@ -156,7 +156,16 @@ extern "C" {
* yajl will enter an error state (premature EOF). Setting this
* flag suppresses that check and the corresponding error.
*/
- yajl_allow_partial_values = 0x10
+ yajl_allow_partial_values = 0x10,
+ /**
+ * Allow a simplified json format to be parsed; it doesn't require
+ * some redundant tokens, such as double quotes for strings and commas.
+ * arguments: toggled off with integer zero, on otherwise.
+ *
+ * example:
+ * yajl_config(h, yajl_allow_sloppy_format, 1);
+ */
+ yajl_allow_sloppy_format = 0x20
} yajl_option;
/** allow the modification of parser options subsequent to handle
View
7 src/yajl.c
@@ -91,6 +91,7 @@ yajl_config(yajl_handle h, yajl_option opt, ...)
case yajl_allow_trailing_garbage:
case yajl_allow_multiple_values:
case yajl_allow_partial_values:
+ case yajl_allow_sloppy_format:
if (va_arg(ap, int)) h->flags |= opt;
else h->flags &= ~opt;
break;
@@ -124,7 +125,8 @@ yajl_parse(yajl_handle hand, const unsigned char * jsonText,
if (hand->lexer == NULL) {
hand->lexer = yajl_lex_alloc(&(hand->alloc),
hand->flags & yajl_allow_comments,
- !(hand->flags & yajl_dont_validate_strings));
+ !(hand->flags & yajl_dont_validate_strings),
+ hand->flags & yajl_allow_sloppy_format);
}
status = yajl_do_parse(hand, jsonText, jsonTextLen);
@@ -144,7 +146,8 @@ yajl_complete_parse(yajl_handle hand)
if (hand->lexer == NULL) {
hand->lexer = yajl_lex_alloc(&(hand->alloc),
hand->flags & yajl_allow_comments,
- !(hand->flags & yajl_dont_validate_strings));
+ !(hand->flags & yajl_dont_validate_strings),
+ hand->flags & yajl_allow_sloppy_format);
}
return yajl_do_finish(hand);
View
174 src/yajl_lex.c
@@ -30,6 +30,7 @@ tokToStr(yajl_tok tok)
case yajl_tok_bool: return "bool";
case yajl_tok_colon: return "colon";
case yajl_tok_comma: return "comma";
+ case yajl_tok_eol: return "eol";
case yajl_tok_eof: return "eof";
case yajl_tok_error: return "error";
case yajl_tok_left_brace: return "brace";
@@ -90,6 +91,9 @@ struct yajl_lexer_t {
/* shall we validate utf8 inside strings? */
unsigned int validateUTF8;
+ /* a simplified json format is permitted for parsing */
+ unsigned int allowSloppyFormat;
+
yajl_alloc_funcs * alloc;
};
@@ -102,13 +106,15 @@ struct yajl_lexer_t {
yajl_lexer
yajl_lex_alloc(yajl_alloc_funcs * alloc,
- unsigned int allowComments, unsigned int validateUTF8)
+ unsigned int allowComments, unsigned int validateUTF8,
+ unsigned int allowSloppyFormat)
{
yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
lxr->buf = yajl_buf_alloc(alloc);
lxr->allowComments = allowComments;
lxr->validateUTF8 = validateUTF8;
+ lxr->allowSloppyFormat = allowSloppyFormat;
lxr->alloc = alloc;
return lxr;
}
@@ -128,29 +134,31 @@ yajl_lex_free(yajl_lexer lxr)
* VHC - valid hex char
* NFP - needs further processing (from a string scanning perspective)
* NUC - needs utf8 checking when enabled (from a string scanning perspective)
+ * STT - sloppy string terminator
*/
#define VEC 0x01
#define IJC 0x02
#define VHC 0x04
#define NFP 0x08
#define NUC 0x10
+#define STT 0x20
static const char charLookupTable[256] =
{
/*00*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
-/*08*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
+/*08*/ IJC , IJC|STT, IJC|STT, IJC|STT, IJC|STT, IJC|STT, IJC , IJC ,
/*10*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
/*18*/ IJC , IJC , IJC , IJC , IJC , IJC , IJC , IJC ,
-/*20*/ 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 , 0 , 0 ,
+/*20*/ STT , 0 , NFP|VEC|IJC, 0 , 0 , 0 , 0 , 0 ,
/*28*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , VEC ,
/*30*/ VHC , VHC , VHC , VHC , VHC , VHC , VHC , VHC ,
-/*38*/ VHC , VHC , 0 , 0 , 0 , 0 , 0 , 0 ,
+/*38*/ VHC , VHC , STT , 0 , 0 , 0 , 0 , 0 ,
/*40*/ 0 , VHC , VHC , VHC , VHC , VHC , VHC , 0 ,
/*48*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
/*50*/ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
-/*58*/ 0 , 0 , 0 , 0 , NFP|VEC|IJC, 0 , 0 , 0 ,
+/*58*/ 0 , 0 , 0 , 0 , NFP|VEC|IJC|STT, 0 , 0 , 0 ,
/*60*/ 0 , VHC , VEC|VHC, VHC , VHC , VHC , VEC|VHC, 0 ,
/*68*/ 0 , 0 , 0 , 0 , 0 , 0 , VEC , 0 ,
@@ -365,6 +373,125 @@ yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
return tok;
}
+/**
+ * Fast pre-scan used while lexing unquoted ("sloppy") strings.
+ *
+ * Counts the leading bytes of buf whose charLookupTable entry carries none
+ * of the flags of interest: IJC, STT and, only when utf8check is non-zero,
+ * NUC.
+ *
+ * Returns the number of bytes (0..len) that may be skipped; the byte at
+ * that index, if there is one, is the first that needs a closer look.
+ */
+inline static size_t
+yajl_unquoted_string_scan(const unsigned char * buf, size_t len,
+                          int utf8check)
+{
+    unsigned char stopFlags = IJC|STT;
+    size_t n;
+
+    if (utf8check) {
+        stopFlags |= NUC;
+    }
+
+    for (n = 0; n < len; n++) {
+        if (charLookupTable[buf[n]] & stopFlags) {
+            break;
+        }
+    }
+    return n;
+}
+
+/**
+ * Lex an unquoted ("sloppy") string token.
+ *
+ * Characters are consumed, first from the lexer's carry-over buffer when it
+ * is in use and then from jsonText at *offset, until a character flagged
+ * STT in charLookupTable is read. That terminating character HAS been read
+ * when this function returns; the caller is expected to push it back.
+ *
+ * Returns:
+ *  - yajl_tok_string               terminated by an STT character, no
+ *                                  backslash escapes seen
+ *  - yajl_tok_string_with_escapes  same, but at least one escape was seen
+ *  - yajl_tok_eof                  yajl_lex_utf8_char reported eof
+ *                                  (presumably STR_CHECK_EOF does as well;
+ *                                  the macro is defined elsewhere)
+ *  - yajl_tok_error                lexer->error is set to the reason and the
+ *                                  offending character has been unread
+ *                                  (except for invalid UTF-8)
+ *
+ * NOTE(review): the lookup table in this patch also tags '\\' (0x5C) with
+ * STT, so the terminator check below fires before the escape branch and
+ * that branch looks unreachable -- confirm whether STT on '\\' is intended.
+ */
+static yajl_tok
+yajl_lex_unquoted_string(yajl_lexer lexer, const unsigned char * jsonText,
+                         size_t jsonTextLen, size_t * offset)
+{
+    yajl_tok tok = yajl_tok_error;
+    int hasEscapes = 0;
+
+    for (;;) {
+        unsigned char curChar;
+
+        /* now jump into a faster scanning routine to skip as much
+         * of the buffers as possible */
+        {
+            const unsigned char * p;
+            size_t len;
+
+            if ((lexer->bufInUse && yajl_buf_len(lexer->buf) &&
+                 lexer->bufOff < yajl_buf_len(lexer->buf)))
+            {
+                p = ((const unsigned char *) yajl_buf_data(lexer->buf) +
+                     (lexer->bufOff));
+                len = yajl_buf_len(lexer->buf) - lexer->bufOff;
+                /* must be the unquoted scanner here too: the quoted-string
+                 * scanner does not stop on STT-only characters such as ' '
+                 * or ':', so a token resumed from the buffer would run
+                 * past its terminator */
+                lexer->bufOff += yajl_unquoted_string_scan(p, len,
+                                                           lexer->validateUTF8);
+            }
+            else if (*offset < jsonTextLen)
+            {
+                p = jsonText + *offset;
+                len = jsonTextLen - *offset;
+                *offset += yajl_unquoted_string_scan(p, len,
+                                                     lexer->validateUTF8);
+            }
+        }
+
+        STR_CHECK_EOF;
+
+        curChar = readChar(lexer, jsonText, offset);
+
+        /* a sloppy string terminator (whitespace or an attribute
+         * separator) ends the unquoted string */
+        if (charLookupTable[curChar] & STT) {
+            tok = yajl_tok_string;
+            break;
+        }
+        /* backslash escapes a set of control chars, */
+        else if (curChar == '\\') {
+            hasEscapes = 1;
+            STR_CHECK_EOF;
+
+            /* special case \u */
+            curChar = readChar(lexer, jsonText, offset);
+            if (curChar == 'u') {
+                unsigned int i = 0;
+
+                /* \u must be followed by exactly four hex digits */
+                for (i=0;i<4;i++) {
+                    STR_CHECK_EOF;
+                    curChar = readChar(lexer, jsonText, offset);
+                    if (!(charLookupTable[curChar] & VHC)) {
+                        /* back up to offending char */
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_string_invalid_hex_char;
+                        goto finish_string_lex;
+                    }
+                }
+            } else if (!(charLookupTable[curChar] & VEC)) {
+                /* back up to offending char */
+                unreadChar(lexer, offset);
+                lexer->error = yajl_lex_string_invalid_escaped_char;
+                goto finish_string_lex;
+            }
+        }
+        /* when not validating UTF8 it's a simple table lookup to determine
+         * if the present character is invalid */
+        else if(charLookupTable[curChar] & IJC) {
+            /* back up to offending char */
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_string_invalid_json_char;
+            goto finish_string_lex;
+        }
+        /* when in validate UTF8 mode we need to do some extra work */
+        else if (lexer->validateUTF8) {
+            yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
+                                            offset, curChar);
+
+            if (t == yajl_tok_eof) {
+                tok = yajl_tok_eof;
+                goto finish_string_lex;
+            } else if (t == yajl_tok_error) {
+                lexer->error = yajl_lex_string_invalid_utf8;
+                goto finish_string_lex;
+            }
+        }
+        /* accept it, and move on */
+    }
+  finish_string_lex:
+    /* tell our buddy, the parser, whether he needs to process this string
+     * again */
+    if (hasEscapes && tok == yajl_tok_string) {
+        tok = yajl_tok_string_with_escapes;
+    }
+
+    return tok;
+}
+
#define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
static yajl_tok
@@ -503,6 +630,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
yajl_tok tok = yajl_tok_error;
unsigned char c;
size_t startOffset = *offset;
+ unsigned char skipStrFixup = 0;
*outBuf = NULL;
*outLen = 0;
@@ -536,7 +664,12 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
case ':':
tok = yajl_tok_colon;
goto lexed;
- case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+ case '\n':
+ if (lexer->allowSloppyFormat) {
+ tok = yajl_tok_eol;
+ goto lexed;
+ }
+ case '\t': case '\v': case '\f': case '\r': case ' ':
startOffset++;
break;
case 't': {
@@ -549,9 +682,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
c = readChar(lexer, jsonText, offset);
if (c != *want) {
unreadChar(lexer, offset);
- lexer->error = yajl_lex_invalid_string;
- tok = yajl_tok_error;
- goto lexed;
+ goto invalid;
}
} while (*(++want));
tok = yajl_tok_bool;
@@ -567,9 +698,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
c = readChar(lexer, jsonText, offset);
if (c != *want) {
unreadChar(lexer, offset);
- lexer->error = yajl_lex_invalid_string;
- tok = yajl_tok_error;
- goto lexed;
+ goto invalid;
}
} while (*(++want));
tok = yajl_tok_bool;
@@ -585,9 +714,7 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
c = readChar(lexer, jsonText, offset);
if (c != *want) {
unreadChar(lexer, offset);
- lexer->error = yajl_lex_invalid_string;
- tok = yajl_tok_error;
- goto lexed;
+ goto invalid;
}
} while (*(++want));
tok = yajl_tok_null;
@@ -636,8 +763,18 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
/* hit error or eof, bail */
goto lexed;
default:
- lexer->error = yajl_lex_invalid_char;
- tok = yajl_tok_error;
+ invalid:
+ if (!lexer->allowSloppyFormat) {
+ lexer->error = yajl_lex_invalid_char;
+ tok = yajl_tok_error;
+ goto lexed;
+ }
+ unreadChar(lexer, offset);
+ skipStrFixup = 1;
+ tok = yajl_lex_unquoted_string(lexer,
+ (const unsigned char *) jsonText,
+ jsonTextLen, offset);
+ unreadChar(lexer, offset);
goto lexed;
}
}
@@ -663,7 +800,8 @@ yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
}
/* special case for strings. skip the quotes. */
- if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
+ if ((tok == yajl_tok_string || tok == yajl_tok_string_with_escapes) &&
+ !skipStrFixup)
{
assert(*outLen >= 2);
(*outBuf)++;
View
4 src/yajl_lex.h
@@ -23,6 +23,7 @@ typedef enum {
yajl_tok_bool,
yajl_tok_colon,
yajl_tok_comma,
+ yajl_tok_eol,
yajl_tok_eof,
yajl_tok_error,
yajl_tok_left_brace,
@@ -49,7 +50,8 @@ typedef struct yajl_lexer_t * yajl_lexer;
yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc,
unsigned int allowComments,
- unsigned int validateUTF8);
+ unsigned int validateUTF8,
+ unsigned int allowSloppyFormat);
void yajl_lex_free(yajl_lexer lexer);
View
15 src/yajl_parser.c
@@ -204,6 +204,9 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
if (*offset != jsonTextLen) {
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
+ if (tok == yajl_tok_eol && hand->flags & yajl_allow_sloppy_format) {
+ goto around_again;
+ }
if (tok != yajl_tok_eof) {
yajl_bs_set(hand->stateStack, yajl_state_parse_error);
hand->parseError = "trailing garbage";
@@ -233,6 +236,8 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
offset, &buf, &bufLen);
switch (tok) {
+ case yajl_tok_eol:
+ goto around_again;
case yajl_tok_eof:
return yajl_status_ok;
case yajl_tok_error:
@@ -332,7 +337,8 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
break;
case yajl_tok_right_brace: {
if (yajl_bs_current(hand->stateStack) ==
- yajl_state_array_start)
+ yajl_state_array_start ||
+ hand->flags & yajl_allow_sloppy_format)
{
if (hand->callbacks &&
hand->callbacks->yajl_end_array)
@@ -381,6 +387,8 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
tok = yajl_lex_lex(hand->lexer, jsonText, jsonTextLen,
offset, &buf, &bufLen);
switch (tok) {
+ case yajl_tok_eol:
+ goto around_again;
case yajl_tok_eof:
return yajl_status_ok;
case yajl_tok_error:
@@ -403,7 +411,8 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
goto around_again;
case yajl_tok_right_bracket:
if (yajl_bs_current(hand->stateStack) ==
- yajl_state_map_start)
+ yajl_state_map_start ||
+ hand->flags & yajl_allow_sloppy_format)
{
if (hand->callbacks && hand->callbacks->yajl_end_map) {
_CC_CHK(hand->callbacks->yajl_end_map(hand->ctx));
@@ -448,6 +457,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
yajl_bs_pop(hand->stateStack);
goto around_again;
case yajl_tok_comma:
+ case yajl_tok_eol:
yajl_bs_set(hand->stateStack, yajl_state_map_need_key);
goto around_again;
case yajl_tok_eof:
@@ -476,6 +486,7 @@ yajl_do_parse(yajl_handle hand, const unsigned char * jsonText,
yajl_bs_pop(hand->stateStack);
goto around_again;
case yajl_tok_comma:
+ case yajl_tok_eol:
yajl_bs_set(hand->stateStack, yajl_state_array_need_val);
goto around_again;
case yajl_tok_eof:
Something went wrong with that request. Please try again.