Permalink
Browse files

improved ascii tables, tests..

  • Loading branch information...
1 parent 5c17218 commit a4c358f507ce3ebc64227ae352a816f2d6b4d0f2 @mnunberg committed Apr 3, 2012
Showing with 383 additions and 84 deletions.
  1. +1 −1 Makefile
  2. +17 −1 README.pod
  3. BIN json_samples.tgz
  4. +125 −46 jsonsl.c
  5. +27 −5 jsonsl.h
  6. +166 −27 srcutil/genchartables.pl
  7. +3 −0 tests/Makefile
  8. +22 −1 tests/json_test.c
  9. +22 −3 tests/unescape.c
View
2 Makefile
@@ -2,7 +2,7 @@ LIBJSONSL_DIR=$(shell pwd)
LDFLAGS=-L$(LIBJSONSL_DIR) -Wl,-rpath $(LIBJSONSL_DIR) -ljsonsl $(PROFILE)
CFLAGS=\
-Wall -std=gnu89 -pedantic \
- -O2 -ggdb3 \
+ -O3 -ggdb3 \
-I$(LIBJSONSL_DIR) -DJSONSL_STATE_GENERIC
export CFLAGS
View
18 README.pod
@@ -1,6 +1,6 @@
=head1 JSONSL
-JSON Stateful (or Simple, or Stacked) Lexer
+JSON Stateful (or Simple, or Stacked, or Searchable, or Streaming) Lexer
=head1 Why another (and yet another) JSON lexer?
@@ -56,6 +56,22 @@ initial stack structures are initialized
Just a C source file, and a corresponding header file. ANSI C.
+While attempts will be made to add functionality and reduce boilerplate
+in your code, the core functions are simple and clearly defined.
+
+Add-ons (see below) are available; they live in the same jsonsl.c file.
+
+=item JSONPointer search add-on
+
+Use L<JSONPointer|http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-02>
+to query JSON streams as they arrive. It is quite efficient and very simple (see
+jpr_test.c for examples).
+
+=item Unescaping utility add-on
+
+Includes a nice little function which can flexibly unescape JSON
+strings to match your specifications.
+
=back
The rest of this documentation needs work
View
BIN json_samples.tgz
Binary file not shown.
View
171 jsonsl.c
@@ -28,6 +28,14 @@ static int *Allowed_Whitespace;
Allowed_Whitespace[(unsigned int)c & 0xff]
+/**
+ * This table contains allowed two-character escapes
+ * as per the RFC
+ */
+static int *Allowed_Escapes;
+#define is_allowed_escape(c) \
+ Allowed_Escapes[(unsigned int)c & 0xff]
+
JSONSL_API
jsonsl_t jsonsl_new(int nlevels)
{
@@ -108,7 +116,8 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
#define SPECIAL_POP \
CALLBACK_AND_POP(SPECIAL); \
- jsn->expecting = 0;
+ jsn->expecting = 0; \
+ jsn->tok_last = 0; \
#define SPECIAL_MAYBE_POP \
if (state->type == JSONSL_T_SPECIAL) { \
@@ -145,7 +154,9 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
/* Special escape handling for some stuff */
if (jsn->in_escape) {
jsn->in_escape = 0;
- if (*c == 'u') {
+ if (!is_allowed_escape(*c)) {
+ INVOKE_ERROR(ESCAPE_INVALID);
+ } else if (*c == 'u') {
CALLBACK(UESCAPE, UESCAPE);
if (jsn->return_UESCAPE) {
return;
@@ -165,7 +176,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
* XXX: for some reason GCC does not optimize the (*c >= 0x5d) condition
* very well?
*/
- if ( (*c >= 0x23 && *c != '\\') || (*c == 0x20) ) {
+ if ( ((*c >= 0x23 && *c != '\\') || (*c == 0x20)) && *c ) {
goto GT_NEXT;
} else if (*c == '"') {
/* terminator */
@@ -186,6 +197,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
* serves no other function).
*/
SPECIAL_POP;
+ jsn->expecting = ',';
goto GT_NEXT;
}
/**
@@ -219,6 +231,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
INVOKE_ERROR(MISSING_TOKEN);
}
jsn->expecting = ','; /* Can't figure out what to expect next */
+ jsn->tok_last = 0;
STACK_PUSH;
state->type = JSONSL_T_STRING;
@@ -243,6 +256,7 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
STACK_PUSH;
state->type = JSONSL_T_STRING;
jsn->expecting = ',';
+ jsn->tok_last = 0;
CALLBACK(STRING, PUSH);
goto GT_NEXT;
@@ -274,8 +288,9 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
if (jsn->expecting != *c) {
INVOKE_ERROR(STRAY_TOKEN);
}
- jsn->tok_last = *c;
+ jsn->tok_last = ':';
jsn->can_insert = 1;
+ jsn->expecting = '"';
goto GT_NEXT;
case ',':
@@ -302,7 +317,8 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
jsn->can_insert = 1;
}
- jsn->tok_last = *c;
+ jsn->tok_last = ',';
+ jsn->expecting = '"';
goto GT_NEXT;
/* new list or object */
@@ -336,9 +352,14 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
case '}':
case ']':
SPECIAL_MAYBE_POP;
+ if (jsn->tok_last == ',' && jsn->options.allow_trailing_comma == 0) {
+ INVOKE_ERROR(TRAILING_COMMA);
+ }
+
jsn->can_insert = 0;
jsn->level--;
jsn->expecting = ',';
+ jsn->tok_last = 0;
if (*c == ']') {
if (state->type != '[') {
INVOKE_ERROR(BRACKET_MISMATCH);
@@ -378,6 +399,9 @@ jsonsl_feed(jsonsl_t jsn, const jsonsl_char_t *bytes, size_t nbytes)
}
ENSURE_HVAL;
state->nelem++;
+ if (!jsn->can_insert) {
+ INVOKE_ERROR(CANT_INSERT);
+ }
STACK_PUSH;
state->type = JSONSL_T_SPECIAL;
state->special_flags = special_flags;
@@ -586,6 +610,7 @@ jsonsl_jpr_new(const char *path, jsonsl_error_t *errp)
ret->ncomponents = curidx;
ret->basestr = my_copy;
ret->orig = malloc(origlen);
+ ret->norig = origlen-1;
strcpy(ret->orig, path);
return ret;
@@ -800,44 +825,36 @@ const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match)
* Maps literal escape sequences with special meaning to their
* actual control codes (e.g. \n => 0x0a)
*/
-static unsigned char Escape_Maps[0xff] = {
- /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
- /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */
- /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */
- /* 0x60 */ 0,0, /* 0x61 */
- /* 0x62 */ 8 /* b */, /* 0x62 */
- /* 0x63 */ 0,0,0, /* 0x65 */
- /* 0x66 */ 12 /* f */, /* 0x66 */
- /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
- /* 0x6e */ 32 /* n */, /* 0x6e */
- /* 0x6f */ 0,0,0, /* 0x71 */
- /* 0x72 */ 13 /* r */, /* 0x72 */
- /* 0x73 */ 0, /* 0x73 */
- /* 0x74 */ 9 /* t */, /* 0x74 */
- /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
- /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
- /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
- /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
- /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */
-};
+static unsigned char *Escape_Maps;
/**
* Utility function to convert escape sequences
*/
JSONSL_API
-size_t jsonsl_util_unescape(const char *in,
- char *out,
- size_t len,
- const int toEscape[127],
- jsonsl_error_t *err)
+size_t jsonsl_util_unescape_ex(const char *in,
+ char *out,
+ size_t len,
+ const int toEscape[128],
+ jsonsl_special_t *oflags,
+ jsonsl_error_t *err,
+ const char **errat)
{
const unsigned char *c = (const unsigned char*)in;
int in_escape = 0;
size_t origlen = len;
/* difference between the length of the input buffer and the output buffer */
size_t ndiff = 0;
+ if (oflags) {
+ *oflags = 0;
+ }
+#define UNESCAPE_BAIL(e,offset) \
+ *err = JSONSL_ERROR_##e; \
+ if (errat) { \
+ *errat = (const char*)(c+ (ssize_t)(offset)); \
+ } \
+ return 0;
+
for (; len; len--, c++, out++) {
unsigned int uesc_val[2];
-
if (in_escape) {
/* inside a previously ignored escape. Ignore */
in_escape = 0;
@@ -849,14 +866,16 @@ size_t jsonsl_util_unescape(const char *in,
goto GT_ASSIGN;
}
- if (len < 2 ||
- (toEscape[(unsigned char)c[1] & 0x7f] == 0
- && c[1] != '\\' && c[1] != '"' && *c > 0x1f)) {
- /* either no following character, or the following
- * character was not specified in the string table.
- * Note we always un-escape the characters which mandate escape
- * according to the json spec. (double-quote, reverse-solidus,
- * and control characters)
+ if (len < 2) {
+ UNESCAPE_BAIL(ESCAPE_INVALID, 0);
+ }
+ if (!is_allowed_escape(c[1])) {
+ UNESCAPE_BAIL(ESCAPE_INVALID, 1)
+ }
+ if ((toEscape[(unsigned char)c[1] & 0x7f] == 0 &&
+ c[1] != '\\' && c[1] != '"')) {
+ /* if the caller does not want this escape sequence unescaped, just
+ * continue with the escape flag set
*/
in_escape = 1;
goto GT_ASSIGN;
@@ -886,14 +905,12 @@ size_t jsonsl_util_unescape(const char *in,
/* Need at least six characters:
* { [0] = '\\', [1] = 'u', [2] = 'f', [3] = 'f', [4] = 'f', [5] = 'f' }
*/
- *err = JSONSL_ERROR_UESCAPE_TOOSHORT;
- return 0;
+ UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1);
}
if (sscanf((const char*)(c+2), "%02x%02x", uesc_val, uesc_val+1) != 2) {
/* We treat the sequence as two octets */
- *err = JSONSL_ERROR_UESCAPE_TOOSHORT;
- return 0;
+ UNESCAPE_BAIL(UESCAPE_TOOSHORT, -1);
}
/* By now, we gobble up all the six bytes (current implied + 5 next
@@ -909,10 +926,16 @@ size_t jsonsl_util_unescape(const char *in,
* possible octets. Increment the diff counter by one.
*/
*out = uesc_val[1];
+ if (oflags && *(unsigned char*)out > 0x7f) {
+ *oflags |= JSONSL_SPECIALf_NONASCII;
+ }
ndiff++;
} else {
*(out++) = uesc_val[0];
*out = uesc_val[1];
+ if (oflags && (uesc_val[0] > 0x7f || uesc_val[1] > 0x7f)) {
+ *oflags |= JSONSL_SPECIALf_NONASCII;
+ }
}
continue;
@@ -933,7 +956,7 @@ size_t jsonsl_util_unescape(const char *in,
* This table contains the beginnings of non-string
* allowable (bareword) values.
*/
-static jsonsl_special_t _special_table[0xff] = {
+static jsonsl_special_t _special_table[0x100] = {
/* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
/* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2c */
/* 0x2d */ JSONSL_SPECIALf_SIGNED /* - */, /* 0x2d */
@@ -967,7 +990,7 @@ static jsonsl_special_t *Special_table = _special_table;
* Contains characters which signal the termination of any of the 'special' bareword
* values.
*/
-static int _special_endings[0xff] = {
+static int _special_endings[0x100] = {
/* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
/* 0x09 */ 1 /* <TAB> */, /* 0x09 */
/* 0x0a */ 1 /* <LF> */, /* 0x0a */
@@ -1000,7 +1023,7 @@ static int *Special_Endings = _special_endings;
/**
* Contains allowable whitespace.
*/
-static int _allowed_whitespace[0xff] = {
+static int _allowed_whitespace[0x100] = {
/* 0x00 */ 0,0,0,0,0,0,0,0,0, /* 0x08 */
/* 0x09 */ 1 /* <TAB> */, /* 0x09 */
/* 0x0a */ 1 /* <LF> */, /* 0x0a */
@@ -1017,3 +1040,59 @@ static int _allowed_whitespace[0xff] = {
/* 0xe1 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 /* 0xfe */
};
static int *Allowed_Whitespace = _allowed_whitespace;
+
+/**
+ * Allowable two-character 'common' escapes:
+ */
+static int _allowed_escapes[0x100] = {
+ /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
+ /* 0x20 */ 0,0, /* 0x21 */
+ /* 0x22 */ 1 /* <"> */, /* 0x22 */
+ /* 0x23 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x2e */
+ /* 0x2f */ 1 /* </> */, /* 0x2f */
+ /* 0x30 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x4f */
+ /* 0x50 */ 0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5b */
+ /* 0x5c */ 1 /* <\> */, /* 0x5c */
+ /* 0x5d */ 0,0,0,0,0, /* 0x61 */
+ /* 0x62 */ 1 /* <b> */, /* 0x62 */
+ /* 0x63 */ 0,0,0, /* 0x65 */
+ /* 0x66 */ 1 /* <f> */, /* 0x66 */
+ /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
+ /* 0x6e */ 1 /* <n> */, /* 0x6e */
+ /* 0x6f */ 0,0,0, /* 0x71 */
+ /* 0x72 */ 1 /* <r> */, /* 0x72 */
+ /* 0x73 */ 0, /* 0x73 */
+ /* 0x74 */ 1 /* <t> */, /* 0x74 */
+ /* 0x75 */ 1 /* <u> */, /* 0x75 */
+ /* 0x76 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x95 */
+ /* 0x96 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb5 */
+ /* 0xb6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd5 */
+ /* 0xd6 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf5 */
+ /* 0xf6 */ 0,0,0,0,0,0,0,0,0, /* 0xfe */
+};
+
+static int *Allowed_Escapes = _allowed_escapes;
+
+
+static unsigned char _escape_maps[0x100] = {
+ /* 0x00 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x1f */
+ /* 0x20 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x3f */
+ /* 0x40 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x5f */
+ /* 0x60 */ 0,0, /* 0x61 */
+ /* 0x62 */ 8 /* <b> */, /* 0x62 */
+ /* 0x63 */ 0,0,0, /* 0x65 */
+ /* 0x66 */ 12 /* <f> */, /* 0x66 */
+ /* 0x67 */ 0,0,0,0,0,0,0, /* 0x6d */
+ /* 0x6e */ 10 /* <n> */, /* 0x6e */
+ /* 0x6f */ 0,0,0, /* 0x71 */
+ /* 0x72 */ 13 /* <r> */, /* 0x72 */
+ /* 0x73 */ 0, /* 0x73 */
+ /* 0x74 */ 9 /* <t> */, /* 0x74 */
+ /* 0x75 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x94 */
+ /* 0x95 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb4 */
+ /* 0xb5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xd4 */
+ /* 0xd5 */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xf4 */
+ /* 0xf5 */ 0,0,0,0,0,0,0,0,0,0 /* 0xfe */
+};
+
+static unsigned char *Escape_Maps = _escape_maps;
View
32 jsonsl.h
@@ -154,6 +154,10 @@ typedef enum {
X(WEIRD_WHITESPACE) \
/* Found a \u-escape, but there were fewer than 4 following hex digits */ \
X(UESCAPE_TOOSHORT) \
+/* Invalid two-character escape */ \
+ X(ESCAPE_INVALID) \
+/* Trailing comma */ \
+ X(TRAILING_COMMA) \
/* The following are for JPR Stuff */ \
\
/* Found a literal '%' but it was only followed by a single valid hex digit */ \
@@ -391,6 +395,10 @@ struct jsonsl_st {
int return_UESCAPE;
/*@}*/
+ struct {
+ int allow_trailing_comma;
+ } options;
+
/** Put anything here */
void *data;
@@ -585,6 +593,7 @@ struct jsonsl_jpr_st {
/** The original match string. Useful for returning to the user */
char *orig;
+ size_t norig;
};
@@ -710,18 +719,31 @@ const char *jsonsl_strmatchtype(jsonsl_jpr_match_t match);
* @param toEscape - A sparse array of characters to unescape. Characters
* which are not present in this array, e.g. toEscape['c'] == 0 will be
* ignored and passed to the output in their original form.
+ * @param oflags If not null, and a \uXXXX escape expands to a non-ascii byte,
+ * then this variable will have the SPECIALf_NONASCII flag on.
+ *
* @param err A pointer to an error variable. If an error occurs, it will be
* set in this variable
+ * @param errat If not null and an error occurs, this will be set to point
+ * to the position within the string at which the offending character was
+ * encountered.
*
* @return The effective size of the output buffer.
*/
JSONSL_API
-size_t jsonsl_util_unescape(const char *in,
- char *out,
- size_t len,
- const int toEscape[127],
- jsonsl_error_t *err);
+size_t jsonsl_util_unescape_ex(const char *in,
+ char *out,
+ size_t len,
+ const int toEscape[128],
+ jsonsl_special_t *oflags,
+ jsonsl_error_t *err,
+ const char **errat);
+/**
+ * Convenience macro to avoid passing too many parameters
+ */
+#define jsonsl_util_unescape(in, out, len, toEscape, err) \
+ jsonsl_util_unescape_ex(in, out, len, toEscape, NULL, err, NULL)
#endif /* JSONSL_NO_JPR */
View
193 srcutil/genchartables.pl
@@ -5,12 +5,17 @@
use warnings;
use Getopt::Long;
-my @tokdefs;
-$tokdefs[ord('-')] = 'JSONSL_SPECIALf_SIGNED';
-$tokdefs[ord('t')] = 'JSONSL_SPECIALf_TRUE';
-$tokdefs[ord('f')] = 'JSONSL_SPECIALf_FALSE';
-$tokdefs[ord('n')] = 'JSONSL_SPECIALf_NULL';
-$tokdefs[ord($_)] = 'JSONSL_SPECIALf_UNSIGNED' for (0..9);
+################################################################################
+################################################################################
+### Character Table Definitions ###
+################################################################################
+################################################################################
+my @special_begin;
+$special_begin[ord('-')] = 'JSONSL_SPECIALf_SIGNED';
+$special_begin[ord('t')] = 'JSONSL_SPECIALf_TRUE';
+$special_begin[ord('f')] = 'JSONSL_SPECIALf_FALSE';
+$special_begin[ord('n')] = 'JSONSL_SPECIALf_NULL';
+$special_begin[ord($_)] = 'JSONSL_SPECIALf_UNSIGNED' for (0..9);
my @strdefs;
$strdefs[ord('\\')] = 1;
@@ -45,22 +50,31 @@
my @unescapes;
$unescapes[ord('t')] = 0x09;
$unescapes[ord('b')] = 0x08;
-$unescapes[ord('n')] = 0x20;
+$unescapes[ord('n')] = 0x0a;
$unescapes[ord('f')] = 0x0c;
$unescapes[ord('r')] = 0x0d;
+my @allowed_escapes;
+{
+ @allowed_escapes[ord($_)] = 1 foreach
+ ('"', '\\', '/', 'b', 'f', 'n', 'r', 't', 'u');
+}
-my @lines;
-my $cur = { begin => 0, items => [], end => 0 };
-push @lines, $cur;
+
+################################################################################
+################################################################################
+### CLI Options ###
+################################################################################
+################################################################################
my %HMap = (
- special => [ undef, \@tokdefs ],
+ special => [ undef, \@special_begin ],
strings => [ undef, \@strdefs ],
special_end => [ undef, \@special_end ],
special_body => [undef, \@special_body ],
whitespace => [ undef, \@wstable ],
unescapes => [undef, \@unescapes],
+ allowed_escapes => [ undef, \@allowed_escapes]
);
my $Table;
@@ -81,6 +95,146 @@
die("Please specify one of: " . join(",", keys %HMap));
}
+################################################################################
+################################################################################
+### Logic ###
+################################################################################
+################################################################################
+my %PrettyMap = (
+"\x00" => '<NUL>',
+"\x01" => '<SOH>',
+"\x02" => '<STX>',
+"\x03" => '<ETX>',
+"\x04" => '<EOT>',
+"\x05" => '<ENQ>',
+"\x06" => '<ACK>',
+"\x07" => '<BEL>',
+"\x08" => '<BS>',
+"\x09" => '<HT>',
+"\x0a" => '<LF>',
+"\x0b" => '<VT>',
+"\x0c" => '<FF>',
+"\x0d" => '<CR>',
+"\x0e" => '<SO>',
+"\x0f" => '<SI>',
+"\x10" => '<DLE>',
+"\x11" => '<DC1>',
+"\x12" => '<DC2>',
+"\x13" => '<DC3>',
+"\x14" => '<DC4>',
+"\x15" => '<NAK>',
+"\x16" => '<SYN>',
+"\x17" => '<ETB>',
+"\x18" => '<CAN>',
+"\x19" => '<EM>',
+"\x1a" => '<SUB>',
+"\x1b" => '<ESC>',
+"\x1c" => '<FS>',
+"\x1d" => '<GS>',
+"\x1e" => '<RS>',
+"\x1f" => '<US>',
+"\x20" => '<SP>',
+"\x21" => '<!>',
+"\x22" => '<">',
+"\x23" => '<#>',
+"\x24" => '<$>',
+"\x25" => '<%>',
+"\x26" => '<&>',
+"\x27" => '<\'>',
+"\x28" => '<(>',
+"\x29" => '<)>',
+"\x2a" => '<*>',
+"\x2b" => '<+>',
+"\x2c" => '<,>',
+"\x2d" => '<->',
+"\x2e" => '<.>',
+"\x2f" => '</>',
+"\x30" => '<0>',
+"\x31" => '<1>',
+"\x32" => '<2>',
+"\x33" => '<3>',
+"\x34" => '<4>',
+"\x35" => '<5>',
+"\x36" => '<6>',
+"\x37" => '<7>',
+"\x38" => '<8>',
+"\x39" => '<9>',
+"\x3a" => '<:>',
+"\x3b" => '<;>',
+"\x3c" => '<<>',
+"\x3d" => '<=>',
+"\x3e" => '<>>',
+"\x3f" => '<?>',
+"\x40" => '<@>',
+"\x41" => '<A>',
+"\x42" => '<B>',
+"\x43" => '<C>',
+"\x44" => '<D>',
+"\x45" => '<E>',
+"\x46" => '<F>',
+"\x47" => '<G>',
+"\x48" => '<H>',
+"\x49" => '<I>',
+"\x4a" => '<J>',
+"\x4b" => '<K>',
+"\x4c" => '<L>',
+"\x4d" => '<M>',
+"\x4e" => '<N>',
+"\x4f" => '<O>',
+"\x50" => '<P>',
+"\x51" => '<Q>',
+"\x52" => '<R>',
+"\x53" => '<S>',
+"\x54" => '<T>',
+"\x55" => '<U>',
+"\x56" => '<V>',
+"\x57" => '<W>',
+"\x58" => '<X>',
+"\x59" => '<Y>',
+"\x5a" => '<Z>',
+"\x5b" => '<[>',
+"\x5c" => '<\>',
+"\x5d" => '<]>',
+"\x5e" => '<^>',
+"\x5f" => '<_>',
+"\x60" => '<`>',
+"\x61" => '<a>',
+"\x62" => '<b>',
+"\x63" => '<c>',
+"\x64" => '<d>',
+"\x65" => '<e>',
+"\x66" => '<f>',
+"\x67" => '<g>',
+"\x68" => '<h>',
+"\x69" => '<i>',
+"\x6a" => '<j>',
+"\x6b" => '<k>',
+"\x6c" => '<l>',
+"\x6d" => '<m>',
+"\x6e" => '<n>',
+"\x6f" => '<o>',
+"\x70" => '<p>',
+"\x71" => '<q>',
+"\x72" => '<r>',
+"\x73" => '<s>',
+"\x74" => '<t>',
+"\x75" => '<u>',
+"\x76" => '<v>',
+"\x77" => '<w>',
+"\x78" => '<x>',
+"\x79" => '<y>',
+"\x7a" => '<z>',
+"\x7b" => '<{>',
+"\x7c" => '<|>',
+"\x7d" => '<}>',
+"\x7e" => '<~>',
+"\x7f" => '<DEL>',
+);
+
+my @lines;
+my $cur = { begin => 0, items => [], end => 0 };
+push @lines, $cur;
+
my $i = 0;
my $cur_col = 0;
@@ -119,22 +273,7 @@ sub add_special {
for (; $i < 255; $i++) {
my $v = $Table->[$i];
if (defined $v) {
- my $char_pretty;
- if ($i == 0xa) {
- $char_pretty = '<LF>';
- } elsif ($i == 0xd) {
- $char_pretty = '<CR>';
- } elsif ($i == 0x9) {
- $char_pretty = '<TAB>';
- } elsif ($i == 0x20) {
- $char_pretty = '<SP>';
- } elsif ($i == 0x08) {
- $char_pretty = '<BS>';
- } elsif ($i == 0x0c) {
- $char_pretty = '<FF>';
- } else {
- $char_pretty = chr($i);
- }
+ my $char_pretty = $PrettyMap{chr($i)};
$v = sprintf("$v /* %s */", $char_pretty);
add_special($v);
} else {
View
3 tests/Makefile
@@ -4,6 +4,9 @@ all: $(TESTMODS)
./json_test ../share/*
./jpr_test
./unescape
+ ./json_test ../share/jsc/pass*.json
+ JSONSL_FAIL_TESTS=1 ./json_test ../share/jsc/fail*.json
+ @echo "All Tests OK"
%: %.c
$(CC) $(CFLAGS) $^ -o $@ $(LDFLAGS)
View
23 tests/json_test.c
@@ -6,6 +6,8 @@
#include <sys/stat.h>
#include <assert.h>
+static int WantFail = 0;
+static jsonsl_error_t WantError = 0;
void fmt_level(const char *buf, size_t nbuf, int levels)
{
@@ -62,6 +64,12 @@ int error_callback(jsonsl_t jsn,
* and maybe 'correct' and seek ahead of the buffer, and try to
* do some correction.
*/
+ if (WantFail) {
+ printf("Got error %s (PASS)\n", jsonsl_strerror(err));
+ WantError = err;
+ return 0;
+ }
+
fprintf(stderr, "Got parse error at '%c', pos %lu\n", *errat, jsn->pos);
fprintf(stderr, "Error is %s\n", jsonsl_strerror(err));
fprintf(stderr, "Remaining text: %s\n", errat);
@@ -78,6 +86,7 @@ void parse_single_file(const char *path)
size_t nread = 0;
int fd;
jsonsl_t jsn;
+ WantError = 0;
/* open our file */
fd = open(path, O_RDONLY);
@@ -87,6 +96,11 @@ void parse_single_file(const char *path)
}
status = fstat(fd, &sb);
+ if (S_ISDIR(sb.st_mode)) {
+ fprintf(stderr, "\tS_ISDIR..\n");
+ close(fd);
+ return;
+ }
assert(status == 0);
assert(sb.st_size);
assert(sb.st_size < 0x1000000);
@@ -121,7 +135,10 @@ void parse_single_file(const char *path)
jsonsl_feed(jsn, bufp, nread);
bufp += nread;
}
-
+ if (WantFail && WantError == 0) {
+ fprintf(stderr, "Expected error but didn't find any!\n");
+ abort();
+ }
jsonsl_destroy(jsn);
free(buf);
}
@@ -132,6 +149,10 @@ int main(int argc, char **argv)
if (getenv("JSONSL_QUIET_TESTS")) {
freopen("/dev/null", "w", stdout);
}
+ if (getenv("JSONSL_FAIL_TESTS")) {
+ printf("Want Fail..\n");
+ WantFail = 1;
+ }
if (argc < 2) {
fprintf(stderr, "Usage: %s FILES..\n", argv[0]);
exit(EXIT_FAILURE);
View
25 tests/unescape.c
@@ -46,15 +46,23 @@ void test_null_escape(void)
void test_multibyte_escape(void)
{
int mbres;
+ jsonsl_special_t flags;
wchar_t dest[4]; /* שלום */
escaped = "\\uD7A9\\uD79C\\uD795\\uD79D";
strtable['u'] = 1;
out = malloc(strlen(escaped));
- res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err);
+ res = jsonsl_util_unescape_ex(escaped,
+ out,
+ strlen(escaped),
+ strtable,
+ &flags,
+ &err,
+ NULL);
assert(res == 8);
mbres = mbstowcs(dest, out, 4);
assert(memcmp(L"שלום", dest,
8) == 0);
+ assert(flags & JSONSL_SPECIALf_NONASCII);
free(out);
}
@@ -63,15 +71,15 @@ void test_multibyte_escape(void)
*/
void test_ignore_escape(void)
{
- escaped = "Some \\Weird String";
+ escaped = "Some \\nWeird String";
out = malloc(strlen(escaped)+1);
strtable['W'] = 0;
res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err);
out[res] = '\0';
assert(res == strlen(escaped));
assert(strncmp(escaped, out, res) == 0);
- escaped = "\\A String";
+ escaped = "\\tA String";
res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err);
out[res] = '\0';
assert(res == strlen(escaped));
@@ -95,6 +103,16 @@ void test_replacement_escape(void)
free(out);
}
+void test_invalid_escape(void)
+{
+ escaped = "\\invalid \\escape";
+ out = malloc(strlen(escaped)+1);
+ res = jsonsl_util_unescape(escaped, out, strlen(escaped), strtable, &err);
+ assert(res == 0);
+ assert(err == JSONSL_ERROR_ESCAPE_INVALID);
+ free(out);
+}
+
int main(void)
{
char *curlocale = setlocale(LC_ALL, "");
@@ -107,5 +125,6 @@ int main(void)
}
test_ignore_escape();
test_replacement_escape();
+ test_invalid_escape();
return 0;
}

0 comments on commit a4c358f

Please sign in to comment.