Work on tuba, object_drip

commit 288c839fdb9e9b2a215460eb66468789a92c00bc 1 parent 36b5369
@mnunberg authored
2  Changes
@@ -1,4 +1,6 @@
Revision history for JSON-SL
+0.02 April 4, 2012
+ Added object drip, some more work on tuba
0.01 Date/time
First version, released on an unsuspecting world.
5 MANIFEST
@@ -5,6 +5,9 @@ README
lib/JSON/SL.pm
lib/JSON/SL/EscapeTH.pm
+lib/JSON/SL/Tuba.pm
+
+
perl-jsonsl.h
jsonxs_inline.h
@@ -30,5 +33,7 @@ t/00-load.t
t/08-pc_esc.t
t/03-unescape.t
t/06-utf8.t
+t/20-object_drip.t
eg/bench.pl
+eg/synopsis.pl
5 MANIFEST.SKIP
@@ -5,3 +5,8 @@ log\.*
#ignore komodo project file
\.kpf
+
+\.git.+
+
+\.swp
+dump.+
2  Makefile.PL
@@ -17,7 +17,7 @@ WriteMakefile(
? ('LICENSE'=> 'perl')
: ()),
PL_FILES => {},
- OPTIMIZE => '-Wall -ggdb3 -O3',
+# OPTIMIZE => '-Wall -ggdb3 -O3',
PREREQ_PM => {
'Test::More' => 0,
},
180 SL.xs
@@ -29,7 +29,7 @@ static int PLJSONSL_Escape_Table_dfl[0x80];
ESCTBL['t'] = 1;
-#if PERL_VERSION >= 10
+#ifdef PLJSONSL_HAVE_HV_COMMON
#define pljsonsl_hv_storeget_he(pjsn, hv, buf, len, value) \
hv_common((HV*)(hv), NULL, buf, len, 0, HV_FETCH_ISSTORE, value, 0)
@@ -39,7 +39,6 @@ static int PLJSONSL_Escape_Table_dfl[0x80];
#define PLJSONSL_DESTROY_KSV(...)
#else
-#warning "You are using a Perl from the stone age. This code might work.."
/* probably very dangerous, but the beginning of hv_store_common
* looks quite simple...
*/
@@ -97,7 +96,7 @@ pljsonsl_hv_delete_okey_THX(pTHX_
#warning "Using our own HeUTF8 as HeKUTF8"
#define HeUTF8(he) HeKUTF8(he)
-#endif /* 5.10 */
+#endif /* HAVE_HV_COMMON */
#define GET_STATE_BUFFER(pjsn, pos) \
@@ -122,9 +121,11 @@ object_mkresult_THX(pTHX_
(void)hv_stores(info_hv, PLJSONSL_INFO_KEY_##b, v)
HV *info_hv;
- if (child->matchres != JSONSL_MATCH_COMPLETE || child->type == JSONSL_T_HKEY) {
+ if (pjsn->options.object_drip == 0 &&
+ (child->matchres != JSONSL_MATCH_COMPLETE || child->type == JSONSL_T_HKEY)) {
return 0;
}
+
info_hv = newHV();
if (SvTYPE(child->sv) == SVt_PVHV || SvTYPE(child->sv) == SVt_PVAV) {
STORE_INFO(VALUE, newRV_noinc(child->sv));
@@ -132,7 +133,7 @@ object_mkresult_THX(pTHX_
STORE_INFO(VALUE, child->sv);
}
- if (pjsn->options.noqstr == 0) {
+ if (pjsn->options.noqstr == 0 && pjsn->options.object_drip == 0) {
STORE_INFO(QUERY, newSVpvn_share(child->matchjpr->orig,
child->matchjpr->norig, 0));
}
@@ -170,7 +171,7 @@ object_mkresult_THX(pTHX_
* and hash types are always added to their parents, even if they
* are a complete match to be removed from the stack.
*/
- if (parent->sv) {
+ if (parent && parent->sv) {
SvREADONLY_off(parent->sv);
SvREFCNT_inc_simple_void_NN(child->sv);
if (parent->type == JSONSL_T_LIST) {
@@ -184,7 +185,8 @@ object_mkresult_THX(pTHX_
kbuf, klen,
G_DISCARD,
HeHASH(child->u_loc.key));
- /* is a macro for:
+ /* for perls with hv_common, the above should be a macro for this: */
+#if 0
hv_common((HV*)parent->sv,
NULL,
kbuf, klen,
@@ -192,7 +194,7 @@ object_mkresult_THX(pTHX_
HV_DELETE|G_DISCARD,
NULL,
HeHASH(child->u_loc.key));
- */
+#endif
child->u_loc.key = NULL;
}
@@ -217,13 +219,11 @@ process_special_THX(pTHX_
#define MAKE_BOOLEAN_BLESSED_IV(v) \
{ SV *newiv = newSViv(v); newsv = newRV_noinc(newiv); sv_bless(newsv, pjsn->stash_boolean); } \
+ int ndigits;
- switch (state->special_flags) {
- case JSONSL_SPECIALf_UNSIGNED:
- case JSONSL_SPECIALf_SIGNED:
- newsv = jsonxs_inline_process_number(buf);
- break;
+
+ switch (state->special_flags) {
case JSONSL_SPECIALf_TRUE:
if (state->pos_cur - state->pos_begin != 4) {
die("Expected 'true'");
@@ -245,7 +245,46 @@ process_special_THX(pTHX_
newsv = &PL_sv_undef;
break;
+ /* Simple signed/unsigned numbers, no exponents or fractions to worry about */
+ case JSONSL_SPECIALf_UNSIGNED:
+ ndigits = state->pos_cur - state->pos_begin;
+ if (ndigits == 1) {
+ newsv = newSVuv(state->nelem);
+ break;
+ } /* else, ndigits > 1 */
+ if (*buf == '0') {
+ die("JSON::SL - Malformed number (leading zero for non-fraction)");
+ }
+ if (ndigits < UV_DIG) {
+ newsv = newSVuv(state->nelem);
+ break;
+ } /* else, potential overflow */
+ newsv = jsonxs_inline_process_number(buf);
+ break;
+
+ case JSONSL_SPECIALf_SIGNED:
+ ndigits = (state->pos_cur - state->pos_begin)-1;
+ if (ndigits == 0) {
+ die("JSON::SL - Found lone '-'");
+ }
+ if (buf[1] == '0') {
+ die("JSON::SL - Malformed number (zero after '-'");
+ }
+
+ if (ndigits < (IV_DIG-1)) {
+ newsv = newSViv(-((IV)state->nelem));
+ break;
+ } /*else */
+ newsv = jsonxs_inline_process_number(buf);
+ break;
+
+
+
default:
+ if (state->special_flags & (JSONSL_SPECIALf_FLOAT|JSONSL_SPECIALf_EXPONENT)) {
+ newsv = jsonxs_inline_process_number(buf);
+ break;
+ }
warn("Buffer is %p", buf);
warn("Length is %lu", state->pos_cur - state->pos_begin);
warn("Special flag is %d", state->special_flags);
@@ -495,7 +534,9 @@ static void body_pop_callback(jsonsl_t jsn,
#undef INSERT_STRING
if (state->sv == pjsn->root && pjsn->njprs == 0) {
- av_push(pjsn->results, newRV_noinc(pjsn->root));
+ if (!pjsn->options.object_drip) {
+ av_push(pjsn->results, newRV_noinc(pjsn->root));
+ } /* otherwise, already pushed */
pjsn->root = NULL;
jsn->action_callback_PUSH = initial_callback;
}
@@ -609,7 +650,7 @@ pljsonsl_get_and_initialize_global(pTHX)
jsonsl_enable_all_callbacks(pjsn->jsn);
pjsn->jsn->error_callback = error_callback;
pjsn->jsn->action_callback_PUSH = initial_callback;
- pjsn->results = sv_2mortal((SV*)newAV());
+ pjsn->results = (AV*)sv_2mortal((SV*)newAV());
return pjsn;
}
@@ -713,8 +754,9 @@ pltuba_invoke_callback_THX(pTHX_ PLTUBA *tuba,
ENTER;
SAVETMPS;
PUSHMARK(SP);
+ XPUSHs(tuba->selfrv);
XPUSHs(sv_2mortal(newSViv(action)));
- XPUSHs(sv_2mortal(newSViv(cbtype)));
+ XPUSHs(sv_2mortal(newSViv(cbtype & 0x7f)));
if (mextrasv) {
XPUSHs(sv_2mortal(mextrasv));
}
@@ -736,20 +778,24 @@ pltuba_flush_characters_THX(pTHX_ PLTUBA *tuba, size_t until)
size_t toFlush;
const char *buf;
SV *chunksv;
-
- if (until <= tuba->buf_pos) {
+ if (!tuba->keep_pos) {
return;
}
- toFlush = until - tuba->buf_pos;
+ toFlush = until - tuba->keep_pos;
+ buf = GET_STATE_BUFFER(tuba, tuba->keep_pos);
+
+ if (tuba->shift_quote) {
+ buf++;
+ toFlush--;
+ }
+
+ tuba->keep_pos = 0;
+ tuba->shift_quote = 0;
if (toFlush == 0) {
return;
}
-
- toFlush -= tuba->chardata_begin_offset;
- buf = SvPVX_const(tuba->buf);
- buf += tuba->chardata_begin_offset;
chunksv = newSVpvn(buf, toFlush);
pltuba_invoke_callback(tuba,
JSONSL_ACTION_PUSH,
@@ -758,46 +804,61 @@ pltuba_flush_characters_THX(pTHX_ PLTUBA *tuba, size_t until)
/**
* SV has been mortalized by the invoke_callback function
*/
- sv_chop(tuba->buf, buf);
- tuba->buf_pos = until;
}
-
/**
- * Unified callback
+ * Push callback. This is easy because we never actually do any character
+ * data here.
*/
static void
-pltuba_jsonsl_callback(jsonsl_t jsn,
- jsonsl_action_t action,
- struct jsonsl_state_st *state,
- const char *at)
+pltuba_jsonsl_push_callback(jsonsl_t jsn,
+ jsonsl_action_t action,
+ struct jsonsl_state_st *state,
+ const char *at)
{
- /* Figure out the difference between now and the last callback */
PLTUBA *tuba = (PLTUBA*)jsn->data;
PLJSONSL_dTHX(tuba);
- size_t pos = (action == JSONSL_ACTION_POP) ? state->pos_cur : state->pos_begin;
+ if (state->level == 1) {
+ pltuba_invoke_callback(tuba, action, PLTUBA_CALLBACK_DOCUMENT, NULL);
+ }
- if (pos && pos > tuba->last_cb_pos) {
- pltuba_flush_characters(tuba, pos);
+#define X(o,c) \
+ if (state->type == JSONSL_T_##o) { \
+ pltuba_invoke_callback(tuba, action, PLTUBA_CALLBACK_##o, NULL); \
}
- tuba->last_cb_pos = pos;
- /* Piece together a callback specification */
- if (state->type & JSONSL_Tf_STRINGY) {
- tuba->chardata_begin_offset = 1;
+ JSONSL_XTYPE;
+#undef X
+ if (!JSONSL_STATE_IS_CONTAINER(state)) {
+ tuba->keep_pos = state->pos_begin;
+ if (state->type & JSONSL_Tf_STRINGY) {
+ tuba->shift_quote = 1;
+ }
} else {
- tuba->chardata_begin_offset = 0;
+ tuba->keep_pos = 0;
+ }
+}
+
+static void
+pltuba_jsonsl_pop_callback(jsonsl_t jsn,
+ jsonsl_action_t action,
+ struct jsonsl_state_st *state,
+ const char *at)
+{
+ PLTUBA *tuba = (PLTUBA*)jsn->data;
+ PLJSONSL_dTHX(tuba);
+
+ if ((state->type & JSONSL_Tf_STRINGY)
+ || state->type == JSONSL_T_SPECIAL) {
+ pltuba_flush_characters(tuba, state->pos_cur);
}
#define X(o,c) \
if (state->type == JSONSL_T_##o) { \
pltuba_invoke_callback(tuba, action, PLTUBA_CALLBACK_##o, NULL); \
- goto GT_CB_DONE; \
}
JSONSL_XTYPE;
#undef X
- if (state->level == 1 && action == JSONSL_ACTION_POP) {
+ if (state->level == 1) {
pltuba_invoke_callback(tuba, action, PLTUBA_CALLBACK_DOCUMENT, NULL);
}
- GT_CB_DONE:
- return;
}
static int
@@ -822,13 +883,15 @@ pltuba_feed_THX(pTHX_ PLTUBA *tuba, SV *input)
die("Input is not string!");
}
tuba->buf = input;
- tuba->buf_pos = tuba->jsn->pos;
+ tuba->pos_min_valid = tuba->jsn->pos;
SvREADONLY_on(input);
jsonsl_feed(tuba->jsn, SvPVX_const(input), SvCUR(input));
- if (SvCUR(input)) {
- /* Assume the rest is 'character' data */
- pltuba_flush_characters(tuba, SvCUR(input) + tuba->buf_pos);
+ if (tuba->keep_pos) {
+ int old_shift = tuba->shift_quote;
+ pltuba_flush_characters(tuba, tuba->jsn->pos);
+ tuba->keep_pos = tuba->jsn->pos;
+ tuba->shift_quote = old_shift;
}
SvREADONLY_off(input);
}
@@ -837,7 +900,8 @@ pltuba_feed_THX(pTHX_ PLTUBA *tuba, SV *input)
X(noqstr) \
X(nopath) \
X(utf8) \
- X(max_size)
+ X(max_size) \
+ X(object_drip)
enum {
@@ -1034,6 +1098,7 @@ PLJSONSL__option(PLJSONSL *pjsn, ...)
nopath = OPTION_IX_nopath
noqstr = OPTION_IX_noqstr
max_size = OPTION_IX_max_size
+ object_drip = OPTION_IX_object_drip
CODE:
RETVAL = 0;
@@ -1134,7 +1199,7 @@ PLJSONSL_decode_json(SV *input)
pjsn->jsn->action_callback_PUSH = initial_callback;
RETURN_RESULTS(pjsn);
- if (!result_count) {
+ if (result_count == 0 && av_len(pjsn->results) == -1) {
die("Incomplete JSON string?");
}
@@ -1188,25 +1253,28 @@ PLJSONSL_CLONE(PLJSONSL *pjsn)
MODULE = JSON::SL PACKAGE = JSON::SL::Tuba PREFIX = PLTUBA_
SV *
-PLTUBA__initialize(SV *pkg)
+PLTUBA__initialize(const char *pkg)
PREINIT:
PLTUBA *tuba;
SV *ptriv, *retrv;
+ HV *subclass;
dMY_CXT;
-
CODE:
- (void)pkg;
-
+ subclass = gv_stashpv(pkg, GV_ADD);
Newxz(tuba, 1, PLTUBA);
tuba->jsn = jsonsl_new(PLJSONSL_MAX_DEFAULT);
ptriv = newSViv(PTR2IV(tuba));
retrv = newRV_noinc(ptriv);
- sv_bless(retrv, MY_CXT.stash_tuba);
+ sv_bless(retrv, subclass);
- tuba->jsn->action_callback = pltuba_jsonsl_callback;
+ tuba->selfrv = newRV_inc(ptriv);
+ sv_rvweaken(tuba->selfrv);
+ tuba->jsn->action_callback_PUSH = pltuba_jsonsl_push_callback;
+ tuba->jsn->action_callback_POP = pltuba_jsonsl_pop_callback;
tuba->jsn->error_callback = pltuba_jsonsl_error_callback;
jsonsl_enable_all_callbacks(tuba->jsn);
PLJSONSL_mkTHX(tuba);
+ tuba->jsn->data = tuba;
RETVAL = retrv;
OUTPUT: RETVAL
102 eg/bench.pl
@@ -5,8 +5,11 @@
use JSON::SL;
use Data::Dumper::Concise;
use JSON::XS qw(decode_json);
-use Time::HiRes qw(time);
use Getopt::Long;
+use Benchmark qw(:all);
+use Carp qw(confess);
+
+$SIG{__DIE__} = \&confess;
GetOptions(
'i|iterations=i' => \my $Iterations,
@@ -17,6 +20,7 @@
'f|file=s' => \my $TESTFILE,
'r|recursion=i' => \my $RecursionLimit,
'd|dump' => \my $DumpTree,
+ 'U|no-unescape' => \my $DontUnescape,
'h|help' => \my $PrintHelp
);
@@ -31,6 +35,7 @@
-c --chunked=SIZE Test incremental chunks of SIZE bytes
-r --recursion=LEVEL Set JSON::SL recursion limit
-d --dump Dump object tree on completion
+ -U --no-unescape Don't have JSON::SL unescape strings
EOF
exit(1);
}
@@ -60,28 +65,10 @@
print Dumper(\@all);
print Dumper($o->root);
}
-my ($begin,$duration);
-
-if ($TestJsonxs) {
- $begin = time();
- foreach (0..$Iterations) {
- my $res = decode_json($txt);
- }
- $duration = time() - $begin;
- printf("$Iterations Iterations: JSON::XS %0.2f\n", $duration);
-}
-
-if ($TestJsonsl) {
- $begin = time();
- foreach (0..$Iterations) {
- my $res = JSON::SL::decode_json($txt);
- }
- my $duration = time() - $begin;
- printf("$Iterations Iterations: JSON::SL %0.2f\n", $duration);
-}
my @Chunks;
if ($TestChunks) {
+ printf("Splitting file into chunks..\n");
my $copy = $txt;
while ($copy) {
my $len = length($copy);
@@ -91,37 +78,50 @@
$copy = substr($copy, $chunk);
push @Chunks, $frag;
}
-
- printf("Testing chunked/incremental parsing with %d %dB chunks\n",
- scalar @Chunks, $TestChunks);
- if ($TestJsonxs) {
- $begin = time();
- my $xs = JSON::XS->new();
- for (0..$Iterations) {
- $xs->incr_reset();
- foreach my $chunk (@Chunks) {
- last if !$chunk;
- my @o = $xs->incr_parse($chunk);
- }
- }
- $duration = time() - $begin;
- printf("$Iterations iterations: JSON::XS %0.2f\n",
- $duration);
+}
+
+my $sl_incr = JSON::SL->new($RecursionLimit);
+my $xs_incr = JSON::XS->new();
+
+sub jsonsl_complete {
+ JSON::SL::decode_json($txt);
+}
+
+sub jsonsl_incr {
+ $sl_incr->reset();
+ my $res = $sl_incr->feed($_) for @Chunks;
+}
+
+sub jsonxs_complete {
+ JSON::XS::decode_json($txt);
+}
+
+sub jsonxs_incr {
+ $xs_incr->incr_reset();
+ my $res = $xs_incr->incr_parse($_) for @Chunks;
+}
+
+my %SimpleTests;
+my %IncrTests;
+
+if ($TestJsonxs) {
+ $SimpleTests{'JSON::XS decode_json'} = \&jsonxs_complete;
+ if (@Chunks) {
+ $IncrTests{'JSON::XS incr_parse'} = \&jsonxs_incr;
}
-
- if ($TestJsonsl) {
- my $sl = JSON::SL->new($RecursionLimit);
- $begin = time();
-
- for (0..$Iterations) {
- $sl->reset();
- foreach my $chunk (@Chunks) {
- last if !$chunk;
- my @o = $sl->feed($chunk);
- }
- }
- $duration = time() - $begin;
- printf("$Iterations iterations: JSON::SL %0.2f\n",
- $duration);
+}
+
+if ($TestJsonsl) {
+ $SimpleTests{'JSON::SL decode_json'} = \&jsonsl_complete;
+ if (@Chunks) {
+ $IncrTests{'JSON::SL feed'} = \&jsonsl_incr;
}
+}
+
+printf("Running decode_json tests with input of %d bytes\n", length($txt));
+cmpthese($Iterations, \%SimpleTests);
+if (@Chunks) {
+
+ print "Running incremental tests\n";
+ cmpthese($Iterations, \%IncrTests);
}
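
The benchmark script now registers each decoder as a named code ref and lets
L<Benchmark>'s C<cmpthese> run and compare them, rather than timing loops by hand
with Time::HiRes. A minimal, self-contained sketch of that pattern (not taken
from the repository; the sample subs are made up):

    use strict;
    use warnings;
    use Benchmark qw(cmpthese);

    my %tests = (
        'for loop' => sub { my $x = 0; $x += $_ for 1 .. 1000; },
        'map'      => sub { my $x = 0; $x += $_ for map { $_ } 1 .. 1000; },
    );

    # Run each sub 10_000 times and print a table of rates and relative speed
    cmpthese(10_000, \%tests);
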
38 eg/synopsis.pl
@@ -0,0 +1,38 @@
+use JSON::SL;
+use Data::Dumper;
+
+my $txt = <<'EOT';
+{
+ "some" : {
+ "partial" : 42.42
+ },
+ "other" : {
+ "partial" : "a string"
+ },
+ "complex" : {
+ "partial": {
+ "a key" : "a value"
+ }
+ },
+ "more" : {
+ "more" : "stuff"
+EOT
+
+my $json = JSON::SL->new();
+my $jpath = "/^/partial";
+$json->set_jsonpointer( [$jpath] );
+my @results = $json->feed($txt);
+
+foreach my $result (@results) {
+ printf("== Got result (path %s) ==\n", $result->{Path});
+ printf("Query was %s\n", $result->{JSONPointer});
+ my $value = $result->{Value};
+ if (!ref $value) {
+ printf("Got scalar value %s\n", $value);
+ } else {
+ printf("Got reference:\n");
+ print Dumper($value);
+ }
+ print "\n";
+}
+
157 lib/JSON/SL.pm
@@ -7,7 +7,7 @@ use base qw(Exporter);
our @EXPORT_OK = qw(decode_json unescape_json_string);
BEGIN {
- $VERSION = '0.0_1';
+ $VERSION = '0.0_2';
require XSLoader;
XSLoader::load(__PACKAGE__, $VERSION);
}
@@ -43,29 +43,61 @@ JSON::SL - Fast, Streaming, and Searchable JSON decoder.
=head1 SYNOPSIS
- # An incomplete stream..
+ use JSON::SL;
+ use Data::Dumper;
- my $txt = <<'EOT'
+ my $txt = <<'EOT';
{
"some" : {
- "partial" : [42]
+ "partial" : 42.42
},
"other" : {
"partial" : "a string"
},
+ "complex" : {
+ "partial": {
+ "a key" : "a value"
+ }
+ },
"more" : {
- "more" :
+ "more" : "stuff"
EOT
- my $json = JSON::SL->new(512);
- $json->set_jsonpointer( ["/some/^/partial"] );
+ my $json = JSON::SL->new();
+ my $jpath = "/^/partial";
+ $json->set_jsonpointer( [$jpath] );
my @results = $json->feed($txt);
- $results[0]->{Value}->[0] == 42;
- $results[1]->{Value} eq 'a string';
+ foreach my $result (@results) {
+ printf("== Got result (path %s) ==\n", $result->{Path});
+ printf("Query was %s\n", $result->{JSONPointer});
+ my $value = $result->{Value};
+ if (!ref $value) {
+ printf("Got scalar value %s\n", $value);
+ } else {
+ printf("Got reference:\n");
+ print Dumper($value);
+ }
+ print "\n";
+ }
+
+Produces:
+
+ == Got result (path /some/partial) ==
+ Query was /^/partial
+ Got scalar value 42.42
- $results[0]->{Path} eq '/some/partial';
- $results[1]->{Path} eq '/other/partial';
+ == Got result (path /other/partial) ==
+ Query was /^/partial
+ Got scalar value a string
+
+ == Got result (path /complex/partial) ==
+ Query was /^/partial
+ Got reference:
+ $VAR1 = {
+ 'a key' => 'a value'
+ };
+
=head2 DESCRIPTION
@@ -315,6 +347,62 @@ To quote:
See SECURITY CONSIDERATIONS in L<JSON::XS>, for more info on why this is useful.
+=head3 object_drip(boolean)
+
+As an alternative to using JSONPointer, you can use an 'object drip'. With this
+setting enabled, all hashes and arrays are returned via C<feed> or C<fetch>
+in reverse order (i.e. the deepest objects are returned first, followed by
+the objects which contain them).
+
+This allows you to inspect complete descendant objects as they arrive.
+
+The objects returned by C<fetch> and C<feed> still follow the same semantics,
+with context/path information stored inside the C<Path> key. The C<JSONPointer>
+field is omitted, since no JSONPointer query is in use.
+
+Example:
+
+ use JSON::SL;
+ use Test::More;
+
+ my $sl = JSON::SL->new();
+ $sl->object_drip(1);
+
+ # create an incomplete JSON object:
+
+ my $json = <<'EOJ';
+ [ [ { "key1":"foo", "key2":"bar", "key3":"baz" }
+ EOJ
+
+ my @res = $sl->feed($json);
+
+ my $expected = [
+ {
+ Value => "foo",
+ Path => '/0/0/key1',
+ },
+ {
+ Value => "bar",
+ Path => '/0/0/key2',
+ },
+ {
+ Value => "baz",
+ Path => '/0/0/key3'
+ },
+ {
+ Value => {},
+ Path => '/0/0'
+ },
+ ];
+
+ is_deeply(\@res, $expected, "Got expected results for object drip...");
+
+
+Outer encapsulating objects will have their children removed (as they have
+already been returned in previous results).
+
+Only I<complete> objects (i.e. objects which can no longer contain any more data)
+will be returned.
=head3 unescape_settings()
@@ -406,32 +494,73 @@ undef if the input was empty. Dies on invalid input.
Both L</decode_json> and L</feed> output already-unescaped strings, so there is
no need to call this function on strings returned by those methods.
-=head1 BUGS
+=head1 BUGS & CAVEATS
+
+=head2 Threads
This will most likely not work with threads, although one would wonder why
you would want to use this module across threads.
+=head2 Object Trees
+
When inspecting the object tree, you may see some C<undef> values, and it
is impossible to determine whether those values are JSON C<null>s, or
placeholder values. It would be possible to implement a class e.g.
C<JSON::SL::Placeholder>, but doing so would either be unsafe or incur
additional overhead.
+
+=head2 JSONPointer
+
The C<^> caret is somewhat obscure as a wildcard character
Currently wildcard matching is all-or-nothing, meaning that constructs such
as C<foo^> will not work.
+=head2 Encodings
+
+All input to C<JSON::SL> should be either UTF-8 or ASCII (a subset of UTF-8).
+
+More specifically, the input stream must be in an ASCII-compatible octet
+encoding (this includes Latin1).
+
+Perl itself natively deals only with 8-bit ASCII, Latin1, or UTF-8, so if your
+input stream is something else (for example, UTF-16) it will need to be converted
+to UTF-8 at some point before it is passed to C<JSON::SL>.
+
+=head1 SPEED
+
+C<JSON::SL> aims to be the fastest JSON decoder for Perl. Currently it is in
+second place: about 25% slower than L<JSON::XS> for C<decode_json> and
+about 8% slower for incremental parsing.
+
+Additionally, if your input has lots of escapes (not very common in real-world
+JSON), C<JSON::SL> will be even slower.
+
+Nevertheless I believe that the benefits provided by JSON::SL save not only human
+time, but also machine time: what good is quickly decoding a large JSON stream
+if there are no proper facilities to inspect it?
+
+=head1 TODO
+
+Work is in progress for a SAX-style interface. Stay tuned.
=head1 SEE ALSO
-L<JSON::XS> - Still faster than this module
+L<JSON::XS> - Still faster than this module, and is also the source of many of
+C<JSON::SL>'s ideas and tests.
If you wish to aid in the development of the JSON parser, do B<not> modify
the source files in the perl distribution, they are merely copied over from
here:
-L<jsonsl | https://github.com/mnunberg/jsonsl> - C core for JSON::SL
+L<jsonsl|https://github.com/mnunberg/jsonsl> - C core for JSON::SL
+
+L<JSON|http://www.json.org> - JSON's main page
+
+L<JSON Specification|http://www.ietf.org/rfc/rfc4627.txt?number=4627>
+
+L<JSONPointer Specification|http://tools.ietf.org/html/draft-pbryan-zyp-json-pointer-02>
=head1 AUTHOR & COPYRIGHT
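
The Encodings caveat added above notes that anything other than UTF-8 (or another
ASCII-compatible octet encoding) must be converted before it reaches the parser.
Below is a minimal sketch of that conversion using the core Encode module; it is
not part of this commit, and the sample string is made up for illustration.

    use JSON::SL qw(decode_json);
    use Encode qw(decode encode);

    # Pretend this arrived from the outside world as UTF-16LE octets
    my $utf16_bytes = encode('UTF-16LE', '{"hello":"world"}');

    # Decode to Perl characters, then re-encode as UTF-8 octets for JSON::SL
    my $utf8_bytes = encode('UTF-8', decode('UTF-16LE', $utf16_bytes));

    my $data = decode_json($utf8_bytes);
    print $data->{hello}, "\n";    # prints "world"
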
106 lib/JSON/SL/Tuba.pm
@@ -2,9 +2,111 @@ package JSON::SL::Tuba;
use strict;
use warnings;
use JSON::SL;
+use Data::Dumper;
+use Log::Fu;
-sub _do_callout {
- my ($self,$cbname,$cbdata) = @_;
+sub start_JSON {
+ log_info("");
+}
+
+sub end_JSON {
+ log_info("");
+}
+
+sub start_OBJECT {
+ log_info("");
+}
+
+sub end_OBJECT {
+ log_info("");
+}
+
+sub start_HKEY {
+ log_info("");
+}
+
+sub end_HKEY {
+ log_info("");
+}
+
+sub start_STRING {
+ log_info("");
+}
+
+sub end_STRING {
+ log_info("");
+}
+
+sub start_LIST {
+ log_info("");
+}
+sub end_LIST {
+ log_info("");
+}
+
+sub start_NUMBER {
+ log_info("");
+}
+
+sub end_NUMBER {
+ log_info("");
+}
+
+sub start_DATA {
+ my ($self,$data) = @_;
+ log_infof("Got data: '%s'", $data);
+}
+
+
+##### Here be dragons, internal guts ######
+my %ActionMap = (
+ '+' => 'start',
+ '-' => 'end'
+);
+
+my %TypeMap = (
+ '{' => 'OBJECT',
+ '[' => 'LIST',
+ 'c' => 'DATA',
+ 'D' => 'JSON',
+ '#' => 'HKEY',
+ '"' => 'STRING',
+ '^' => 'SPECIAL',
+);
+
+sub new {
+ my ($cls,%options) = @_;
+ $cls->_initialize();
+}
+
+
+
+{
+ no warnings 'once';
+ *parse = *_parse;
+}
+
+sub _plhelper {
+ my ($tuba,$action,$type,$data) = @_;
+ my $action_prefix = $ActionMap{chr($action)};
+ my $type_suffix = $TypeMap{chr($type)};
+ if ($action_prefix && $type_suffix) {
+ my $methname = sprintf("%s_%s", $action_prefix, $type_suffix);
+ if (my $meth = $tuba->can($methname)) {
+ $meth->($tuba, $data);
+ return;
+ } else {
+ log_err("Couldn't locate method $methname");
+ }
+ } else {
+ log_warnf("Got (unhandled) action=%s, type=%s", chr($action), chr($type));
+ }
+ if ($data) {
+ log_infof("Data: %s", $data);
+ }
+
+ #my $methname = "$action_prefix\_$type_suffix";
+ #warn "Will call '$methname'";
}
1;
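
The handlers and the C<_plhelper> dispatcher above form the skeleton of the
SAX-style ("Tuba") interface the commit message refers to. A hypothetical usage
sketch follows; it is not part of this commit, the C<My::Handler> class is made
up, and the C<parse> entry point is assumed from the C<*parse = *_parse> alias
above (the interface is still listed as work in progress in the TODO section).

    package My::Handler;
    use strict;
    use warnings;
    use base qw(JSON::SL::Tuba);

    # Called when the top-level JSON document starts and ends
    sub start_JSON { print "document start\n" }
    sub end_JSON   { print "document end\n" }

    # Called with each chunk of character data (string/number contents)
    sub start_DATA {
        my ($self, $data) = @_;
        print "data: $data\n";
    }

    package main;
    my $handler = My::Handler->new();
    $handler->parse('{"hello":"world"}');
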
79 perl-jsonsl.h
@@ -29,6 +29,12 @@
#define PLTUBA_CLASS_NAME "JSON::SL::Tuba"
#define PLTUBA_HELPER_FUNC "JSON::SL::Tuba::_plhelper"
+#if PERL_VERSION >= 10
+#define PLJSONSL_HAVE_HV_COMMON
+#else
+#warning "You are using a Perl from the stone age. This code might work.."
+#endif /* 5.10.0 */
+
/**
* Extended fields for a stack state
* sv: the raw SV (never a reference)
@@ -74,27 +80,25 @@
#define PLJSONSL_mkTHX(pjsn)
#endif /* PERL_IMPLICIT_CONTEXT */
+
+#define PLJSONSL_COMMON_FIELDS \
+ /* The lexer */ \
+ jsonsl_t jsn; \
+ /* Input buffer */ \
+ SV *buf; \
+ /* Start position of the buffer (relative to input stream) */ \
+ size_t pos_min_valid; \
+ /* Position of the beginning of the earliest of (SPECIAL,STRINGY) */ \
+ size_t keep_pos; \
+ /* Context for threaded Perls */ \
+ void *pl_thx;
+
typedef struct {
- /* Our lexer */
- jsonsl_t jsn;
+ PLJSONSL_COMMON_FIELDS;
/* Root perl data structure. This is either an HV* or AV* */
SV *root;
- /* Backlog buffer, for large strings, 'special' data, \u-escaping,
- * and other fun stuff
- */
-
- SV *buf;
-
- /* The minimum valid position. Offsets smaller than this do not
- * point to valid data anymore
- */
- size_t pos_min_valid;
-
- /* Minimum backlog position */
- size_t keep_pos;
-
/**
* "current" hash key. This is always a pointer to an HE* of an existing
* hash entry, and thus should never be freed/destroyed directly.
@@ -103,19 +107,23 @@ typedef struct {
*/
HE *curhk;
+#ifndef PLJSONSL_HAVE_HV_COMMON
/**
* For older perls not exposing hv_common, we need a key sv.
- * make this as efficient as possible.
+ * make this as efficient as possible. Instead of instantiating a new
+ * SV each time for hv_fetch_ent, we keep one cached, and change its
+ * PV slot as needed. I am able to do this because I have looked at 5.8's
+ * implementation for the hv_* methods in hv.c and unless the hash is magical,
+ * the behavior is to simply extract the PV from the SV in the beginning
+ * anyway.
*/
SV *ksv;
char *ksv_origpv;
+#endif
/* Stash for booleans */
HV *stash_boolean;
- /* Context (THX) for threaded perls */
- void *pl_thx;
-
/**
* Variables the user might set or be interested in (via function calls,
* of course) are here:
@@ -124,7 +132,10 @@ typedef struct {
int utf8; /** Set the SvUTF8 flag */
int nopath; /** Don't include path context in results */
int noqstr; /** Don't include original query string in results */
- int max_size;
+ int max_size; /** maximum input size (from JSON::XS) */
+ /* ignore the jsonpointer settings and allow an 'iv-drip' of
+ * objects to be returned via feed */
+ int object_drip;
} options;
/**
@@ -171,28 +182,16 @@ typedef enum {
* differences.
*/
typedef struct {
- jsonsl_t jsn;
-
- /* Position at which our last callback was invoked */
- size_t last_cb_pos;
+ PLJSONSL_COMMON_FIELDS;
- /* position at which the last call to feed was made */
- size_t buf_pos;
-
- /**
- * In cases (or actually, usually) when 'character' data is at
- * some kind of beginning, the first character is the opening
- * token itself, usually a quote. this variable defines an
- * offset (either 1 or 0) for which data is to actually be
- * delivered to the user.
+ /* When we invoke a callback, instead of re-creating the
+ * mortalized rv each time, we just keep a static reference
+ * to ourselves
*/
- ssize_t chardata_begin_offset;
+ SV *selfrv;
- /* Buffer containing data to be dispatched */
- SV *buf;
-
- /* my_perl, for threaded perls */
- void *pl_thx;
+ /* set by hkey and string callbacks */
+ int shift_quote;
/* Options */
struct {
4 t/03-unescape.t
@@ -3,7 +3,6 @@ use strict;
use warnings;
use Test::More;
use JSON::SL qw(unescape_json_string);
-use Data::Dumper::Concise;
use utf8;
my $jsn = JSON::SL->new();
@@ -29,11 +28,10 @@ $res = $sl->feed($str);
ok($res, "Got result..");
ok(exists $res->{'LF_Key\n'}, "embedded '\\n' key in tact");
is($res->{'LF_Key\n'}, 'LF_Value\n', '\n value in tact');
-#print Dumper($res);
$sl->unescape_settings(n => 1);
$sl->reset();
$res = $sl->feed($str);
ok(exists $res->{"LF_Key\n"}, "Newline key unescaped..");
is($res->{"LF_Key\n"}, "LF_Value\n", "Newline value unescaped");
-done_testing();
+done_testing();
2  t/09-pc_base.t
@@ -38,4 +38,4 @@ eval q{ decode_json($js) };
ok($@);
like($@, qr/incomplete/i);
-done_testing();
+done_testing();
48 t/20-object_drip.t
@@ -0,0 +1,48 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use JSON::SL;
+use Test::More;
+
+my $sl = JSON::SL->new();
+$sl->object_drip(1);
+
+my $json = <<'EOJ';
+[
+ [
+ {
+ "key1" : "foo",
+ "key2" : "bar",
+ "key3" : "baz"
+ }
+ ]
+EOJ
+my @res = $sl->feed($json);
+
+my $expected = [
+ {
+ Value => "foo",
+ Path => '/0/0/key1',
+ },
+ {
+ Value => "bar",
+ Path => '/0/0/key2',
+ },
+ {
+ Value => "baz",
+ Path => '/0/0/key3'
+ },
+ {
+ Value => {},
+ Path => '/0/0'
+ },
+ {
+ Value => [],
+ Path => '/0'
+ }
+];
+
+is_deeply(\@res, $expected, "Got expected results for object drip...");
+done_testing();
+