diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc index c483cf08a7..b4346c5cac 100644 --- a/tests/json/test_json.cc +++ b/tests/json/test_json.cc @@ -295,17 +295,15 @@ void test_json_roundtrip_message(const char* json_src, upb::json::Parser* parser = upb::json::Parser::Create(env.env(), printer->input()); env.ResetBytesSink(parser->input()); - env.Reset(json_src, strlen(json_src), false); + env.Reset(json_src, strlen(json_src), false, false); bool ok = env.Start() && env.ParseBuffer(seam) && env.ParseBuffer(-1) && env.End(); - if (!ok) { - fprintf(stderr, "upb parse error: %s\n", env.status().error_message()); - } ASSERT(ok); + ASSERT(env.CheckConsistency()); if (memcmp(json_expected, data_sink.Data().data(), diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc index fd9d9ae375..a01e9999ed 100644 --- a/tests/pb/test_decoder.cc +++ b/tests/pb/test_decoder.cc @@ -32,6 +32,7 @@ #include #include #include +#include #include "tests/test_util.h" #include "tests/upb_test.h" @@ -176,6 +177,13 @@ string cat(const string& a, const string& b, return ret; } +template +string num2string(T num) { + std::ostringstream ss; + ss << num; + return ss.str(); +} + string varint(uint64_t x) { char buf[UPB_PB_VARINT_MAX_LEN]; size_t len = upb_vencode64(x, buf); @@ -202,6 +210,11 @@ string submsg(uint32_t fn, const string& buf) { return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) ); } +string group(uint32_t fn, const string& buf) { + return cat(tag(fn, UPB_WIRE_TYPE_START_GROUP), buf, + tag(fn, UPB_WIRE_TYPE_END_GROUP)); +} + // Like delim()/submsg(), but intentionally encodes an incorrect length. // These help test when a delimited boundary doesn't land in the right place. string badlen_delim(int err, const string& buf) { @@ -557,15 +570,14 @@ uint32_t Hash(const string& proto, const string* expected_output, size_t seam1, } void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) { - // We could have parsed as many as 10 bytes fewer than what the decoder - // previously accepted, since we can buffer up to 10 partial bytes internally - // before accumulating an entire value. - const int MAX_BUFFERED = 10; - // We can't have parsed more data than the decoder callback is telling us it // parsed. ASSERT(decoder.BytesParsed() <= ofs); - ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED)); + + // The difference between what we've decoded and what the decoder has accepted + // represents the internally buffered amount. This amount should not exceed + // this value which comes from decoder.int.h. + ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES)); } static bool parse(VerboseParserEnvironment* env, @@ -582,7 +594,7 @@ static bool parse(VerboseParserEnvironment* env, void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder, const string& proto, const string* expected_output, size_t i, size_t j, bool may_skip) { - env->Reset(proto.c_str(), proto.size(), may_skip); + env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL); decoder->Reset(); testhash = Hash(proto, expected_output, i, j, may_skip); @@ -598,7 +610,14 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder, PrintBinary(proto); fprintf(stderr, "\n"); if (expected_output) { - fprintf(stderr, "Expected output: %s\n", expected_output->c_str()); + if (test_mode == ALL_HANDLERS) { + fprintf(stderr, "Expected output: %s\n", expected_output->c_str()); + } else if (test_mode == NO_HANDLERS) { + fprintf(stderr, + "No handlers are registered, BUT if they were " + "the expected output would be: %s\n", + expected_output->c_str()); + } } else { fprintf(stderr, "Expected to FAIL\n"); } @@ -610,7 +629,7 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder, parse(env, *decoder, -1) && env->End(); - ASSERT(ok == env->status().ok()); + ASSERT(env->CheckConsistency()); if (test_mode == ALL_HANDLERS) { if (expected_output) { @@ -618,18 +637,12 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder, fprintf(stderr, "Text mismatch: '%s' vs '%s'\n", output.c_str(), expected_output->c_str()); } - if (!ok) { - fprintf(stderr, "Failed: %s\n", env->status().error_message()); - } ASSERT(ok); ASSERT(output == *expected_output); } else { if (ok) { fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n", output.c_str()); - } else if (filter_hash) { - fprintf(stderr, "Failed as we expected, with message: %s\n", - env->status().error_message()); } ASSERT(!ok); } @@ -657,6 +670,25 @@ void run_decoder(const string& proto, const string* expected_output) { const static string thirty_byte_nop = cat( tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) ); +// Indents and wraps text as if it were a submessage with this field number +string wrap_text(int32_t fn, const string& text) { + string wrapped_text = text; + size_t pos = 0; + string replace_with = "\n "; + while ((pos = wrapped_text.find("\n", pos)) != string::npos && + pos != wrapped_text.size() - 1) { + wrapped_text.replace(pos, 1, replace_with); + pos += replace_with.size(); + } + wrapped_text = cat( + LINE("<"), + num2string(fn), LINE(":{") + " ", wrapped_text, + LINE("}") + LINE(">")); + return wrapped_text; +} + void assert_successful_parse(const string& proto, const char *expected_fmt, ...) { string expected_text; @@ -668,16 +700,27 @@ void assert_successful_parse(const string& proto, // repeat once with no-op padding data at the end of buffer. run_decoder(proto, &expected_text); run_decoder(cat( proto, thirty_byte_nop ), &expected_text); + + // Test that this also works when wrapped in a submessage or group. + // Indent the expected text one level and wrap it. + string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text); + string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text); + + run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1); + run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2); } void assert_does_not_parse_at_eof(const string& proto) { run_decoder(proto, NULL); // Also test that we fail to parse at end-of-submessage, not just - // end-of-message. - run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL); - run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop), - NULL); + // end-of-message. But skip this if we have no handlers, because in that + // case we won't descend into the submessage. + if (test_mode != NO_HANDLERS) { + run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL); + run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), + thirty_byte_nop), NULL); + } } void assert_does_not_parse(const string& proto) { @@ -905,11 +948,13 @@ void test_invalid() { submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string(" ")))); // Test exceeding the resource limit of stack depth. - string buf; - for (int i = 0; i <= MAX_NESTING; i++) { - buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); + if (test_mode != NO_HANDLERS) { + string buf; + for (int i = 0; i <= MAX_NESTING; i++) { + buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf)); + } + assert_does_not_parse(buf); } - assert_does_not_parse(buf); } void test_valid() { @@ -986,6 +1031,15 @@ void test_valid() { "<\n>\n"); // Unknown field inside a known submessage. + assert_successful_parse( + submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))), + LINE("<") + LINE("%u:{") + LINE(" <") + LINE(" >") + LINE("}") + LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE); + assert_successful_parse( cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string(" "))), tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT), @@ -1162,7 +1216,9 @@ void test_valid() { indentbuf(&textbuf, total - i - 1); textbuf.append(">\n"); } - assert_successful_parse(buf, "%s", textbuf.c_str()); + // Have to use run_decoder directly, because we are at max nesting and can't + // afford the extra nesting that assert_successful_parse() will do. + run_decoder(buf, &textbuf); } upb::reffed_ptr NewMethod( @@ -1207,10 +1263,11 @@ upb::reffed_ptr method = upb::Sink sink(method->dest_handlers(), &closures[0]); upb::pb::Decoder* decoder = CreateDecoder(env.env(), method.get(), &sink); env.ResetBytesSink(decoder->input()); - env.Reset(testdata[i].data, testdata[i].length, true); + env.Reset(testdata[i].data, testdata[i].length, true, false); ASSERT(env.Start()); ASSERT(env.ParseBuffer(-1)); ASSERT(env.End()); + ASSERT(env.CheckConsistency()); } } diff --git a/tests/test_util.h b/tests/test_util.h index f3d5d5a952..caf9ee82c2 100644 --- a/tests/test_util.h +++ b/tests/test_util.h @@ -29,16 +29,35 @@ class VerboseParserEnvironment { public: // Pass verbose=true to print detailed diagnostics to stderr. VerboseParserEnvironment(bool verbose) : verbose_(verbose) { - env_.ReportErrorsTo(&status_); + env_.SetErrorFunction(&VerboseParserEnvironment::OnError, this); } - void Reset(const char *buf, size_t len, bool may_skip) { + static bool OnError(void *ud, const upb::Status* status) { + VerboseParserEnvironment* env = static_cast(ud); + + env->saw_error_ = true; + + if (env->expect_error_ && env->verbose_) { + fprintf(stderr, "Encountered error, as expected: "); + } else if (!env->expect_error_) { + fprintf(stderr, "Encountered unexpected error: "); + } else { + return false; + } + + fprintf(stderr, "%s\n", status->error_message()); + return false; + } + + void Reset(const char *buf, size_t len, bool may_skip, bool expect_error) { buf_ = buf; len_ = len; ofs_ = 0; + expect_error_ = expect_error; + saw_error_ = false; + end_ok_set_ = false; skip_until_ = may_skip ? 0 : -1; skipped_with_null_ = false; - status_.Clear(); } // The user should call a series of: @@ -63,7 +82,26 @@ class VerboseParserEnvironment { if (verbose_) { fprintf(stderr, "Calling end()\n"); } - return sink_->End(); + end_ok_ = sink_->End(); + end_ok_set_ = true; + + return end_ok_; + } + + bool CheckConsistency() { + /* If we called end (which we should only do when previous bytes are fully + * accepted), then end() should return true iff there were no errors. */ + if (end_ok_set_ && end_ok_ != !saw_error_) { + fprintf(stderr, "End() status and saw_error didn't match.\n"); + return false; + } + + if (expect_error_ && !saw_error_) { + fprintf(stderr, "Expected error but saw none.\n"); + return false; + } + + return true; } bool ParseBuffer(int bytes) { @@ -117,7 +155,7 @@ class VerboseParserEnvironment { } } - if (!status_.ok()) + if (saw_error_) return false; if (parsed > bytes && skip_until_ >= 0) { @@ -133,8 +171,6 @@ class VerboseParserEnvironment { sink_ = sink; } - const upb::Status& status() { return status_; } - size_t ofs() { return ofs_; } upb::Environment* env() { return &env_; } @@ -142,13 +178,16 @@ class VerboseParserEnvironment { private: upb::Environment env_; - upb::Status status_; upb::BytesSink* sink_; const char* buf_; size_t len_; bool verbose_; size_t ofs_; void *subc_; + bool expect_error_; + bool saw_error_; + bool end_ok_; + bool end_ok_set_; // When our parse call returns a value greater than the number of bytes // we passed in, the decoder is indicating to us that the next N bytes diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c index 28282473e8..b75f45c21e 100644 --- a/upb/pb/compile_decoder.c +++ b/upb/pb/compile_decoder.c @@ -596,7 +596,12 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f, if (!sub_m) { /* Don't emit any code for this field at all; it will be parsed as an - * unknown field. */ + * unknown field. + * + * TODO(haberman): we should change this to parse it as a string field + * instead. It will probably be faster, but more importantly, once we + * start vending unknown fields, a field shouldn't be treated as unknown + * just because it doesn't have subhandlers registered. */ return; } diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c index 7eca6d726f..e2e79ae7e5 100644 --- a/upb/pb/decoder.c +++ b/upb/pb/decoder.c @@ -36,6 +36,11 @@ static const char *kUnterminatedVarint = "Unterminated varint."; static opcode halt = OP_HALT; +/* A dummy character we can point to when the user passes us a NULL buffer. + * We need this because in C (NULL + 0) and (NULL - NULL) are undefined + * behavior, which would invalidate functions like curbufleft(). */ +static const char dummy_char; + /* Whether an op consumes any of the input buffer. */ static bool consumes_input(opcode op) { switch (op) { @@ -191,7 +196,7 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) { if (bytes > delim_remaining(d)) { seterr(d, "Skipped value extended beyond enclosing submessage."); return upb_pbdecoder_suspend(d); - } else if (bufleft(d) > bytes) { + } else if (bufleft(d) >= bytes) { /* Skipped data is all in current buffer, and more is still available. */ advance(d, bytes); d->skip = 0; @@ -213,10 +218,39 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, size_t size, const upb_bufhandle *handle) { UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ - d->buf_param = buf; + /* d->skip and d->residual_end could probably elegantly be represented + * as a single variable, to more easily represent this invariant. */ + assert(!(d->skip && d->residual_end > d->residual)); + + /* We need to remember the original size_param, so that the value we return + * is relative to it, even if we do some skipping first. */ d->size_param = size; d->handle = handle; + /* Have to handle this case specially (ie. not with skip()) because the user + * is allowed to pass a NULL buffer here, which won't allow us to safely + * calculate a d->end or use our normal functions like curbufleft(). */ + if (d->skip && d->skip >= size) { + d->skip -= size; + d->bufstart_ofs += size; + buf = &dummy_char; + size = 0; + + /* We can't just return now, because we might need to execute some ops + * like CHECKDELIM, which could call some callbacks and pop the stack. */ + } + + /* We need to pretend that this was the actual buffer param, since some of the + * calculations assume that d->ptr/d->buf is relative to this. */ + d->buf_param = buf; + + if (!buf) { + /* NULL buf is ok if its entire span is covered by the "skip" above, but + * by this point we know that "skip" doesn't cover the buffer. */ + seterr(d, "Passed NULL buffer over non-skippable region."); + return upb_pbdecoder_suspend(d); + } + if (d->residual_end > d->residual) { /* We have residual bytes from the last buffer. */ assert(d->ptr == d->residual); @@ -226,23 +260,18 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, d->checkpoint = d->ptr; + /* Handle skips that don't cover the whole buffer (as above). */ if (d->skip) { size_t skip_bytes = d->skip; d->skip = 0; CHECK_RETURN(skip(d, skip_bytes)); - d->checkpoint = d->ptr; - } - - if (!buf) { - /* NULL buf is ok if its entire span is covered by the "skip" above, but - * by this point we know that "skip" doesn't cover the buffer. */ - seterr(d, "Passed NULL buffer over non-skippable region."); - return upb_pbdecoder_suspend(d); + checkpoint(d); } + /* If we're inside an unknown group, continue to parse unknown values. */ if (d->top->groupnum < 0) { CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); - d->checkpoint = d->ptr; + checkpoint(d); } return DECODE_OK; @@ -257,15 +286,14 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { d->ptr = d->residual; return 0; } else { - size_t consumed; + size_t ret = d->size_param - (d->end - d->checkpoint); assert(!in_residual_buf(d, d->checkpoint)); - assert(d->buf == d->buf_param); + assert(d->buf == d->buf_param || d->buf == &dummy_char); - consumed = d->checkpoint - d->buf; - d->bufstart_ofs += consumed; + d->bufstart_ofs += (d->checkpoint - d->buf); d->residual_end = d->residual; switchtobuf(d, d->residual, d->residual_end); - return consumed; + return ret; } } @@ -383,8 +411,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, int bitpos; *u64 = 0; for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { - int32_t ret = getbytes(d, &byte, 1); - if (ret >= 0) return ret; + CHECK_RETURN(getbytes(d, &byte, 1)); *u64 |= (uint64_t)(byte & 0x7F) << bitpos; } if(bitpos == 70 && (byte & 0x80)) { diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h index 65316b4e40..8172a9971e 100644 --- a/upb/pb/decoder.h +++ b/upb/pb/decoder.h @@ -36,6 +36,13 @@ UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts) UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted, upb_pbdecodermethod, upb_refcounted) +/* The maximum number of bytes we are required to buffer internally between + * calls to the decoder. The value is 14: a 5 byte unknown tag plus ten-byte + * varint, less one because we are buffering an incomplete value. + * + * Should only be used by unit tests. */ +#define UPB_DECODER_MAX_RESIDUAL_BYTES 14 + #ifdef __cplusplus /* The parameters one uses to construct a DecoderMethod. diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h index a63f74b9a3..f2bf242bb8 100644 --- a/upb/pb/decoder.int.h +++ b/upb/pb/decoder.int.h @@ -218,11 +218,8 @@ struct upb_pbdecoder { /* Overall stream offset of "buf." */ uint64_t bufstart_ofs; - /* Buffer for residual bytes not parsed from the previous buffer. - * The maximum number of residual bytes we require is 12; a five-byte - * unknown tag plus an eight-byte value, less one because the value - * is only a partial value. */ - char residual[12]; + /* Buffer for residual bytes not parsed from the previous buffer. */ + char residual[UPB_DECODER_MAX_RESIDUAL_BYTES]; char *residual_end; /* Bytes of data that should be discarded from the input beore we start