diff --git a/tests/json/test_json.cc b/tests/json/test_json.cc
index c483cf08a7..b4346c5cac 100644
--- a/tests/json/test_json.cc
+++ b/tests/json/test_json.cc
@@ -295,17 +295,15 @@ void test_json_roundtrip_message(const char* json_src,
   upb::json::Parser* parser =
       upb::json::Parser::Create(env.env(), printer->input());
   env.ResetBytesSink(parser->input());
-  env.Reset(json_src, strlen(json_src), false);
+  env.Reset(json_src, strlen(json_src), false, false);
 
   bool ok = env.Start() &&
             env.ParseBuffer(seam) &&
             env.ParseBuffer(-1) &&
             env.End();
 
-  if (!ok) {
-    fprintf(stderr, "upb parse error: %s\n", env.status().error_message());
-  }
   ASSERT(ok);
+  ASSERT(env.CheckConsistency());
 
   if (memcmp(json_expected,
              data_sink.Data().data(),
diff --git a/tests/pb/test_decoder.cc b/tests/pb/test_decoder.cc
index fd9d9ae375..a01e9999ed 100644
--- a/tests/pb/test_decoder.cc
+++ b/tests/pb/test_decoder.cc
@@ -32,6 +32,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sstream>
 
 #include "tests/test_util.h"
 #include "tests/upb_test.h"
@@ -176,6 +177,13 @@ string cat(const string& a, const string& b,
   return ret;
 }
 
+template <typename T>
+string num2string(T num) {
+  std::ostringstream ss;
+  ss << num;
+  return ss.str();
+}
+
 string varint(uint64_t x) {
   char buf[UPB_PB_VARINT_MAX_LEN];
   size_t len = upb_vencode64(x, buf);
@@ -202,6 +210,11 @@ string submsg(uint32_t fn, const string& buf) {
   return cat( tag(fn, UPB_WIRE_TYPE_DELIMITED), delim(buf) );
 }
 
+string group(uint32_t fn, const string& buf) {
+  return cat(tag(fn, UPB_WIRE_TYPE_START_GROUP), buf,
+             tag(fn, UPB_WIRE_TYPE_END_GROUP));
+}
+
 // Like delim()/submsg(), but intentionally encodes an incorrect length.
 // These help test when a delimited boundary doesn't land in the right place.
 string badlen_delim(int err, const string& buf) {
@@ -557,15 +570,14 @@ uint32_t Hash(const string& proto, const string* expected_output, size_t seam1,
 }
 
 void CheckBytesParsed(const upb::pb::Decoder& decoder, size_t ofs) {
-  // We could have parsed as many as 10 bytes fewer than what the decoder
-  // previously accepted, since we can buffer up to 10 partial bytes internally
-  // before accumulating an entire value.
-  const int MAX_BUFFERED = 10;
-
   // We can't have parsed more data than the decoder callback is telling us it
   // parsed.
   ASSERT(decoder.BytesParsed() <= ofs);
-  ASSERT(ofs <= (decoder.BytesParsed() + MAX_BUFFERED));
+
+  // The difference between what we've decoded and what the decoder has accepted
+  // represents the internally buffered amount.  This amount should not exceed
+  // this value which comes from decoder.int.h.
+  ASSERT(ofs <= (decoder.BytesParsed() + UPB_DECODER_MAX_RESIDUAL_BYTES));
 }
 
 static bool parse(VerboseParserEnvironment* env,
@@ -582,7 +594,7 @@ static bool parse(VerboseParserEnvironment* env,
 void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
                     const string& proto, const string* expected_output,
                     size_t i, size_t j, bool may_skip) {
-  env->Reset(proto.c_str(), proto.size(), may_skip);
+  env->Reset(proto.c_str(), proto.size(), may_skip, expected_output == NULL);
   decoder->Reset();
 
   testhash = Hash(proto, expected_output, i, j, may_skip);
@@ -598,7 +610,14 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
       PrintBinary(proto);
       fprintf(stderr, "\n");
       if (expected_output) {
-        fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
+        if (test_mode == ALL_HANDLERS) {
+          fprintf(stderr, "Expected output: %s\n", expected_output->c_str());
+        } else if (test_mode == NO_HANDLERS) {
+          fprintf(stderr,
+                  "No handlers are registered, BUT if they were "
+                  "the expected output would be: %s\n",
+                  expected_output->c_str());
+        }
       } else {
         fprintf(stderr, "Expected to FAIL\n");
       }
@@ -610,7 +629,7 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
               parse(env, *decoder, -1) &&
               env->End();
 
-    ASSERT(ok == env->status().ok());
+    ASSERT(env->CheckConsistency());
 
     if (test_mode == ALL_HANDLERS) {
       if (expected_output) {
@@ -618,18 +637,12 @@ void do_run_decoder(VerboseParserEnvironment* env, upb::pb::Decoder* decoder,
           fprintf(stderr, "Text mismatch: '%s' vs '%s'\n",
                   output.c_str(), expected_output->c_str());
         }
-        if (!ok) {
-          fprintf(stderr, "Failed: %s\n", env->status().error_message());
-        }
         ASSERT(ok);
         ASSERT(output == *expected_output);
       } else {
         if (ok) {
           fprintf(stderr, "Didn't expect ok result, but got output: '%s'\n",
                   output.c_str());
-        } else if (filter_hash) {
-          fprintf(stderr, "Failed as we expected, with message: %s\n",
-                  env->status().error_message());
         }
         ASSERT(!ok);
       }
@@ -657,6 +670,25 @@ void run_decoder(const string& proto, const string* expected_output) {
 const static string thirty_byte_nop = cat(
     tag(NOP_FIELD, UPB_WIRE_TYPE_DELIMITED), delim(string(30, 'X')) );
 
+// Indents and wraps text as if it were a submessage with this field number
+string wrap_text(int32_t fn, const string& text) {
+  string wrapped_text = text;
+  size_t pos = 0;
+  string replace_with = "\n  ";
+  while ((pos = wrapped_text.find("\n", pos)) != string::npos &&
+         pos != wrapped_text.size() - 1) {
+    wrapped_text.replace(pos, 1, replace_with);
+    pos += replace_with.size();
+  }
+  wrapped_text = cat(
+      LINE("<"),
+      num2string(fn), LINE(":{")
+      "  ", wrapped_text,
+      LINE("}")
+      LINE(">"));
+  return wrapped_text;
+}
+
 void assert_successful_parse(const string& proto,
                              const char *expected_fmt, ...) {
   string expected_text;
@@ -668,16 +700,27 @@ void assert_successful_parse(const string& proto,
   // repeat once with no-op padding data at the end of buffer.
   run_decoder(proto, &expected_text);
   run_decoder(cat( proto, thirty_byte_nop ), &expected_text);
+
+  // Test that this also works when wrapped in a submessage or group.
+  // Indent the expected text one level and wrap it.
+  string wrapped_text1 = wrap_text(UPB_DESCRIPTOR_TYPE_MESSAGE, expected_text);
+  string wrapped_text2 = wrap_text(UPB_DESCRIPTOR_TYPE_GROUP, expected_text);
+
+  run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), &wrapped_text1);
+  run_decoder(group(UPB_DESCRIPTOR_TYPE_GROUP, proto), &wrapped_text2);
 }
 
 void assert_does_not_parse_at_eof(const string& proto) {
   run_decoder(proto, NULL);
 
   // Also test that we fail to parse at end-of-submessage, not just
-  // end-of-message.
-  run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
-  run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), thirty_byte_nop),
-              NULL);
+  // end-of-message.  But skip this if we have no handlers, because in that
+  // case we won't descend into the submessage.
+  if (test_mode != NO_HANDLERS) {
+    run_decoder(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto), NULL);
+    run_decoder(cat(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, proto),
+                    thirty_byte_nop), NULL);
+  }
 }
 
 void assert_does_not_parse(const string& proto) {
@@ -905,11 +948,13 @@ void test_invalid() {
            submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, string("     "))));
 
   // Test exceeding the resource limit of stack depth.
-  string buf;
-  for (int i = 0; i <= MAX_NESTING; i++) {
-    buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
+  if (test_mode != NO_HANDLERS) {
+    string buf;
+    for (int i = 0; i <= MAX_NESTING; i++) {
+      buf.assign(submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, buf));
+    }
+    assert_does_not_parse(buf);
   }
-  assert_does_not_parse(buf);
 }
 
 void test_valid() {
@@ -986,6 +1031,15 @@ void test_valid() {
       "<\n>\n");
 
   // Unknown field inside a known submessage.
+  assert_successful_parse(
+      submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string("   "))),
+      LINE("<")
+      LINE("%u:{")
+      LINE("  <")
+      LINE("  >")
+      LINE("}")
+      LINE(">"), UPB_DESCRIPTOR_TYPE_MESSAGE);
+
   assert_successful_parse(
       cat (submsg(UPB_DESCRIPTOR_TYPE_MESSAGE, submsg(12345, string("   "))),
            tag(UPB_DESCRIPTOR_TYPE_INT32, UPB_WIRE_TYPE_VARINT),
@@ -1162,7 +1216,9 @@ void test_valid() {
     indentbuf(&textbuf, total - i - 1);
     textbuf.append(">\n");
   }
-  assert_successful_parse(buf, "%s", textbuf.c_str());
+  // Have to use run_decoder directly, because we are at max nesting and can't
+  // afford the extra nesting that assert_successful_parse() will do.
+  run_decoder(buf, &textbuf);
 }
 
 upb::reffed_ptr<const upb::pb::DecoderMethod> NewMethod(
@@ -1207,10 +1263,11 @@ upb::reffed_ptr<const upb::pb::DecoderMethod> method =
     upb::Sink sink(method->dest_handlers(), &closures[0]);
     upb::pb::Decoder* decoder = CreateDecoder(env.env(), method.get(), &sink);
     env.ResetBytesSink(decoder->input());
-    env.Reset(testdata[i].data, testdata[i].length, true);
+    env.Reset(testdata[i].data, testdata[i].length, true, false);
     ASSERT(env.Start());
     ASSERT(env.ParseBuffer(-1));
     ASSERT(env.End());
+    ASSERT(env.CheckConsistency());
   }
 }
 
diff --git a/tests/test_util.h b/tests/test_util.h
index f3d5d5a952..caf9ee82c2 100644
--- a/tests/test_util.h
+++ b/tests/test_util.h
@@ -29,16 +29,35 @@ class VerboseParserEnvironment {
  public:
   // Pass verbose=true to print detailed diagnostics to stderr.
   VerboseParserEnvironment(bool verbose) : verbose_(verbose) {
-    env_.ReportErrorsTo(&status_);
+    env_.SetErrorFunction(&VerboseParserEnvironment::OnError, this);
   }
 
-  void Reset(const char *buf, size_t len, bool may_skip) {
+  static bool OnError(void *ud, const upb::Status* status) {
+    VerboseParserEnvironment* env = static_cast<VerboseParserEnvironment*>(ud);
+
+    env->saw_error_ = true;
+
+    if (env->expect_error_ && env->verbose_) {
+      fprintf(stderr, "Encountered error, as expected: ");
+    } else if (!env->expect_error_) {
+      fprintf(stderr, "Encountered unexpected error: ");
+    } else {
+      return false;
+    }
+
+    fprintf(stderr, "%s\n", status->error_message());
+    return false;
+  }
+
+  void Reset(const char *buf, size_t len, bool may_skip, bool expect_error) {
     buf_ = buf;
     len_ = len;
     ofs_ = 0;
+    expect_error_ = expect_error;
+    saw_error_ = false;
+    end_ok_set_ = false;
     skip_until_ = may_skip ? 0 : -1;
     skipped_with_null_ = false;
-    status_.Clear();
   }
 
   // The user should call a series of:
@@ -63,7 +82,26 @@ class VerboseParserEnvironment {
     if (verbose_) {
       fprintf(stderr, "Calling end()\n");
     }
-    return sink_->End();
+    end_ok_ = sink_->End();
+    end_ok_set_ = true;
+
+    return end_ok_;
+  }
+
+  bool CheckConsistency() {
+    /* If we called end (which we should only do when previous bytes are fully
+     * accepted), then end() should return true iff there were no errors. */
+    if (end_ok_set_ && end_ok_ != !saw_error_) {
+      fprintf(stderr, "End() status and saw_error didn't match.\n");
+      return false;
+    }
+
+    if (expect_error_ && !saw_error_) {
+      fprintf(stderr, "Expected error but saw none.\n");
+      return false;
+    }
+
+    return true;
   }
 
   bool ParseBuffer(int bytes) {
@@ -117,7 +155,7 @@ class VerboseParserEnvironment {
       }
     }
 
-    if (!status_.ok())
+    if (saw_error_)
       return false;
 
     if (parsed > bytes && skip_until_ >= 0) {
@@ -133,8 +171,6 @@ class VerboseParserEnvironment {
     sink_ = sink;
   }
 
-  const upb::Status& status() { return status_; }
-
   size_t ofs() { return ofs_; }
   upb::Environment* env() { return &env_; }
 
@@ -142,13 +178,16 @@ class VerboseParserEnvironment {
 
  private:
   upb::Environment env_;
-  upb::Status status_;
   upb::BytesSink* sink_;
   const char* buf_;
   size_t len_;
   bool verbose_;
   size_t ofs_;
   void *subc_;
+  bool expect_error_;
+  bool saw_error_;
+  bool end_ok_;
+  bool end_ok_set_;
 
   // When our parse call returns a value greater than the number of bytes
   // we passed in, the decoder is indicating to us that the next N bytes
diff --git a/upb/pb/compile_decoder.c b/upb/pb/compile_decoder.c
index 28282473e8..b75f45c21e 100644
--- a/upb/pb/compile_decoder.c
+++ b/upb/pb/compile_decoder.c
@@ -596,7 +596,12 @@ static void generate_msgfield(compiler *c, const upb_fielddef *f,
 
   if (!sub_m) {
     /* Don't emit any code for this field at all; it will be parsed as an
-     * unknown field. */
+     * unknown field.
+     *
+     * TODO(haberman): we should change this to parse it as a string field
+     * instead.  It will probably be faster, but more importantly, once we
+     * start vending unknown fields, a field shouldn't be treated as unknown
+     * just because it doesn't have subhandlers registered. */
     return;
   }
 
diff --git a/upb/pb/decoder.c b/upb/pb/decoder.c
index 7eca6d726f..e2e79ae7e5 100644
--- a/upb/pb/decoder.c
+++ b/upb/pb/decoder.c
@@ -36,6 +36,11 @@ static const char *kUnterminatedVarint = "Unterminated varint.";
 
 static opcode halt = OP_HALT;
 
+/* A dummy character we can point to when the user passes us a NULL buffer.
+ * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
+ * behavior, which would invalidate functions like curbufleft(). */
+static const char dummy_char;
+
 /* Whether an op consumes any of the input buffer. */
 static bool consumes_input(opcode op) {
   switch (op) {
@@ -191,7 +196,7 @@ static int32_t skip(upb_pbdecoder *d, size_t bytes) {
   if (bytes > delim_remaining(d)) {
     seterr(d, "Skipped value extended beyond enclosing submessage.");
     return upb_pbdecoder_suspend(d);
-  } else if (bufleft(d) > bytes) {
+  } else if (bufleft(d) >= bytes) {
     /* Skipped data is all in current buffer, and more is still available. */
     advance(d, bytes);
     d->skip = 0;
@@ -213,10 +218,39 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                              size_t size, const upb_bufhandle *handle) {
   UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */
 
-  d->buf_param = buf;
+  /* d->skip and d->residual_end could probably elegantly be represented
+   * as a single variable, to more easily represent this invariant. */
+  assert(!(d->skip && d->residual_end > d->residual));
+
+  /* We need to remember the original size_param, so that the value we return
+   * is relative to it, even if we do some skipping first. */
   d->size_param = size;
   d->handle = handle;
 
+  /* Have to handle this case specially (ie. not with skip()) because the user
+   * is allowed to pass a NULL buffer here, which won't allow us to safely
+   * calculate a d->end or use our normal functions like curbufleft(). */
+  if (d->skip && d->skip >= size) {
+    d->skip -= size;
+    d->bufstart_ofs += size;
+    buf = &dummy_char;
+    size = 0;
+
+    /* We can't just return now, because we might need to execute some ops
+     * like CHECKDELIM, which could call some callbacks and pop the stack. */
+  }
+
+  /* We need to pretend that this was the actual buffer param, since some of the
+   * calculations assume that d->ptr/d->buf is relative to this. */
+  d->buf_param = buf;
+
+  if (!buf) {
+    /* NULL buf is ok if its entire span is covered by the "skip" above, but
+     * by this point we know that "skip" doesn't cover the buffer. */
+    seterr(d, "Passed NULL buffer over non-skippable region.");
+    return upb_pbdecoder_suspend(d);
+  }
+
   if (d->residual_end > d->residual) {
     /* We have residual bytes from the last buffer. */
     assert(d->ptr == d->residual);
@@ -226,23 +260,18 @@ int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
 
   d->checkpoint = d->ptr;
 
+  /* Handle skips that don't cover the whole buffer (as above). */
   if (d->skip) {
     size_t skip_bytes = d->skip;
     d->skip = 0;
     CHECK_RETURN(skip(d, skip_bytes));
-    d->checkpoint = d->ptr;
-  }
-
-  if (!buf) {
-    /* NULL buf is ok if its entire span is covered by the "skip" above, but
-     * by this point we know that "skip" doesn't cover the buffer. */
-    seterr(d, "Passed NULL buffer over non-skippable region.");
-    return upb_pbdecoder_suspend(d);
+    checkpoint(d);
   }
 
+  /* If we're inside an unknown group, continue to parse unknown values. */
   if (d->top->groupnum < 0) {
     CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
-    d->checkpoint = d->ptr;
+    checkpoint(d);
   }
 
   return DECODE_OK;
@@ -257,15 +286,14 @@ size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
     d->ptr = d->residual;
     return 0;
   } else {
-    size_t consumed;
+    size_t ret = d->size_param - (d->end - d->checkpoint);
     assert(!in_residual_buf(d, d->checkpoint));
-    assert(d->buf == d->buf_param);
+    assert(d->buf == d->buf_param || d->buf == &dummy_char);
 
-    consumed = d->checkpoint - d->buf;
-    d->bufstart_ofs += consumed;
+    d->bufstart_ofs += (d->checkpoint - d->buf);
     d->residual_end = d->residual;
     switchtobuf(d, d->residual, d->residual_end);
-    return consumed;
+    return ret;
   }
 }
 
@@ -383,8 +411,7 @@ UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
   int bitpos;
   *u64 = 0;
   for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
-    int32_t ret = getbytes(d, &byte, 1);
-    if (ret >= 0) return ret;
+    CHECK_RETURN(getbytes(d, &byte, 1));
     *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
   }
   if(bitpos == 70 && (byte & 0x80)) {
diff --git a/upb/pb/decoder.h b/upb/pb/decoder.h
index 65316b4e40..8172a9971e 100644
--- a/upb/pb/decoder.h
+++ b/upb/pb/decoder.h
@@ -36,6 +36,13 @@ UPB_DECLARE_TYPE(upb::pb::DecoderMethodOptions, upb_pbdecodermethodopts)
 UPB_DECLARE_DERIVED_TYPE(upb::pb::DecoderMethod, upb::RefCounted,
                          upb_pbdecodermethod, upb_refcounted)
 
+/* The maximum number of bytes we are required to buffer internally between
+ * calls to the decoder.  The value is 14: a 5 byte unknown tag plus ten-byte
+ * varint, less one because we are buffering an incomplete value.
+ *
+ * Should only be used by unit tests. */
+#define UPB_DECODER_MAX_RESIDUAL_BYTES 14
+
 #ifdef __cplusplus
 
 /* The parameters one uses to construct a DecoderMethod.
diff --git a/upb/pb/decoder.int.h b/upb/pb/decoder.int.h
index a63f74b9a3..f2bf242bb8 100644
--- a/upb/pb/decoder.int.h
+++ b/upb/pb/decoder.int.h
@@ -218,11 +218,8 @@ struct upb_pbdecoder {
   /* Overall stream offset of "buf." */
   uint64_t bufstart_ofs;
 
-  /* Buffer for residual bytes not parsed from the previous buffer.
-   * The maximum number of residual bytes we require is 12; a five-byte
-   * unknown tag plus an eight-byte value, less one because the value
-   * is only a partial value. */
-  char residual[12];
+  /* Buffer for residual bytes not parsed from the previous buffer. */
+  char residual[UPB_DECODER_MAX_RESIDUAL_BYTES];
   char *residual_end;
 
   /* Bytes of data that should be discarded from the input beore we start