From fd32d1ab85c82356f5e8a848514a60e65b03cfe5 Mon Sep 17 00:00:00 2001 From: Russell Yanofsky Date: Tue, 1 Nov 2016 15:49:43 -0400 Subject: [PATCH] Don't require nul-terminated string inputs Avoid giving '\0' characters special treatment in JSON parsing code. This way the parser will reject input strings like "[1,2,3]\0-extra-nonsense" instead of returning a partially parsed interpretation of the input. It also allows the parser to be called with char pointers that might not be nul terminated (like the char pointers returned by std::string::data()). Fixes following test failures in https://github.com/nst/JSONTestSuite: bitcoin SHOULD_HAVE_FAILED n_multidigit_number_then_00.json --- Makefile.am | 8 +++++++- include/univalue.h | 5 +++-- lib/univalue.cpp | 2 +- lib/univalue_read.cpp | 46 +++++++++++++++++++++++++----------------- test/.gitignore | 1 + test/fail42.json | Bin 0 -> 37 bytes test/no_nul.cpp | 8 ++++++++ test/unitester.cpp | 1 + 8 files changed, 48 insertions(+), 23 deletions(-) create mode 100644 test/fail42.json create mode 100644 test/no_nul.cpp diff --git a/Makefile.am b/Makefile.am index 6c1ec81e63fb7c..ee3b33e4a597d0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -20,7 +20,7 @@ libunivalue_la_LDFLAGS = \ -no-undefined libunivalue_la_CXXFLAGS = -I$(top_srcdir)/include -TESTS = test/unitester +TESTS = test/unitester test/no_nul GENBIN = gen/gen$(BUILD_EXEEXT) GEN_SRCS = gen/gen.cpp @@ -42,6 +42,11 @@ test_unitester_LDADD = libunivalue.la test_unitester_CXXFLAGS = -I$(top_srcdir)/include -DJSON_TEST_SRC=\"$(srcdir)/$(TEST_DATA_DIR)\" test_unitester_LDFLAGS = -static $(LIBTOOL_APP_LDFLAGS) +test_no_nul_SOURCES = test/no_nul.cpp +test_no_nul_LDADD = libunivalue.la +test_no_nul_CXXFLAGS = -I$(top_srcdir)/include +test_no_nul_LDFLAGS = -static $(LIBTOOL_APP_LDFLAGS) + TEST_FILES = \ $(TEST_DATA_DIR)/fail10.json \ $(TEST_DATA_DIR)/fail11.json \ @@ -77,6 +82,7 @@ TEST_FILES = \ $(TEST_DATA_DIR)/fail39.json \ $(TEST_DATA_DIR)/fail40.json \ $(TEST_DATA_DIR)/fail41.json \ + $(TEST_DATA_DIR)/fail42.json \ $(TEST_DATA_DIR)/fail3.json \ $(TEST_DATA_DIR)/fail4.json \ $(TEST_DATA_DIR)/fail5.json \ diff --git a/include/univalue.h b/include/univalue.h index e8ce283519fed9..362bd6f28dc288 100644 --- a/include/univalue.h +++ b/include/univalue.h @@ -124,9 +124,10 @@ class UniValue { std::string write(unsigned int prettyIndent = 0, unsigned int indentLevel = 0) const; + bool read(const char *raw, size_t len); bool read(const char *raw); bool read(const std::string& rawStr) { - return read(rawStr.c_str()); + return read(rawStr.data(), rawStr.size()); } private: @@ -240,7 +241,7 @@ enum jtokentype { }; extern enum jtokentype getJsonToken(std::string& tokenVal, - unsigned int& consumed, const char *raw); + unsigned int& consumed, const char *raw, const char *end); extern const char *uvTypeName(UniValue::VType t); static inline bool jsonTokenIsValue(enum jtokentype jtt) diff --git a/lib/univalue.cpp b/lib/univalue.cpp index 5a2860c13f8d6c..6058622a549c87 100644 --- a/lib/univalue.cpp +++ b/lib/univalue.cpp @@ -104,7 +104,7 @@ static bool validNumStr(const string& s) { string tokenVal; unsigned int consumed; - enum jtokentype tt = getJsonToken(tokenVal, consumed, s.c_str()); + enum jtokentype tt = getJsonToken(tokenVal, consumed, s.data(), s.data() + s.size()); return (tt == JTOK_NUMBER); } diff --git a/lib/univalue_read.cpp b/lib/univalue_read.cpp index 95bac6958d0fa7..81cde4c19dd8ad 100644 --- a/lib/univalue_read.cpp +++ b/lib/univalue_read.cpp @@ -43,21 +43,21 @@ static const char *hatoui(const char *first, const char *last, } enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, - const char *raw) + const char *raw, const char *end) { tokenVal.clear(); consumed = 0; const char *rawStart = raw; - while ((*raw) && (json_isspace(*raw))) // skip whitespace + while (raw < end && (json_isspace(*raw))) // skip whitespace raw++; - switch (*raw) { - - case 0: + if (raw >= end) return JTOK_NONE; + switch (*raw) { + case '{': raw++; consumed = (raw - rawStart); @@ -127,40 +127,40 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, numStr += *raw; // copy first char raw++; - if ((*first == '-') && (!json_isdigit(*raw))) + if ((*first == '-') && (raw < end) && (!json_isdigit(*raw))) return JTOK_ERR; - while ((*raw) && json_isdigit(*raw)) { // copy digits + while (raw < end && json_isdigit(*raw)) { // copy digits numStr += *raw; raw++; } // part 2: frac - if (*raw == '.') { + if (raw < end && *raw == '.') { numStr += *raw; // copy . raw++; - if (!json_isdigit(*raw)) + if (raw >= end || !json_isdigit(*raw)) return JTOK_ERR; - while ((*raw) && json_isdigit(*raw)) { // copy digits + while (raw < end && json_isdigit(*raw)) { // copy digits numStr += *raw; raw++; } } // part 3: exp - if (*raw == 'e' || *raw == 'E') { + if (raw < end && (*raw == 'e' || *raw == 'E')) { numStr += *raw; // copy E raw++; - if (*raw == '-' || *raw == '+') { // copy +/- + if (raw < end && (*raw == '-' || *raw == '+')) { // copy +/- numStr += *raw; raw++; } - if (!json_isdigit(*raw)) + if (raw >= end || !json_isdigit(*raw)) return JTOK_ERR; - while ((*raw) && json_isdigit(*raw)) { // copy digits + while (raw < end && json_isdigit(*raw)) { // copy digits numStr += *raw; raw++; } @@ -177,13 +177,16 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, string valStr; JSONUTF8StringFilter writer(valStr); - while (*raw) { + while (raw < end) { if ((unsigned char)*raw < 0x20) return JTOK_ERR; else if (*raw == '\\') { raw++; // skip backslash + if (raw >= end) + return JTOK_ERR; + switch (*raw) { case '"': writer.push_back('\"'); break; case '\\': writer.push_back('\\'); break; @@ -196,7 +199,8 @@ enum jtokentype getJsonToken(string& tokenVal, unsigned int& consumed, case 'u': { unsigned int codepoint; - if (hatoui(raw + 1, raw + 1 + 4, codepoint) != + if (raw + 1 + 4 >= end || + hatoui(raw + 1, raw + 1 + 4, codepoint) != raw + 1 + 4) return JTOK_ERR; writer.push_back_u(codepoint); @@ -246,7 +250,7 @@ enum expect_bits { #define setExpect(bit) (expectMask |= EXP_##bit) #define clearExpect(bit) (expectMask &= ~EXP_##bit) -bool UniValue::read(const char *raw) +bool UniValue::read(const char *raw, size_t size) { clear(); @@ -257,10 +261,11 @@ bool UniValue::read(const char *raw) unsigned int consumed; enum jtokentype tok = JTOK_NONE; enum jtokentype last_tok = JTOK_NONE; + const char* end = raw + size; do { last_tok = tok; - tok = getJsonToken(tokenVal, consumed, raw); + tok = getJsonToken(tokenVal, consumed, raw, end); if (tok == JTOK_NONE || tok == JTOK_ERR) return false; raw += consumed; @@ -432,10 +437,13 @@ bool UniValue::read(const char *raw) } while (!stack.empty ()); /* Check that nothing follows the initial construct (parsed above). */ - tok = getJsonToken(tokenVal, consumed, raw); + tok = getJsonToken(tokenVal, consumed, raw, end); if (tok != JTOK_NONE) return false; return true; } +bool UniValue::read(const char *raw) { + return read(raw, strlen(raw)); +} diff --git a/test/.gitignore b/test/.gitignore index 3d9347fe7e52b4..aea12376e5234f 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -1,4 +1,5 @@ unitester +no_nul *.trs *.log diff --git a/test/fail42.json b/test/fail42.json new file mode 100644 index 0000000000000000000000000000000000000000..9c7565adbddf645df5edfbdcd630c7a0f94aa2eb GIT binary patch literal 37 kcma!6N=i-3FG^L&E6q_zsw_!Wie*qrOe;w(LWpny0Pvs;RsaA1 literal 0 HcmV?d00001 diff --git a/test/no_nul.cpp b/test/no_nul.cpp new file mode 100644 index 00000000000000..83d292200bf875 --- /dev/null +++ b/test/no_nul.cpp @@ -0,0 +1,8 @@ +#include "univalue.h" + +int main (int argc, char *argv[]) +{ + char buf[] = "___[1,2,3]___"; + UniValue val; + return val.read(buf + 3, 7) ? 0 : 1; +} diff --git a/test/unitester.cpp b/test/unitester.cpp index 05f3842cd1eb63..489ae0593054d3 100644 --- a/test/unitester.cpp +++ b/test/unitester.cpp @@ -113,6 +113,7 @@ static const char *filenames[] = { "fail39.json", // invalid unicode: only second half of surrogate pair "fail40.json", // invalid unicode: broken UTF-8 "fail41.json", // invalid unicode: unfinished UTF-8 + "fail42.json", // valid json with garbage following a nul byte "fail3.json", "fail4.json", // extra comma "fail5.json",