From 33391915429e51477e4ca6e6601b8a539ad96481 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Thu, 5 Nov 2015 21:39:51 +0100 Subject: [PATCH 1/2] Escape all control characters All control characters (U+0000..U+001f) should be escaped according to RFC4627. Also escape `U+007f DELETE`: it is not mentioned in RFC4627 as a control character, but it is helpful to escape it, as many fonts either don't render it or render it weirdly. --- gen/gen.cpp | 14 ++++++++--- lib/univalue_escapes.h | 56 +++++++++++++++++++++--------------------- lib/univalue_write.cpp | 4 +-- test/round1.json | 1 + test/unitester.cpp | 15 ++++++++++- 5 files changed, 56 insertions(+), 34 deletions(-) create mode 100644 test/round1.json diff --git a/gen/gen.cpp b/gen/gen.cpp index 5e5a4d4aed43a8..4d24bbedd636fd 100644 --- a/gen/gen.cpp +++ b/gen/gen.cpp @@ -16,10 +16,17 @@ using namespace std; static bool initEscapes; -static const char *escapes[256]; +static std::string escapes[256]; static void initJsonEscape() { + // Escape all lower control characters (some get overridden with smaller sequences below) + for (int ch=0x00; ch<0x20; ++ch) { + char tmpbuf[20]; + snprintf(tmpbuf, sizeof(tmpbuf), "\\u%04x", ch); + escapes[ch] = std::string(tmpbuf); + } + escapes[(int)'"'] = "\\\""; escapes[(int)'\\'] = "\\\\"; escapes[(int)'\b'] = "\\b"; @@ -27,6 +34,7 @@ static void initJsonEscape() escapes[(int)'\n'] = "\\n"; escapes[(int)'\r'] = "\\r"; escapes[(int)'\t'] = "\\t"; + escapes[(int)'\x7f'] = "\\u007f"; // U+007F DELETE initEscapes = true; } @@ -39,13 +47,13 @@ static void outputEscape() "static const char *escapes[256] = {\n"); for (unsigned int i = 0; i < 256; i++) { - if (!escapes[i]) { + if (escapes[i].empty()) { printf("\tNULL,\n"); } else { printf("\t\""); unsigned int si; - for (si = 0; si < strlen(escapes[i]); si++) { + for (si = 0; si < escapes[i].size(); si++) { char ch = escapes[i][si]; switch (ch) { case '"': diff --git a/lib/univalue_escapes.h b/lib/univalue_escapes.h index 
4133b24ca11f85..74596aab6d2c99 100644 --- a/lib/univalue_escapes.h +++ b/lib/univalue_escapes.h @@ -2,38 +2,38 @@ #ifndef BITCOIN_UNIVALUE_UNIVALUE_ESCAPES_H #define BITCOIN_UNIVALUE_UNIVALUE_ESCAPES_H static const char *escapes[256] = { - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, + "\\u0000", + "\\u0001", + "\\u0002", + "\\u0003", + "\\u0004", + "\\u0005", + "\\u0006", + "\\u0007", "\\b", "\\t", "\\n", - NULL, + "\\u000b", "\\f", "\\r", - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, + "\\u000e", + "\\u000f", + "\\u0010", + "\\u0011", + "\\u0012", + "\\u0013", + "\\u0014", + "\\u0015", + "\\u0016", + "\\u0017", + "\\u0018", + "\\u0019", + "\\u001a", + "\\u001b", + "\\u001c", + "\\u001d", + "\\u001e", + "\\u001f", NULL, NULL, "\\\"", @@ -129,7 +129,7 @@ static const char *escapes[256] = { NULL, NULL, NULL, - NULL, + "\\u007f", NULL, NULL, NULL, diff --git a/lib/univalue_write.cpp b/lib/univalue_write.cpp index bce3997af77c40..8076ed38d00558 100644 --- a/lib/univalue_write.cpp +++ b/lib/univalue_write.cpp @@ -25,10 +25,10 @@ static string json_escape(const string& inS) if (escStr) outS += escStr; - else if (isprint(ch)) + else if (ch < 0x80) outS += ch; - else { + else { // TODO handle UTF-8 properly char tmpesc[16]; sprintf(tmpesc, "\\u%04x", ch); outS += tmpesc; diff --git a/test/round1.json b/test/round1.json new file mode 100644 index 00000000000000..a711e7308bf76e --- /dev/null +++ b/test/round1.json @@ -0,0 +1 @@ +["\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f"] diff --git a/test/unitester.cpp b/test/unitester.cpp index 835556e031708f..c87b86acce3330 100644 --- a/test/unitester.cpp +++ b/test/unitester.cpp @@ -20,14 
+20,21 @@ using namespace std; string srcdir(JSON_TEST_SRC); +static std::string rtrim(std::string s) +{ + s.erase(s.find_last_not_of(" \n\r\t")+1); + return s; +} + static void runtest(string filename, const string& jdata) { fprintf(stderr, "test %s\n", filename.c_str()); string prefix = filename.substr(0, 4); - bool wantPass = (prefix == "pass"); + bool wantPass = (prefix == "pass") || (prefix == "roun"); bool wantFail = (prefix == "fail"); + bool wantRoundTrip = (prefix == "roun"); assert(wantPass || wantFail); UniValue val; @@ -38,6 +45,11 @@ static void runtest(string filename, const string& jdata) } else { assert(testResult == false); } + + if (wantRoundTrip) { + std::string odata = val.write(0, 0); + assert(odata == rtrim(jdata)); + } } static void runtest_file(const char *filename_) @@ -102,6 +114,7 @@ static const char *filenames[] = { "pass1.json", "pass2.json", "pass3.json", + "round1.json", // round-trip test }; int main (int argc, char *argv[]) From 74821631ec98b95e8491e701c55953003cde1a45 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Wed, 2 Dec 2015 12:17:28 +0100 Subject: [PATCH 2/2] Add new testcase to Makefile.am Otherwise it's not part of 'make dist' --- Makefile.am | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile.am b/Makefile.am index df9e66229c4069..9e1aede222adc9 100644 --- a/Makefile.am +++ b/Makefile.am @@ -79,6 +79,7 @@ TEST_FILES = \ $(TEST_DATA_DIR)/fail9.json \ $(TEST_DATA_DIR)/pass1.json \ $(TEST_DATA_DIR)/pass2.json \ - $(TEST_DATA_DIR)/pass3.json + $(TEST_DATA_DIR)/pass3.json \ + $(TEST_DATA_DIR)/round1.json EXTRA_DIST=$(TEST_FILES) $(GEN_SRCS)