Skip to content

Commit

Permalink
fix: json string serializer improperly escaping characters (transmiss…
Browse files Browse the repository at this point in the history
…ion#6005)

* feat: escape json string according to RFC8259

* fix: do not append newline when json serde is in compact mode

* fix: json tests

1. Use the same locale settings as the apps
2. Added additional test case for a string that are known to be prone to locale issues
3. Removed test for escaping non-BMP characters to UTF-16 escape sequences

* chore: add more test cases to `JSONTest.testUtf8`

* chore: order cases in the same order as RFC8259
  • Loading branch information
tearfur committed Oct 17, 2023
1 parent d273e0f commit 0259edb
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 76 deletions.
20 changes: 17 additions & 3 deletions libtransmission/utils.cc
Expand Up @@ -60,18 +60,32 @@ time_t libtransmission::detail::tr_time::current_time = {};

// ---

void tr_locale_set_global(char const* locale_name) noexcept
std::optional<std::locale> tr_locale_set_global(char const* locale_name) noexcept
{
try
{
std::locale::global(std::locale{ locale_name });
return tr_locale_set_global(std::locale{ locale_name });
}
catch (std::runtime_error const&)
{
return {};
}
}

std::optional<std::locale> tr_locale_set_global(std::locale const& locale) noexcept
{
try
{
auto old_locale = std::locale::global(locale);

std::cout.imbue(std::locale{});
std::cerr.imbue(std::locale{});

return old_locale;
}
catch (std::exception const&)
{
// Ignore.
return {};
}
}

Expand Down
5 changes: 4 additions & 1 deletion libtransmission/utils.h
Expand Up @@ -10,6 +10,7 @@
#include <cstdint> // uint8_t, uint32_t, uint64_t
#include <cstddef> // size_t
#include <ctime> // time_t
#include <locale>
#include <memory>
#include <optional>
#include <string>
Expand Down Expand Up @@ -54,7 +55,9 @@ struct tr_error;
#define tr_ngettext(singular, plural, count) ((count) == 1 ? (singular) : (plural))
#endif

void tr_locale_set_global(char const* locale_name) noexcept;
std::optional<std::locale> tr_locale_set_global(char const* locale_name) noexcept;

std::optional<std::locale> tr_locale_set_global(std::locale const& locale) noexcept;

// ---

Expand Down
64 changes: 21 additions & 43 deletions libtransmission/variant-json.cc
Expand Up @@ -569,41 +569,36 @@ void jsonRealFunc(tr_variant const& /*var*/, double const val, void* vdata)
jsonChildFunc(data);
}

[[nodiscard]] char* write_escaped_char(char* buf, char const* const end, std::string_view& sv)
{
auto u16buf = std::array<std::uint16_t, 2>{};

auto const* const begin8 = std::data(sv);
auto const* const end8 = begin8 + std::size(sv);
auto const* walk8 = begin8;
utf8::next(walk8, end8);
auto const end16 = utf8::utf8to16(begin8, walk8, std::begin(u16buf));

for (auto it = std::cbegin(u16buf); it != end16; ++it)
{
buf = fmt::format_to_n(buf, end - buf - 1, FMT_COMPILE("\\u{:04x}"), *it).out;
}

sv.remove_prefix(walk8 - begin8 - 1);
return buf;
}

// https://datatracker.ietf.org/doc/html/rfc8259#section-7
void jsonStringFunc(tr_variant const& /*var*/, std::string_view sv, void* vdata)
{
auto* const data = static_cast<struct JsonWalk*>(vdata);

auto const utf8_str = tr_strv_convert_utf8(sv);
auto utf8_sv = std::string_view{ utf8_str };

auto& out = data->out;
auto const [buf, buflen] = out.reserve_space(std::size(sv) * 6 + 2);
auto const [buf, buflen] = out.reserve_space(std::size(utf8_sv) * 6 + 2);
auto* walk = reinterpret_cast<char*>(buf);
auto const* const begin = walk;
auto const* const end = begin + buflen;

*walk++ = '"';

for (; !std::empty(sv); sv.remove_prefix(1))
for (; !std::empty(utf8_sv); utf8_sv.remove_prefix(1))
{
switch (sv.front())
switch (utf8_sv.front())
{
case '"':
*walk++ = '\\';
*walk++ = '"';
break;

case '\\':
*walk++ = '\\';
*walk++ = '\\';
break;

case '\b':
*walk++ = '\\';
*walk++ = 'b';
Expand All @@ -629,31 +624,14 @@ void jsonStringFunc(tr_variant const& /*var*/, std::string_view sv, void* vdata)
*walk++ = 't';
break;

case '"':
*walk++ = '\\';
*walk++ = '"';
break;

case '\\':
*walk++ = '\\';
*walk++ = '\\';
break;

default:
if (isprint((unsigned char)sv.front()) != 0)
if (utf8_sv.front() >= '\u0000' && utf8_sv.front() <= '\u001f')
{
*walk++ = sv.front();
walk = fmt::format_to_n(walk, end - walk - 1, "\\u{:04x}", utf8_sv.front()).out;
}
else
{
try
{
walk = write_escaped_char(walk, end, sv);
}
catch (utf8::exception const&)
{
*walk++ = '?';
}
*walk++ = utf8_sv.front();
}
break;
}
Expand Down Expand Up @@ -733,7 +711,7 @@ std::string tr_variant_serde::to_json_string(tr_variant const& var) const
walk(var, Funcs, &data, true);

auto& buf = data.out;
if (!std::empty(buf))
if (!compact_ && !std::empty(buf))
{
buf.push_back('\n');
}
Expand Down
61 changes: 32 additions & 29 deletions tests/libtransmission/json-test.cc
Expand Up @@ -13,6 +13,7 @@
#include <string_view>

#include <libtransmission/quark.h>
#include <libtransmission/utils.h>
#include <libtransmission/variant.h>

#include "gtest/gtest.h"
Expand All @@ -24,12 +25,11 @@ class JSONTest : public ::testing::TestWithParam<char const*>
protected:
void SetUp() override
{
::testing::TestWithParam<char const*>::SetUp();

auto const* locale_str = GetParam();
try
{
old_locale_ = std::locale::global(std::locale{ {}, new std::numpunct_byname<char>{ locale_str } });
}
catch (std::runtime_error const&)
old_locale_ = tr_locale_set_global(locale_str);
if (!old_locale_)
{
GTEST_SKIP();
}
Expand All @@ -39,8 +39,10 @@ class JSONTest : public ::testing::TestWithParam<char const*>
{
if (old_locale_)
{
std::locale::global(*old_locale_);
tr_locale_set_global(*old_locale_);
}

::testing::TestWithParam<char const*>::TearDown();
}

private:
Expand Down Expand Up @@ -95,8 +97,7 @@ TEST_P(JSONTest, testUtf8)
auto sv = std::string_view{};
tr_quark const key = tr_quark_new("key"sv);

auto serde = tr_variant_serde::json();
serde.inplace();
auto serde = tr_variant_serde::json().inplace().compact();
auto var = serde.parse(in).value_or(tr_variant{});
EXPECT_TRUE(var.holds_alternative<tr_variant::Map>());
EXPECT_TRUE(tr_variantDictFindStrView(&var, key, &sv));
Expand All @@ -114,7 +115,7 @@ TEST_P(JSONTest, testUtf8)
* 1. Feed it JSON-escaped nonascii to the JSON decoder.
* 2. Confirm that the result is UTF-8.
* 3. Feed the same UTF-8 back into the JSON encoder.
* 4. Confirm that the result is JSON-escaped.
* 4. Confirm that the result is UTF-8.
* 5. Dogfood that result back into the parser.
* 6. Confirm that the result is UTF-8.
*/
Expand All @@ -127,32 +128,34 @@ TEST_P(JSONTest, testUtf8)
var.clear();

EXPECT_FALSE(std::empty(json));
EXPECT_NE(std::string::npos, json.find("\\u00f6"));
EXPECT_NE(std::string::npos, json.find("\\u00e9"));
EXPECT_EQ(R"({"key":"Letöltések"})"sv, json);
var = serde.parse(json).value_or(tr_variant{});
EXPECT_TRUE(var.holds_alternative<tr_variant::Map>());
EXPECT_TRUE(tr_variantDictFindStrView(&var, key, &sv));
EXPECT_EQ("Letöltések"sv, sv);
}

TEST_P(JSONTest, testUtf16Surrogates)
{
static auto constexpr ThinkingFaceEmojiUtf8 = "\xf0\x9f\xa4\x94"sv;
auto var = tr_variant{};
tr_variantInitDict(&var, 1);
auto const key = tr_quark_new("key"sv);
tr_variantDictAddStr(&var, key, ThinkingFaceEmojiUtf8);
// Test string known to be prone to locale issues
// https://github.com/transmission/transmission/issues/5967
var.clear();
tr_variantInitDict(&var, 1U);
tr_variantDictAddStr(&var, key, "Дыскаграфія"sv);
json = serde.to_string(var);
EXPECT_EQ(R"({"key":"Дыскаграфія"})"sv, json);
var = serde.parse(json).value_or(tr_variant{});
EXPECT_TRUE(var.holds_alternative<tr_variant::Map>());
EXPECT_TRUE(tr_variantDictFindStrView(&var, key, &sv));
EXPECT_EQ("Дыскаграфія"sv, sv);

auto serde = tr_variant_serde::json();
auto const json = serde.compact().to_string(var);
EXPECT_NE(std::string::npos, json.find("ud83e"));
EXPECT_NE(std::string::npos, json.find("udd14"));

auto parsed = serde.parse(json).value_or(tr_variant{});
EXPECT_TRUE(parsed.holds_alternative<tr_variant::Map>());
auto value = std::string_view{};
EXPECT_TRUE(tr_variantDictFindStrView(&parsed, key, &value));
EXPECT_EQ(ThinkingFaceEmojiUtf8, value);
// Thinking emoji 🤔
var.clear();
tr_variantInitDict(&var, 1U);
tr_variantDictAddStr(&var, key, "\xf0\x9f\xa4\x94"sv);
json = serde.to_string(var);
EXPECT_EQ("{\"key\":\"\xf0\x9f\xa4\x94\"}"sv, json);
var = serde.parse(json).value_or(tr_variant{});
EXPECT_TRUE(var.holds_alternative<tr_variant::Map>());
EXPECT_TRUE(tr_variantDictFindStrView(&var, key, &sv));
EXPECT_EQ("\xf0\x9f\xa4\x94"sv, sv);
}

TEST_P(JSONTest, test1)
Expand Down

0 comments on commit 0259edb

Please sign in to comment.