From 84b76df570dc80b5638d76899aa7bb5c141da301 Mon Sep 17 00:00:00 2001 From: Kevin Ushey Date: Fri, 15 Jan 2021 14:48:55 -0800 Subject: [PATCH] use Python-compatible unicode escapes --- src/cpp/core/StringUtils.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/cpp/core/StringUtils.cpp b/src/cpp/core/StringUtils.cpp index 9bebce6774d..1c0117993f5 100644 --- a/src/cpp/core/StringUtils.cpp +++ b/src/cpp/core/StringUtils.cpp @@ -287,9 +287,18 @@ std::string utf8ToSystem(const std::string& str, if (n == -1) { if (escapeInvalidChars) - output << "\\u{" << std::hex << wide[i] << "}"; + { + // NOTE: in R, both '\u{1234}' and '\u1234' are valid + // ways of specifying a unicode literal, but only the + // latter is accepted by Python, and since the reticulate + // REPL uses the same conversion routines we prefer the + // format compatible with both parsers + output << "\\u" << std::hex << wide[i]; + } else + { output << "?"; // TODO: Use GetCPInfo() + } } else {