Skip to content

Commit

Permalink
Homogenize console output (which may be a mix of MBS and UTF-8) to UTF-8
Browse files Browse the repository at this point in the history
  • Loading branch information
jcheng5 committed Mar 23, 2011
1 parent 7f71b97 commit b6d47e7
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 18 deletions.
50 changes: 42 additions & 8 deletions src/cpp/core/StringUtils.cpp
Expand Up @@ -57,17 +57,19 @@ void convertLineEndings(std::string* pStr, LineEnding type)
*pStr = boost::regex_replace(*pStr, boost::regex("\\r?\\n|\\xE2\\x80[\\xA8\\xA9]"), replacement);
}

void utf8ToSystem(const std::string& str,
std::string* pOutput,
bool escapeInvalidChars)
std::string utf8ToSystem(const std::string& str,
bool escapeInvalidChars)
{
if (str.empty())
return std::string();

#ifdef _WIN32
wchar_t wide[str.length() + 1];
int chars = ::MultiByteToWideChar(CP_UTF8, 0, str.c_str(), -1, wide, sizeof(wide));
if (chars < 0)
{
*pOutput = str;
return;
LOG_ERROR(systemError(::GetLastError(), ERROR_LOCATION));
return str;
}

std::ostringstream output;
Expand All @@ -80,15 +82,47 @@ void utf8ToSystem(const std::string& str,
if (escapeInvalidChars)
output << "\\u{" << std::hex << wide[i] << "}";
else
output << "?";
output << "?"; // TODO: Use GetCPInfo()
}
else
output.write(mbbuf, mbc);
}
*pOutput = output.str();
return output.str();
#else
// Assumes that UTF8 is the locale on POSIX
*pOutput = str;
return str;
#endif
}

// Converts a string from the system encoding to UTF-8.
//
// On Windows the input is interpreted in the active ANSI code page (CP_ACP),
// widened to UTF-16, and then re-encoded as UTF-8. If any conversion step
// fails, the Win32 error is logged and the input is returned unchanged.
// On other platforms the input is returned as-is.
//
// @param str  Text in the system encoding; may be empty.
// @return     The UTF-8 form of str, or str itself on failure / non-Windows.
std::string systemToUtf8(const std::string& str)
{
   if (str.empty())
      return std::string();

#ifdef _WIN32
   // Heap-backed buffer instead of a variable-length array (VLAs are a
   // compiler extension, not standard C++).
   std::vector<wchar_t> wide(str.length() + 1);

   // The last argument is the capacity in wchar_t elements, NOT in bytes.
   int chars = ::MultiByteToWideChar(CP_ACP, 0,
                                     str.c_str(), static_cast<int>(str.length()),
                                     &(wide[0]), static_cast<int>(wide.size()));
   // MultiByteToWideChar signals failure by returning 0 (never a negative
   // value); the input is non-empty here, so 0 can only mean an error.
   if (chars <= 0)
   {
      LOG_ERROR(systemError(::GetLastError(), ERROR_LOCATION));
      return str;
   }

   // First call with a NULL buffer only computes the required size in bytes.
   int bytesRequired = ::WideCharToMultiByte(CP_UTF8, 0, &(wide[0]), chars,
                                             NULL, 0,
                                             NULL, NULL);
   if (bytesRequired == 0)
   {
      LOG_ERROR(systemError(::GetLastError(), ERROR_LOCATION));
      return str;
   }

   std::vector<char> buf(bytesRequired, 0);
   int bytesWritten = ::WideCharToMultiByte(CP_UTF8, 0, &(wide[0]), chars,
                                            &(buf[0]), static_cast<int>(buf.size()),
                                            NULL, NULL);
   if (bytesWritten == 0)
   {
      LOG_ERROR(systemError(::GetLastError(), ERROR_LOCATION));
      return str;
   }

   // Only hand back the bytes that were actually produced.
   return std::string(buf.begin(), buf.begin() + bytesWritten);
#else
   return str;
#endif
}

Expand Down
6 changes: 3 additions & 3 deletions src/cpp/core/include/core/StringUtils.hpp
Expand Up @@ -27,9 +27,9 @@ enum LineEnding {
LineEndingPassthrough
};

void utf8ToSystem(const std::string& str,
std::string* pOutput,
bool escapeInvalidChars=false);
// Converts UTF-8 text to the system encoding and returns the result.
// On Windows, when escapeInvalidChars is true some characters are emitted as
// "\u{<hex>}" escapes and otherwise as "?" (presumably those that cannot be
// represented in the system code page -- the deciding check is not visible
// in this view; confirm against the implementation). On other platforms the
// input is returned unchanged, on the assumption that the locale is UTF-8.
std::string utf8ToSystem(const std::string& str,
bool escapeInvalidChars=false);
// Converts text in the system encoding to UTF-8. On Windows the input is
// read using CP_ACP; if conversion fails the error is logged and the input
// is returned unchanged. On other platforms the input is returned as-is.
std::string systemToUtf8(const std::string& str);

std::string toLower(const std::string& str);
std::string textToHtml(const std::string& str);
Expand Down
31 changes: 26 additions & 5 deletions src/cpp/r/RUtil.cpp
Expand Up @@ -15,8 +15,10 @@
#include <r/RUtil.hpp>

#include <boost/algorithm/string/replace.hpp>
#include <boost/regex.hpp>

#include <core/FilePath.hpp>
#include <core/StringUtils.hpp>

#include <r/RExec.hpp>

Expand All @@ -26,12 +28,12 @@ using namespace core;

namespace r {
namespace util {

// Passes the given file name through R's R_ExpandFileName and returns the
// result as an owned std::string.
std::string expandFileName(const std::string& name)
{
   const char* expanded = R_ExpandFileName(name.c_str());
   return std::string(expanded);
}

std::string fixPath(const std::string& path)
{
// R sometimes gives us a path with double slashes in it ("//"). Eliminate them.
Expand All @@ -55,9 +57,28 @@ bool hasRequiredVersion(const std::string& version)
return hasRequired;
}
}


} // namespace util

std::string rconsole2utf8(const std::string& encoded)
{
boost::regex utf8("\x02\xFF\xFE(.*?)(\x03\xFF\xFE|\\')");

std::string output;
std::string::const_iterator pos = encoded.begin();
boost::smatch m;
while (pos != encoded.end() && boost::regex_search(pos, encoded.end(), m, utf8))
{
if (pos < m[0].first)
output.append(string_utils::systemToUtf8(std::string(pos, m[0].first)));
output.append(m[1].first, m[1].second);
pos = m[0].second;
}
if (pos != encoded.end())
output.append(string_utils::systemToUtf8(std::string(pos, encoded.end())));

return output;
}

} // namespace util
} // namespace r


Expand Down
2 changes: 2 additions & 0 deletions src/cpp/r/include/r/RUtil.hpp
Expand Up @@ -29,6 +29,8 @@ std::string fixPath(const std::string& path);

bool hasRequiredVersion(const std::string& version);

std::string rconsole2utf8(const std::string& encoded);

} // namespace util
} // namespace r

Expand Down
1 change: 1 addition & 0 deletions src/cpp/r/session/RSession.cpp
Expand Up @@ -655,6 +655,7 @@ void RWriteConsoleEx (const char *buf, int buflen, int otype)
{
// get output
std::string output = std::string(buf,buflen);
output = util::rconsole2utf8(output);

// add to console actions
int type = otype == 1 ? kConsoleActionOutputError :
Expand Down
4 changes: 2 additions & 2 deletions src/cpp/session/SessionMain.cpp
Expand Up @@ -850,7 +850,7 @@ Error extractConsoleInput(const json::JsonRpcRequest& request)
{
// get console input to return to R
std::string text = request.params[0].get_str();
string_utils::utf8ToSystem(text, &text, true);
text = string_utils::utf8ToSystem(text, true);
addToConsoleInputBuffer(r::session::RConsoleInput(text));

// return success
Expand Down Expand Up @@ -1222,7 +1222,7 @@ void rBusy(bool busy)

// Queues a chunk of console output as a client event.
// An otype of 1 is reported as kConsoleWriteError; any other value is
// reported as kConsoleWriteOutput.
void rConsoleWrite(const std::string& output, int otype)
{
   // Declared exactly once: the diff view showed both the old and the new
   // (whitespace-only change) copy of this line, which would not compile.
   int event = otype == 1 ? kConsoleWriteError : kConsoleWriteOutput;
   ClientEvent writeEvent(event, output);
   session::clientEventQueue().add(writeEvent);
}
Expand Down

0 comments on commit b6d47e7

Please sign in to comment.