Skip to content

Commit

Permalink
Merge pull request #5504
Browse files Browse the repository at this point in the history
eeca5ca epee: support unicode in parsed strings (moneromooo-monero)
3e11bb5 functional_tests: test creating wallets with local language names (moneromooo-monero)
  • Loading branch information
luigi1111 committed Aug 17, 2019
2 parents 50c8147 + 3e11bb5 commit 14602ba
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 24 deletions.
59 changes: 59 additions & 0 deletions contrib/epee/include/storages/parserse_base_utils.h
Expand Up @@ -31,6 +31,9 @@
#include <algorithm>
#include <boost/utility/string_ref.hpp>

#undef MONERO_DEFAULT_LOG_CATEGORY
#define MONERO_DEFAULT_LOG_CATEGORY "serialization"

namespace epee
{
namespace misc_utils
Expand Down Expand Up @@ -62,6 +65,26 @@ namespace misc_utils
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

// Hex-digit lookup table indexed by byte value: '0'-'9' yield 0-9, 'A'-'F' and
// 'a'-'f' yield 10-15, and every other byte yields 0xff (not a hex digit).
static constexpr unsigned char isx[256] =
{
  /* 0x00-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x10-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x20-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x30-0x3f */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,                                   // '0'..'9'
  /* 0x40-0x4f */ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,                 // 'A'..'F'
  /* 0x50-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x60-0x6f */ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,                 // 'a'..'f'
  /* 0x70-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x80-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x90-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xa0-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xb0-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xc0-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xd0-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xe0-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xf0-0xff */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};

inline bool isspace(char c)
{
return lut[(uint8_t)c] & 8;
Expand Down Expand Up @@ -162,6 +185,42 @@ namespace misc_utils
val.push_back('\\');break;
case '/': //Slash character
val.push_back('/');break;
case 'u': //Unicode code point
  // Decodes the four hex digits that follow the 'u' into one 16-bit code point
  // and appends it to val encoded as UTF-8.
  // `it` is presumably still on the 'u' here (each digit below is fetched with a
  // pre-increment), so the 'u' plus its four digits need at least five characters
  // before buf_end; a shorter tail would dereference buf_end on the last digit.
  if (buf_end - it < 5)
  {
    ASSERT_MES_AND_THROW("Invalid Unicode escape sequence");
  }
  else
  {
    uint32_t dst = 0;
    for (int i = 0; i < 4; ++i)
    {
      // Index through unsigned char: plain char may be signed, and a byte >= 0x80
      // would otherwise turn into a negative index outside the 256-entry table.
      const unsigned char tmp = isx[static_cast<unsigned char>(*++it)];
      CHECK_AND_ASSERT_THROW_MES(tmp != 0xff, "Bad Unicode encoding");
      dst = dst << 4 | tmp;
    }
    // encode as UTF-8
    // NOTE(review): UTF-16 surrogate halves (0xd800-0xdfff) are encoded one by one
    // here rather than being combined into a single supplementary code point.
    if (dst <= 0x7f)
    {
      // one byte: 0xxxxxxx
      val.push_back(dst);
    }
    else if (dst <= 0x7ff)
    {
      // two bytes: 110xxxxx 10xxxxxx
      val.push_back(0xc0 | (dst >> 6));
      val.push_back(0x80 | (dst & 0x3f));
    }
    else if (dst <= 0xffff)
    {
      // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
      val.push_back(0xe0 | (dst >> 12));
      val.push_back(0x80 | ((dst >> 6) & 0x3f));
      val.push_back(0x80 | (dst & 0x3f));
    }
    else
    {
      // Four hex digits cannot exceed 0xffff; kept as a defensive guard.
      ASSERT_MES_AND_THROW("Unicode code point is out or range");
    }
  }
  break;
default:
val.push_back(*it);
LOG_PRINT_L0("Unknown escape sequence :\"\\" << *it << "\"");
Expand Down
24 changes: 2 additions & 22 deletions contrib/epee/include/string_tools.h
Expand Up @@ -59,26 +59,6 @@
#pragma comment (lib, "Rpcrt4.lib")
#endif

// Hex-digit lookup table indexed by byte value: '0'-'9' yield 0-9, 'A'-'F' and
// 'a'-'f' yield 10-15, and every other byte yields 0xff (not a hex digit).
static constexpr unsigned char isx[256] =
{
  /* 0x00-0x0f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x10-0x1f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x20-0x2f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x30-0x3f */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,                                   // '0'..'9'
  /* 0x40-0x4f */ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,                 // 'A'..'F'
  /* 0x50-0x5f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x60-0x6f */ 0xff, 10, 11, 12, 13, 14, 15, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,                 // 'a'..'f'
  /* 0x70-0x7f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x80-0x8f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0x90-0x9f */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xa0-0xaf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xb0-0xbf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xc0-0xcf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xd0-0xdf */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xe0-0xef */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
  /* 0xf0-0xff */ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
};

namespace epee
{
namespace string_tools
Expand All @@ -99,10 +79,10 @@ namespace string_tools
for(size_t i = 0; i < s.size(); i += 2)
{
int tmp = *src++;
tmp = isx[tmp];
tmp = epee::misc_utils::parse::isx[tmp];
if (tmp == 0xff) return false;
int t2 = *src++;
t2 = isx[t2];
t2 = epee::misc_utils::parse::isx[t2];
if (t2 == 0xff) return false;
*dst++ = (tmp << 4) | t2;
}
Expand Down
5 changes: 3 additions & 2 deletions tests/functional_tests/wallet_address.py
Expand Up @@ -198,8 +198,9 @@ def languages(self):
try: wallet.close_wallet()
except: pass
languages = res.languages
for language in languages:
print('Creating ' + str(language) + ' wallet')
languages_local = res.languages_local
for language in languages + languages_local:
print('Creating ' + language.encode('utf8') + ' wallet')
wallet.create_wallet(filename = '', language = language)
res = wallet.query_key('mnemonic')
wallet.close_wallet()
Expand Down
17 changes: 17 additions & 0 deletions tests/unit_tests/epee_utils.cpp
Expand Up @@ -946,3 +946,20 @@ TEST(parsing, number)
epee::misc_utils::parse::match_number(i, s.end(), val);
ASSERT_EQ(val, "+9.34e+03");
}

// Exercises \uXXXX escape handling in epee::misc_utils::parse::match_string:
// complete four-digit escapes must decode to the matching UTF-8 bytes, and
// escapes with fewer than four hex digits must make the call return false.
TEST(parsing, unicode)
{
  std::string bs;
  std::string s;
  std::string::const_iterator si;

  // Empty string and single-byte (ASCII range) code points.
  s = "\"\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "");
  s = "\"\\u0000\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, std::string(1, '\0'));
  s = "\"\\u0020\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, " ");
  // Truncated escapes: the closing quote lands where a hex digit is required.
  s = "\"\\u1\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs));
  s = "\"\\u12\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs));
  s = "\"\\u123\""; si = s.begin(); ASSERT_FALSE(epee::misc_utils::parse::match_string(si, s.end(), bs));
  // U+1234 is E1 88 B4 in UTF-8; spelled as byte escapes so the expectation
  // does not depend on the source file's encoding.
  s = "\"\\u1234\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "\xe1\x88\xb4");
  // Escapes embedded in, and chained between, ordinary characters.
  s = "\"foo\\u1234bar\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "fooሴbar");
  s = "\"\\u3042\\u307e\\u3084\\u304b\\u3059\""; si = s.begin(); ASSERT_TRUE(epee::misc_utils::parse::match_string(si, s.end(), bs)); ASSERT_EQ(bs, "あまやかす");
}

0 comments on commit 14602ba

Please sign in to comment.