From 45415ef91be5311939dfb0bf11a87b1722f68d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 21 Jan 2022 23:21:31 +0000 Subject: [PATCH] [libcxx] Fix the ctype `is` (pointer version) function for Windows Previously, this test snippet would report incorrect information: F::mask m; std::wstring in(L"\u00DA"); // LATIN CAPITAL LETTER U WITH ACUTE f.is(in.data(), in.data() + 1, &m); // m & F::lower would be set The single-character version of the `is` function wasn't affected by this issue though. Define `_LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA` for Windows, as the `alpha` / `_ALPHA` constant is a mask consisting of multiple bits set, which avoids setting `alpha` whenever any of the bits is set, in the `do_is` implementation. On Windows, with the "C" locale, wchars are classified according to their Unicode interpretation, just as in the en_US.UTF-8 locale on all platforms. Due to the differing classification of some characters, the `scan_is` and `scan_not` tests are quite annoying to fix, thus just ifdef out some of the tests for the "C" locale there - the code gets tested with the more standard en_US.UTF-8 locale anyway. 
Differential Revision: https://reviews.llvm.org/D120796 --- libcxx/include/__locale | 1 + .../locale.ctype.byname/is_1.pass.cpp | 8 +++++++- .../locale.ctype.byname/is_many.pass.cpp | 17 +++++++++++++---- .../locale.ctype.byname/scan_is.pass.cpp | 13 +++++++++---- .../locale.ctype.byname/scan_not.pass.cpp | 13 +++++++++---- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 67fc9d1f58e6a..e1781986fd35a 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -454,6 +454,7 @@ public: static const mask blank = _BLANK; static const mask __regex_word = 0x4000; // 0x8000 and 0x0100 and 0x00ff are used # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT +# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) # ifdef __APPLE__ typedef __uint32_t mask; diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp index 04ab9101a544c..392e37373f81b 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_1.pass.cpp @@ -13,7 +13,6 @@ // bool is(mask m, charT c) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: LIBCXX-WINDOWS-FIXME // XFAIL: libcpp-has-no-wide-characters #include @@ -107,8 +106,15 @@ int main(int, char**) assert(f.is(F::graph, L'.')); assert(!f.is(F::graph, L'\x07')); +#if defined(_WIN32) + // On Windows, these wchars are classified according to their + // Unicode interpretation even in the "C" locale. 
+ assert(f.is(F::alpha, L'\x00DA')); + assert(f.is(F::upper, L'\x00DA')); +#else assert(!f.is(F::alpha, L'\x00DA')); assert(!f.is(F::upper, L'\x00DA')); +#endif } } diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp index 4f163d7f24621..1c7c1f81fc399 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/is_many.pass.cpp @@ -13,7 +13,6 @@ // const charT* do_is(const charT* low, const charT* high, mask* vec) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: LIBCXX-WINDOWS-FIXME // XFAIL: libcpp-has-no-wide-characters #include @@ -149,17 +148,27 @@ int main(int, char**) // L'\x00DA' assert(!(m[0] & F::space)); - assert(!(m[0] & F::print)); assert(!(m[0] & F::cntrl)); - assert(!(m[0] & F::upper)); assert(!(m[0] & F::lower)); - assert(!(m[0] & F::alpha)); assert(!(m[0] & F::digit)); assert(!(m[0] & F::punct)); assert(!(m[0] & F::xdigit)); assert(!(m[0] & F::blank)); +#if defined(_WIN32) + // On Windows, these wchars are classified according to their + // Unicode interpretation even in the "C" locale. 
+ assert( (m[0] & F::alpha)); + assert( (m[0] & F::upper)); + assert( (m[0] & F::print)); + assert( (m[0] & F::alnum)); + assert( (m[0] & F::graph)); +#else + assert(!(m[0] & F::alpha)); + assert(!(m[0] & F::upper)); + assert(!(m[0] & F::print)); assert(!(m[0] & F::alnum)); assert(!(m[0] & F::graph)); +#endif // L' ' assert( (m[1] & F::space)); diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp index 24d001000af70..163bd7a501d23 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_is.pass.cpp @@ -13,7 +13,6 @@ // const charT* scan_is(mask m, const charT* low, const charT* high) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: LIBCXX-WINDOWS-FIXME // XFAIL: libcpp-has-no-wide-characters #include @@ -57,17 +56,23 @@ int main(int, char**) const std::wstring in(L"\x00DA A\x07.a1"); std::vector m(in.size()); assert(f.scan_is(F::space, in.data(), in.data() + in.size()) - in.data() == 1); - assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1); assert(f.scan_is(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 3); - assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2); assert(f.scan_is(F::lower, in.data(), in.data() + in.size()) - in.data() == 5); - assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2); assert(f.scan_is(F::digit, in.data(), in.data() + in.size()) - in.data() == 6); assert(f.scan_is(F::punct, in.data(), in.data() + in.size()) - in.data() == 4); assert(f.scan_is(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 2); assert(f.scan_is(F::blank, in.data(), in.data() + in.size()) - in.data() == 1); +#if !defined(_WIN32) + // On Windows, these wchars are 
classified according to their + // Unicode interpretation even in the "C" locale, where + // the scan_is function returns the same as above for the + // en_US.UTF-8 locale. + assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1); + assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2); + assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2); assert(f.scan_is(F::alnum, in.data(), in.data() + in.size()) - in.data() == 2); assert(f.scan_is(F::graph, in.data(), in.data() + in.size()) - in.data() == 2); +#endif } } diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp index 7e3c8183b1518..e2c34f2527fd4 100644 --- a/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.ctype/locale.ctype.byname/scan_not.pass.cpp @@ -13,7 +13,6 @@ // const charT* scan_not(mask m, const charT* low, const charT* high) const; // REQUIRES: locale.en_US.UTF-8 -// XFAIL: LIBCXX-WINDOWS-FIXME // XFAIL: libcpp-has-no-wide-characters #include @@ -57,17 +56,23 @@ int main(int, char**) const std::wstring in(L"\x00DA A\x07.a1"); std::vector m(in.size()); assert(f.scan_not(F::space, in.data(), in.data() + in.size()) - in.data() == 0); - assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 0); - assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::lower, in.data(), in.data() + in.size()) - in.data() == 0); - assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::digit, in.data(), in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::punct, in.data(), 
in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::blank, in.data(), in.data() + in.size()) - in.data() == 0); +#if !defined(_WIN32) + // On Windows, these wchars are classified according to their + // Unicode interpretation even in the "C" locale, where + // the scan_not function returns the same as above for the + // en_US.UTF-8 locale. + assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0); + assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0); + assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::alnum, in.data(), in.data() + in.size()) - in.data() == 0); assert(f.scan_not(F::graph, in.data(), in.data() + in.size()) - in.data() == 0); +#endif } }