Skip to content

Commit

Permalink
[libcxx] Fix the ctype is (pointer version) function for Windows
Browse files Browse the repository at this point in the history
Previously, this test snippet would report incorrect information:

    F::mask m;
    std::wstring in(L"\u00DA"); // LATIN CAPITAL LETTER U WITH ACUTE
    f.is(in.data(), in.data() + 1, &m);
    // m & F::lower would be set

The single-character version of the `is` function wasn't
affected by this issue though.

Define `_LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA` for Windows,
as the `alpha` / `_ALPHA` constant is a mask consisting of
multiple bits set, which avoids setting `alpha` whenever any
of the bits is set, in the `do_is` implementation.

On Windows, with the "C" locale, wchars are classified according
to their Unicode interpretation, just as in the en_US.UTF-8 locale on
all platforms.

Due to the differing classification of some characters, the
`scan_is` and `scan_not` tests are quite annoying to fix, thus just
ifdef out some of the tests for the "C" locale there - the code gets
tested with the more standard en_US.UTF-8 locale anyway.

Differential Revision: https://reviews.llvm.org/D120796
  • Loading branch information
mstorsjo committed Mar 4, 2022
1 parent 3347e7d commit 45415ef
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 13 deletions.
1 change: 1 addition & 0 deletions libcxx/include/__locale
Expand Up @@ -454,6 +454,7 @@ public:
static const mask blank = _BLANK;
static const mask __regex_word = 0x4000; // 0x8000 and 0x0100 and 0x00ff are used
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT
# define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
#elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__)
# ifdef __APPLE__
typedef __uint32_t mask;
Expand Down
Expand Up @@ -13,7 +13,6 @@
// bool is(mask m, charT c) const;

// REQUIRES: locale.en_US.UTF-8
// XFAIL: LIBCXX-WINDOWS-FIXME
// XFAIL: libcpp-has-no-wide-characters

#include <locale>
Expand Down Expand Up @@ -107,8 +106,15 @@ int main(int, char**)
assert(f.is(F::graph, L'.'));
assert(!f.is(F::graph, L'\x07'));

#if defined(_WIN32)
// On Windows, these wchars are classified according to their
// Unicode interpretation even in the "C" locale.
assert(f.is(F::alpha, L'\x00DA'));
assert(f.is(F::upper, L'\x00DA'));
#else
assert(!f.is(F::alpha, L'\x00DA'));
assert(!f.is(F::upper, L'\x00DA'));
#endif
}
}

Expand Down
Expand Up @@ -13,7 +13,6 @@
// const charT* do_is(const charT* low, const charT* high, mask* vec) const;

// REQUIRES: locale.en_US.UTF-8
// XFAIL: LIBCXX-WINDOWS-FIXME
// XFAIL: libcpp-has-no-wide-characters

#include <locale>
Expand Down Expand Up @@ -149,17 +148,27 @@ int main(int, char**)

// L'\x00DA'
assert(!(m[0] & F::space));
assert(!(m[0] & F::print));
assert(!(m[0] & F::cntrl));
assert(!(m[0] & F::upper));
assert(!(m[0] & F::lower));
assert(!(m[0] & F::alpha));
assert(!(m[0] & F::digit));
assert(!(m[0] & F::punct));
assert(!(m[0] & F::xdigit));
assert(!(m[0] & F::blank));
#if defined(_WIN32)
// On Windows, these wchars are classified according to their
// Unicode interpretation even in the "C" locale.
assert( (m[0] & F::alpha));
assert( (m[0] & F::upper));
assert( (m[0] & F::print));
assert( (m[0] & F::alnum));
assert( (m[0] & F::graph));
#else
assert(!(m[0] & F::alpha));
assert(!(m[0] & F::upper));
assert(!(m[0] & F::print));
assert(!(m[0] & F::alnum));
assert(!(m[0] & F::graph));
#endif

// L' '
assert( (m[1] & F::space));
Expand Down
Expand Up @@ -13,7 +13,6 @@
// const charT* scan_is(mask m, const charT* low, const charT* high) const;

// REQUIRES: locale.en_US.UTF-8
// XFAIL: LIBCXX-WINDOWS-FIXME
// XFAIL: libcpp-has-no-wide-characters

#include <locale>
Expand Down Expand Up @@ -57,17 +56,23 @@ int main(int, char**)
const std::wstring in(L"\x00DA A\x07.a1");
std::vector<F::mask> m(in.size());
assert(f.scan_is(F::space, in.data(), in.data() + in.size()) - in.data() == 1);
assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1);
assert(f.scan_is(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 3);
assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2);
assert(f.scan_is(F::lower, in.data(), in.data() + in.size()) - in.data() == 5);
assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2);
assert(f.scan_is(F::digit, in.data(), in.data() + in.size()) - in.data() == 6);
assert(f.scan_is(F::punct, in.data(), in.data() + in.size()) - in.data() == 4);
assert(f.scan_is(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 2);
assert(f.scan_is(F::blank, in.data(), in.data() + in.size()) - in.data() == 1);
#if !defined(_WIN32)
// On Windows, these wchars are classified according to their
// Unicode interpretation even in the "C" locale, where
// the scan_is function returns the same as above for the
// en_US.UTF-8 locale.
assert(f.scan_is(F::print, in.data(), in.data() + in.size()) - in.data() == 1);
assert(f.scan_is(F::upper, in.data(), in.data() + in.size()) - in.data() == 2);
assert(f.scan_is(F::alpha, in.data(), in.data() + in.size()) - in.data() == 2);
assert(f.scan_is(F::alnum, in.data(), in.data() + in.size()) - in.data() == 2);
assert(f.scan_is(F::graph, in.data(), in.data() + in.size()) - in.data() == 2);
#endif
}
}

Expand Down
Expand Up @@ -13,7 +13,6 @@
// const charT* scan_not(mask m, const charT* low, const charT* high) const;

// REQUIRES: locale.en_US.UTF-8
// XFAIL: LIBCXX-WINDOWS-FIXME
// XFAIL: libcpp-has-no-wide-characters

#include <locale>
Expand Down Expand Up @@ -57,17 +56,23 @@ int main(int, char**)
const std::wstring in(L"\x00DA A\x07.a1");
std::vector<F::mask> m(in.size());
assert(f.scan_not(F::space, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::cntrl, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::lower, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::digit, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::punct, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::xdigit, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::blank, in.data(), in.data() + in.size()) - in.data() == 0);
#if !defined(_WIN32)
// On Windows, these wchars are classified according to their
// Unicode interpretation even in the "C" locale, where
// the scan_not function returns the same as above for the
// en_US.UTF-8 locale.
assert(f.scan_not(F::print, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::upper, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::alpha, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::alnum, in.data(), in.data() + in.size()) - in.data() == 0);
assert(f.scan_not(F::graph, in.data(), in.data() + in.size()) - in.data() == 0);
#endif
}
}

Expand Down

0 comments on commit 45415ef

Please sign in to comment.