diff --git a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h index e3583634e4322..7065874ea6d32 100644 --- a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h @@ -45,7 +45,11 @@ struct __libcpp_locale_guard { // each category. In the second case, we know at least one category won't // be what we want, so we only have to check the first case. if (std::strcmp(__l.__get_locale(), __lc) != 0) { - __locale_all = _strdup(__lc); + // Use wsetlocale to query the current locale string. This avoids a lossy + // conversion of the locale string from UTF-16 to the current LC_CTYPE + // charset. The Windows CRT allows language / country strings outside of + // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8". + __locale_all = _wcsdup(__wsetlocale(nullptr)); if (__locale_all == nullptr) __throw_bad_alloc(); __setlocale(__l.__get_locale()); @@ -57,7 +61,7 @@ struct __libcpp_locale_guard { // for the different categories in the same format as returned by // setlocale(LC_ALL, nullptr). if (__locale_all != nullptr) { - __setlocale(__locale_all); + __wsetlocale(__locale_all); free(__locale_all); } _configthreadlocale(__status); @@ -68,8 +72,14 @@ struct __libcpp_locale_guard { __throw_bad_alloc(); return __new_locale; } + static const wchar_t* __wsetlocale(const wchar_t* __locale) { + const wchar_t* __new_locale = _wsetlocale(LC_ALL, __locale); + if (__new_locale == nullptr) + __throw_bad_alloc(); + return __new_locale; + } int __status; - char* __locale_all = nullptr; + wchar_t* __locale_all = nullptr; }; #endif diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h index 0df8709f118d0..39391ea1fd9c4 100644 --- a/libcxx/include/__locale_dir/support/windows.h +++ b/libcxx/include/__locale_dir/support/windows.h @@ -162,6 +162,12 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc std::__throw_bad_alloc(); return __new_locale; } +inline _LIBCPP_HIDE_FROM_ABI wchar_t* __wsetlocale(int __category, const wchar_t* __locale) { + wchar_t* __new_locale = ::_wsetlocale(__category, __locale); + if (__new_locale == nullptr) + std::__throw_bad_alloc(); + return __new_locale; +} _LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc); #endif // _LIBCPP_BUILDING_LIBRARY @@ -309,7 +315,11 @@ struct __locale_guard { // each category. In the second case, we know at least one category won't // be what we want, so we only have to check the first case. if (std::strcmp(__l.__get_locale(), __lc) != 0) { - __locale_all = _strdup(__lc); + // Use wsetlocale to query the current locale string. This avoids a lossy + // conversion of the locale string from UTF-16 to the current LC_CTYPE + // charset. The Windows CRT allows language / country strings outside of + // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8". + __locale_all = _wcsdup(__locale::__wsetlocale(LC_ALL, nullptr)); if (__locale_all == nullptr) std::__throw_bad_alloc(); __locale::__setlocale(LC_ALL, __l.__get_locale()); @@ -321,13 +331,13 @@ struct __locale_guard { // for the different categories in the same format as returned by // setlocale(LC_ALL, nullptr). if (__locale_all != nullptr) { - __locale::__setlocale(LC_ALL, __locale_all); + __locale::__wsetlocale(LC_ALL, __locale_all); free(__locale_all); } _configthreadlocale(__status); } int __status; - char* __locale_all = nullptr; + wchar_t* __locale_all = nullptr; }; #endif // _LIBCPP_BUILDING_LIBRARY