Skip to content

Commit

Permalink
[libc++][format] Improves fill character.
Browse files Browse the repository at this point in the history
The main change is to allow a UCS scalar value as fill character.
Especially for char based formatting this increase the number of valid
characters. Originally this was to be expected ABI breaking, however the
current change does not seem to break the ABI.

Implements
- P2572 std::format() fill character allowances

Depends on D144499

Reviewed By: ldionne, tahonermann, #libc

Differential Revision: https://reviews.llvm.org/D144742
  • Loading branch information
mordante committed May 19, 2023
1 parent 4df44a0 commit 5db033e
Show file tree
Hide file tree
Showing 9 changed files with 289 additions and 18 deletions.
1 change: 1 addition & 0 deletions libcxx/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Implemented Papers
- P2675R1 - ``format``'s width estimation is too approximate and not forward compatible
- P2505R5 - Monadic operations for ``std::expected``
- P2711R1 - Making Multi-Param Constructors Of views explicit (``join_with_view`` is not done yet)
- P2572R1 - ``std::format`` fill character allowances

Improvements and New Features
-----------------------------
Expand Down
2 changes: 1 addition & 1 deletion libcxx/docs/Status/Cxx2bPapers.csv
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@
"`P2609R3 <https://wg21.link/P2609R3>`__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|"
"`P2713R1 <https://wg21.link/P2713R1>`__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|"
"`P2675R1 <https://wg21.link/P2675R1>`__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","|Complete|","17.0","|format|"
"`P2572R1 <https://wg21.link/P2572R1>`__","LWG", "``std::format`` fill character allowances","February 2023","","","|format|"
"`P2572R1 <https://wg21.link/P2572R1>`__","LWG", "``std::format`` fill character allowances","February 2023","|Complete|","17.0","|format|"
"`P2693R1 <https://wg21.link/P2693R1>`__","LWG", "Formatting ``thread::id`` and ``stacktrace``","February 2023","|Partial| [#note-P2693R1]_","","|format|"
"`P2679R2 <https://wg21.link/P2679R2>`__","LWG", "Fixing ``std::start_lifetime_as`` for arrays","February 2023","","",""
"`P2674R1 <https://wg21.link/P2674R1>`__","LWG", "A trait for implicit lifetime types","February 2023","","",""
Expand Down
2 changes: 1 addition & 1 deletion libcxx/docs/Status/FormatIssues.csv
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Number,Name,Standard,Assignee,Status,First released version
"`P2539R4 <https://wg21.link/P2539R4>`__","Should the output of ``std::print`` to a terminal be synchronized with the underlying stream?","C++23","Mark de Wever"
"`P2713R1 <https://wg21.link/P2713R1>`__","Escaping improvements in ``std::format``","C++23","Mark de Wever",""
"`P2675R1 <https://wg21.link/P2675R1>`__","``format``'s width estimation is too approximate and not forward compatible","C++23","Mark de Wever","|Complete|", Clang 17
"`P2572R1 <https://wg21.link/P2572R1>`__","``std::format`` fill character allowances","C++23","Mark de Wever","|In progress|"
"`P2572R1 <https://wg21.link/P2572R1>`__","``std::format`` fill character allowances","C++23","Mark de Wever","|Complete|", Clang 17
"`P2693R1 <https://wg21.link/P2693R1>`__","Formatting ``thread::id`` and ``stacktrace``","C++23","Mark de Wever","|In progress|"
`P1361 <https://wg21.link/P1361>`_,"Integration of chrono with text formatting","C++20",Mark de Wever,|In Progress|,
`P2372 <https://wg21.link/P2372>`__,"Fixing locale handling in chrono formatters","C++20",Mark de Wever,|In Progress|,
Expand Down
4 changes: 2 additions & 2 deletions libcxx/include/__format/formatter_floating_point.h
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,7 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form(
if (__size < __specs.__width_) {
if (__zero_padding) {
__specs.__alignment_ = __format_spec::__alignment::__right;
__specs.__fill_ = _CharT('0');
__specs.__fill_.__data[0] = _CharT('0');
}

__padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_);
Expand Down Expand Up @@ -712,7 +712,7 @@ __format_floating_point(_Tp __value, _FormatContext& __ctx, __format_spec::__par
// After the sign is written, zero padding is the same a right alignment
// with '0'.
__specs.__alignment_ = __format_spec::__alignment::__right;
__specs.__fill_ = _CharT('0');
__specs.__fill_.__data[0] = _CharT('0');
}

if (__num_trailing_zeros)
Expand Down
2 changes: 1 addition & 1 deletion libcxx/include/__format/formatter_integral.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer(
// - Write data right aligned with '0' as fill character.
__out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
__specs.__alignment_ = __format_spec::__alignment::__right;
__specs.__fill_ = _CharT('0');
__specs.__fill_.__data[0] = _CharT('0');
int32_t __size = __first - __begin;

__specs.__width_ -= _VSTD::min(__size, __specs.__width_);
Expand Down
41 changes: 41 additions & 0 deletions libcxx/include/__format/formatter_output.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <__algorithm/ranges_fill_n.h>
#include <__algorithm/ranges_for_each.h>
#include <__algorithm/ranges_transform.h>
#include <__bit/countl.h>
#include <__charconv/to_chars_integral.h>
#include <__charconv/to_chars_result.h>
#include <__chrono/statically_widen.h>
Expand Down Expand Up @@ -166,6 +167,46 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, _CharT __value)
}
}

# ifndef _LIBCPP_HAS_NO_UNICODE
template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
requires(same_as<_CharT, char>)
_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
std::size_t __bytes = std::countl_one(static_cast<unsigned char>(__value.__data[0]));
if (__bytes == 0)
return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);

for (size_t __i = 0; __i < __n; ++__i)
__out_it = __formatter::__copy(
std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + __bytes, std::move(__out_it));
return __out_it;
}

# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
if (!__unicode::__is_high_surrogate(__value.__data[0]))
return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);

for (size_t __i = 0; __i < __n; ++__i)
__out_it = __formatter::__copy(
std::addressof(__value.__data[0]), std::addressof(__value.__data[0]) + 2, std::move(__out_it));
return __out_it;
}

template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
}
# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS
# else // _LIBCPP_HAS_NO_UNICODE
template <__fmt_char_type _CharT, output_iterator<const _CharT&> _OutIt>
_LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) {
return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]);
}
# endif // _LIBCPP_HAS_NO_UNICODE

template <class _OutIt, class _CharT>
_LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first,
const char* __last, string&& __grouping, _CharT __sep,
Expand Down
116 changes: 103 additions & 13 deletions libcxx/include/__format/parser_std_format_spec.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
/// This header has some support for the chrono-format-spec since it doesn't
/// affect the std-format-spec.

#include <__algorithm/copy_n.h>
#include <__algorithm/find_if.h>
#include <__algorithm/min.h>
#include <__assert>
Expand All @@ -31,6 +32,7 @@
#include <__format/width_estimation_table.h>
#include <__iterator/concepts.h>
#include <__iterator/readable_traits.h> // iter_value_t
#include <__memory/addressof.h>
#include <__type_traits/common_type.h>
#include <__type_traits/is_trivially_copyable.h>
#include <__variant/monostate.h>
Expand Down Expand Up @@ -220,6 +222,25 @@ struct __chrono {
bool __month_name_ : 1;
};

// The fill UCS scalar value.
//
// This is always an array, with 1, 2, or 4 elements.
// The size of the data structure is always 32-bits.
template <class _CharT>
struct __code_point;

template <>
struct __code_point<char> {
char __data[4] = {' '};
};

# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <>
struct __code_point<wchar_t> {
wchar_t __data[4 / sizeof(wchar_t)] = {L' '};
};
# endif

/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
Expand Down Expand Up @@ -255,7 +276,7 @@ struct __parsed_specifications {
/// replaced with the value of that arg-id.
int32_t __precision_;

_CharT __fill_;
__code_point<_CharT> __fill_;

_LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

Expand Down Expand Up @@ -385,11 +406,7 @@ class _LIBCPP_TEMPLATE_VIS __parser {
/// The requested precision, either the value or the arg-id.
int32_t __precision_{-1};

// LWG 3576 will probably change this to always accept a Unicode code point
// To avoid changing the size with that change align the field so when it
// becomes 32-bit its alignment will remain the same. That also means the
// size will remain the same. (D2572 addresses the solution for LWG 3576.)
_CharT __fill_{_CharT(' ')};
__code_point<_CharT> __fill_{};

private:
_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
Expand All @@ -409,19 +426,90 @@ class _LIBCPP_TEMPLATE_VIS __parser {
return false;
}

_LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill, bool __use_range_fill) {
// The forbidden fill characters all code points formed from a single code unit, thus the
// check can be omitted when more code units are used.
if (__use_range_fill && (__fill == _CharT('{') || __fill == _CharT('}') || __fill == _CharT(':')))
std::__throw_format_error("The format-spec range-fill field contains an invalid character");
else if (__fill == _CharT('{') || __fill == _CharT('}'))
std::__throw_format_error("The format-spec fill field contains an invalid character");
}

# ifndef _LIBCPP_HAS_NO_UNICODE
// range-fill and tuple-fill are identical
template <contiguous_iterator _Iterator>
requires same_as<_CharT, char>
# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
|| (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2)
# endif
_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
_LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
"undefined behavior by evaluating data not in the input");
_LIBCPP_ASSERT(__begin != __end,
"when called with an empty input the function will cause "
"undefined behavior by evaluating data not in the input");
__unicode::__code_point_view<_CharT> __view{__begin, __end};
__unicode::__consume_result __consumed = __view.__consume();
if (__consumed.__status != __unicode::__consume_result::__ok)
std::__throw_format_error("The format-spec contains malformed Unicode characters");

if (__view.__position() < __end && __parse_alignment(*__view.__position())) {
ptrdiff_t __code_units = __view.__position() - __begin;
if (__code_units == 1)
// The forbidden fill characters all are code points encoded
// in one code unit, thus the check can be omitted when more
// code units are used.
__validate_fill_character(*__begin, __use_range_fill);

std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0]));
__begin += __code_units + 1;
return true;
}

if (!__parse_alignment(*__begin))
return false;

++__begin;
return true;
}

# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
template <contiguous_iterator _Iterator>
requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4)
_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
_LIBCPP_ASSERT(__begin != __end,
"when called with an empty input the function will cause "
"undefined behavior by evaluating data not in the input");
if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) {
if (!__unicode::__is_scalar_value(*__begin))
std::__throw_format_error("The fill character contains an invalid value");

__validate_fill_character(*__begin, __use_range_fill);

__fill_.__data[0] = *__begin;
__begin += 2;
return true;
}

if (!__parse_alignment(*__begin))
return false;

++__begin;
return true;
}

# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS

# else // _LIBCPP_HAS_NO_UNICODE
// range-fill and tuple-fill are identical
template <contiguous_iterator _Iterator>
_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) {
_LIBCPP_ASSERT(__begin != __end,
"when called with an empty input the function will cause "
"undefined behavior by evaluating data not in the input");
if (__begin + 1 != __end) {
if (__parse_alignment(*(__begin + 1))) {
if (__use_range_fill && (*__begin == _CharT('{') || *__begin == _CharT('}') || *__begin == _CharT(':')))
std::__throw_format_error("The format-spec range-fill field contains an invalid character");
else if (*__begin == _CharT('{') || *__begin == _CharT('}'))
std::__throw_format_error("The format-spec fill field contains an invalid character");
__validate_fill_character(*__begin, __use_range_fill);

__fill_ = *__begin;
__fill_.__data[0] = *__begin;
__begin += 2;
return true;
}
Expand All @@ -434,6 +522,8 @@ class _LIBCPP_TEMPLATE_VIS __parser {
return true;
}

# endif // _LIBCPP_HAS_NO_UNICODE

template <contiguous_iterator _Iterator>
_LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) {
switch (*__begin) {
Expand Down
Loading

0 comments on commit 5db033e

Please sign in to comment.