diff --git a/libc/docs/dev/printf_behavior.rst b/libc/docs/dev/printf_behavior.rst index bc60aa43ee2b6b..d0f7409336d436 100644 --- a/libc/docs/dev/printf_behavior.rst +++ b/libc/docs/dev/printf_behavior.rst @@ -164,6 +164,10 @@ If a number passed as a min width or precision value is out of range for an int, then it will be treated as the largest or smallest value in the int range (e.g. "%-999999999999.999999999999s" is the same as "%-2147483648.2147483647s"). +If a number passed as a bit width is less than or equal to zero, the conversion +is considered invalid. If the provided bit width is larger than the width of +uintmax_t, it will be clamped to the width of uintmax_t. + ---------- Conversion ---------- diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h index 54f0a870d0ac4a..a4bd3e667def35 100644 --- a/libc/src/stdio/printf_core/converter_utils.h +++ b/libc/src/stdio/printf_core/converter_utils.h @@ -18,7 +18,9 @@ namespace LIBC_NAMESPACE { namespace printf_core { -LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) { +LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, + LengthSpec length_spec) { + auto [lm, bw] = length_spec; switch (lm) { case LengthModifier::none: return num & cpp::numeric_limits::max(); @@ -40,6 +42,18 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) { return num & cpp::numeric_limits::max(); case LengthModifier::j: return num; // j is intmax, so no mask is necessary. + case LengthModifier::w: + case LengthModifier::wf: { + uintmax_t mask; + if (bw == 0) { + mask = 0; + } else if (bw < sizeof(uintmax_t) * CHAR_BIT) { + mask = (static_cast(1) << bw) - 1; + } else { + mask = UINTMAX_MAX; + } + return num & mask; + } } __builtin_unreachable(); } diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h index 7634d45568ab84..cd8bceb900de74 100644 --- a/libc/src/stdio/printf_core/core_structs.h +++ b/libc/src/stdio/printf_core/core_structs.h @@ -20,7 +20,12 @@ namespace printf_core { // These length modifiers match the length modifiers in the format string, which // is why they are formatted differently from the rest of the file. -enum class LengthModifier { hh, h, l, ll, j, z, t, L, none }; +enum class LengthModifier { hh, h, l, ll, j, z, t, L, w, wf, none }; + +struct LengthSpec { + LengthModifier lm; + size_t bit_width; +}; enum FormatFlags : uint8_t { LEFT_JUSTIFIED = 0x01, // - @@ -42,6 +47,7 @@ struct FormatSection { // Format Specifier Values FormatFlags flags = FormatFlags(0); LengthModifier length_modifier = LengthModifier::none; + size_t bit_width = 0; int min_width = 0; int precision = -1; @@ -64,6 +70,7 @@ struct FormatSection { if (!((static_cast(flags) == static_cast(other.flags)) && (min_width == other.min_width) && (precision == other.precision) && + (bit_width == other.bit_width) && (length_modifier == other.length_modifier) && (conv_name == other.conv_name))) return false; diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h index 2efbf53d409382..496e7bd1a56d94 100644 --- a/libc/src/stdio/printf_core/int_converter.h +++ b/libc/src/stdio/printf_core/int_converter.h @@ -71,7 +71,6 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) { uintmax_t num = static_cast(to_conv.conv_val_raw); bool is_negative = false; FormatFlags flags = to_conv.flags; - const char a = is_lower(to_conv.conv_name) ? 'a' : 'A'; // If the conversion is signed, then handle negative values. @@ -89,8 +88,8 @@ LIBC_INLINE int convert_int(Writer *writer, const FormatSection &to_conv) { ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX)); } - num = apply_length_modifier(num, to_conv.length_modifier); - + num = + apply_length_modifier(num, {to_conv.length_modifier, to_conv.bit_width}); cpp::array buf; auto str = details::num_to_strview(num, buf, to_conv.conv_name); if (!str) diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h index 1e7d2e58c924a6..fb5eb91ce02ea7 100644 --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -136,10 +136,10 @@ template class Parser { } } - LengthModifier lm = parse_length_modifier(&cur_pos); - + auto [lm, bw] = parse_length_modifier(&cur_pos); section.length_modifier = lm; section.conv_name = str[cur_pos]; + section.bit_width = bw; switch (str[cur_pos]) { case ('%'): // Regardless of options, a % conversion is always safe. The standard @@ -188,6 +188,21 @@ template class Parser { WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, ptrdiff_t, conv_index); break; + + case (LengthModifier::w): + case (LengthModifier::wf): + if (bw == 0) { + section.has_conv = false; + } else if (bw <= INT_WIDTH) { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, int, conv_index); + } else if (bw <= LONG_WIDTH) { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long, conv_index); + } else if (bw <= LLONG_WIDTH) { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, long long, conv_index); + } else { + WRITE_ARG_VAL_SIMPLEST(section.conv_val_raw, intmax_t, conv_index); + } + break; } break; #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT @@ -273,38 +288,54 @@ template class Parser { // assumes that str[*local_pos] is inside a format specifier. It returns a // LengthModifier with the length modifier it found. It will advance local_pos // after the format specifier if one is found. - LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) { + LIBC_INLINE LengthSpec parse_length_modifier(size_t *local_pos) { switch (str[*local_pos]) { case ('l'): if (str[*local_pos + 1] == 'l') { *local_pos += 2; - return LengthModifier::ll; + return {LengthModifier::ll, 0}; } else { ++*local_pos; - return LengthModifier::l; + return {LengthModifier::l, 0}; } + case ('w'): { + LengthModifier lm; + if (str[*local_pos + 1] == 'f') { + *local_pos += 2; + lm = LengthModifier::wf; + } else { + ++*local_pos; + lm = LengthModifier::w; + } + if (internal::isdigit(str[*local_pos])) { + const auto result = internal::strtointeger(str + *local_pos, 10); + *local_pos += result.parsed_len; + return {lm, static_cast(cpp::max(0, result.value))}; + } + return {lm, 0}; + } case ('h'): if (str[*local_pos + 1] == 'h') { *local_pos += 2; - return LengthModifier::hh; + return {LengthModifier::hh, 0}; } else { ++*local_pos; - return LengthModifier::h; + return {LengthModifier::h, 0}; } case ('L'): ++*local_pos; - return LengthModifier::L; + return {LengthModifier::L, 0}; case ('j'): ++*local_pos; - return LengthModifier::j; + return {LengthModifier::j, 0}; case ('z'): ++*local_pos; - return LengthModifier::z; + return {LengthModifier::z, 0}; case ('t'): ++*local_pos; - return LengthModifier::t; + return {LengthModifier::t, 0}; default: - return LengthModifier::none; + return {LengthModifier::none, 0}; } } @@ -460,7 +491,7 @@ template class Parser { } } - LengthModifier lm = parse_length_modifier(&local_pos); + auto [lm, bw] = parse_length_modifier(&local_pos); // if we don't have an index for this conversion, then its position is // unknown and all this information is irrelevant. The rest of this @@ -511,6 +542,18 @@ template class Parser { case (LengthModifier::t): conv_size = type_desc_from_type(); break; + case (LengthModifier::w): + case (LengthModifier::wf): + if (bw <= INT_WIDTH) { + conv_size = type_desc_from_type(); + } else if (bw <= LONG_WIDTH) { + conv_size = type_desc_from_type(); + } else if (bw <= LLONG_WIDTH) { + conv_size = type_desc_from_type(); + } else { + conv_size = type_desc_from_type(); + } + break; } break; #ifndef LIBC_COPT_PRINTF_DISABLE_FLOAT diff --git a/libc/src/stdio/printf_core/write_int_converter.h b/libc/src/stdio/printf_core/write_int_converter.h index 0310905f36f146..18aa5c79897ec4 100644 --- a/libc/src/stdio/printf_core/write_int_converter.h +++ b/libc/src/stdio/printf_core/write_int_converter.h @@ -55,6 +55,8 @@ LIBC_INLINE int convert_write_int(Writer *writer, *reinterpret_cast(to_conv.conv_val_ptr) = written; break; case LengthModifier::j: + case LengthModifier::w: + case LengthModifier::wf: *reinterpret_cast(to_conv.conv_val_ptr) = written; break; } diff --git a/libc/test/UnitTest/PrintfMatcher.cpp b/libc/test/UnitTest/PrintfMatcher.cpp index 32f3be73307e3a..c8303815c92296 100644 --- a/libc/test/UnitTest/PrintfMatcher.cpp +++ b/libc/test/UnitTest/PrintfMatcher.cpp @@ -39,6 +39,10 @@ namespace { case (LengthModifier::lm): \ tlog << #lm; \ break +#define CASE_LM_BIT_WIDTH(lm, bw) \ + case (LengthModifier::lm): \ + tlog << #lm << "\n\tbit width: :" << bw; \ + break static void display(FormatSection form) { tlog << "Raw String (len " << form.raw_string.size() << "): \""; @@ -67,6 +71,8 @@ static void display(FormatSection form) { CASE_LM(z); CASE_LM(t); CASE_LM(L); + CASE_LM_BIT_WIDTH(w, form.bit_width); + CASE_LM_BIT_WIDTH(wf, form.bit_width); } tlog << "\n"; tlog << "\tconversion name: " << form.conv_name << "\n"; diff --git a/libc/test/src/stdio/printf_core/parser_test.cpp b/libc/test/src/stdio/printf_core/parser_test.cpp index 0134277c4a1b2d..66d6dd0a86c422 100644 --- a/libc/test/src/stdio/printf_core/parser_test.cpp +++ b/libc/test/src/stdio/printf_core/parser_test.cpp @@ -223,6 +223,42 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) { ASSERT_PFORMAT_EQ(expected, format_arr[0]); } +TEST(LlvmLibcPrintfParserTest, EvalOneArgWithBitWidthLengthModifier) { + LIBC_NAMESPACE::printf_core::FormatSection format_arr[10]; + const char *str = "%w32d"; + long long arg1 = 12345; + evaluate(format_arr, str, arg1); + + LIBC_NAMESPACE::printf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = {str, 5}; + expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::w; + expected.bit_width = 32; + expected.conv_val_raw = arg1; + expected.conv_name = 'd'; + + ASSERT_PFORMAT_EQ(expected, format_arr[0]); +} + +TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFastBitWidthLengthModifier) { + LIBC_NAMESPACE::printf_core::FormatSection format_arr[10]; + const char *str = "%wf32d"; + long long arg1 = 12345; + evaluate(format_arr, str, arg1); + + LIBC_NAMESPACE::printf_core::FormatSection expected; + expected.has_conv = true; + + expected.raw_string = {str, 6}; + expected.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::wf; + expected.bit_width = 32; + expected.conv_val_raw = arg1; + expected.conv_name = 'd'; + + ASSERT_PFORMAT_EQ(expected, format_arr[0]); +} + TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) { LIBC_NAMESPACE::printf_core::FormatSection format_arr[10]; const char *str = "% -056.78jd"; diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp index 186b37e2898af6..4e1171ae65daf8 100644 --- a/libc/test/src/stdio/sprintf_test.cpp +++ b/libc/test/src/stdio/sprintf_test.cpp @@ -169,6 +169,92 @@ TEST(LlvmLibcSPrintfTest, IntConv) { EXPECT_EQ(written, 20); ASSERT_STREQ(buff, "-9223372036854775808"); // ll min + written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 5807); + EXPECT_EQ(written, 1); + ASSERT_STREQ(buff, "7"); + + written = LIBC_NAMESPACE::sprintf(buff, "%w3d", 1); + EXPECT_EQ(written, 1); + ASSERT_STREQ(buff, "1"); + + written = LIBC_NAMESPACE::sprintf(buff, "%w64d", 9223372036854775807ll); + EXPECT_EQ(written, 19); + ASSERT_STREQ(buff, "9223372036854775807"); + + written = LIBC_NAMESPACE::sprintf(buff, "%w-1d", 5807); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "%w-1d"); + + written = LIBC_NAMESPACE::sprintf(buff, "%w0d", 5807); + EXPECT_EQ(written, 4); + ASSERT_STREQ(buff, "%w0d"); + + written = LIBC_NAMESPACE::sprintf(buff, "%w999d", 9223372036854775807ll); + EXPECT_EQ(written, 19); + ASSERT_STREQ(buff, "9223372036854775807"); + + written = LIBC_NAMESPACE::sprintf(buff, "%winvalid%w1d", 5807, 5807); + EXPECT_EQ(written, 10); + ASSERT_STREQ(buff, "%winvalid1"); + + written = LIBC_NAMESPACE::sprintf(buff, "%w-1d%w1d", 5807, 5807); + EXPECT_EQ(written, 6); + ASSERT_STREQ(buff, "%w-1d1"); + + char format[64]; + char uintmax[128]; + LIBC_NAMESPACE::sprintf(format, "%%w%du", UINTMAX_WIDTH); + const int uintmax_len = LIBC_NAMESPACE::sprintf(uintmax, "%ju", UINTMAX_MAX); + written = LIBC_NAMESPACE::sprintf(buff, format, UINTMAX_MAX); + EXPECT_EQ(written, uintmax_len); + ASSERT_STREQ(buff, uintmax); + + written = LIBC_NAMESPACE::sprintf(buff, "%w64u", 18446744073709551615ull); + EXPECT_EQ(written, 20); + ASSERT_STREQ(buff, "18446744073709551615"); // ull max + + written = + LIBC_NAMESPACE::sprintf(buff, "%w64d", -9223372036854775807ll - 1ll); + EXPECT_EQ(written, 20); + ASSERT_STREQ(buff, "-9223372036854775808"); // ll min + + written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 5807); + EXPECT_EQ(written, 1); + ASSERT_STREQ(buff, "7"); + + written = LIBC_NAMESPACE::sprintf(buff, "%wf3d", 1); + EXPECT_EQ(written, 1); + ASSERT_STREQ(buff, "1"); + + written = LIBC_NAMESPACE::sprintf(buff, "%wf64u", 18446744073709551615ull); + EXPECT_EQ(written, 20); + ASSERT_STREQ(buff, "18446744073709551615"); // ull max + + written = + LIBC_NAMESPACE::sprintf(buff, "%wf64d", -9223372036854775807ll - 1ll); + EXPECT_EQ(written, 20); + ASSERT_STREQ(buff, "-9223372036854775808"); // ll min + + written = LIBC_NAMESPACE::sprintf(buff, "%wf0d", 5807); + EXPECT_EQ(written, 5); + ASSERT_STREQ(buff, "%wf0d"); + + written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d", 5807); + EXPECT_EQ(written, 6); + ASSERT_STREQ(buff, "%wf-1d"); + + written = LIBC_NAMESPACE::sprintf(buff, "%wfinvalid%wf1d", 5807, 5807); + EXPECT_EQ(written, 11); + ASSERT_STREQ(buff, "%wfinvalid1"); + + written = LIBC_NAMESPACE::sprintf(buff, "%wf-1d%wf1d", 5807, 5807); + EXPECT_EQ(written, 7); + ASSERT_STREQ(buff, "%wf-1d1"); + + written = LIBC_NAMESPACE::sprintf(buff, "%wf999d", 9223372036854775807ll); + EXPECT_EQ(written, 19); + ASSERT_STREQ(buff, "9223372036854775807"); + // Min Width Tests. written = LIBC_NAMESPACE::sprintf(buff, "%4d", 789);