diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index 54fa40d5363b4..d0eaa1a44012e 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -82,19 +82,21 @@ add_object_library(
     float_converter.cpp
   HDRS
     converter.h
+    converter_utils.h
     int_converter.h
     string_converter.h
     float_converter.h
+    current_pos_converter.h
   DEPENDS
     .reader
     .core_structs
+    libc.src.__support.common
     libc.src.__support.ctype_utils
     libc.src.__support.CPP.bitset
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.limits
     libc.src.__support.char_vector
-    libc.include.errno
-    libc.src.errno.errno
+    libc.src.__support.str_to_float
 )
 
 add_object_library(
diff --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
index c5a2932fb18f8..053e215b4d983 100644
--- a/libc/src/stdio/scanf_core/converter.cpp
+++ b/libc/src/stdio/scanf_core/converter.cpp
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIBC_SCANF_DISABLE_FLOAT
 #include "src/stdio/scanf_core/float_converter.h"
 #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
+#include "src/stdio/scanf_core/current_pos_converter.h"
 #include "src/stdio/scanf_core/int_converter.h"
 #include "src/stdio/scanf_core/string_converter.h"
 
@@ -60,8 +61,8 @@ int convert(Reader *reader, const FormatSection &to_conv) {
       return ret_val;
     return convert_float(reader, to_conv);
 #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
-  // case 'n':
-  //   return convert_write_int(reader, to_conv);
+  case 'n':
+    return convert_current_pos(reader, to_conv);
   // case 'p':
   //   ret_val = raw_match(reader, " ");
   //   if (ret_val != READ_OK)
diff --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h
new file mode 100644
index 0000000000000..07ac9c7407ed3
--- /dev/null
+++ b/libc/src/stdio/scanf_core/converter_utils.h
@@ -0,0 +1,107 @@
+//===-- Format specifier converter for scanf -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
+
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/str_to_float.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
+
+LIBC_INLINE constexpr int b36_char_to_int(char input) {
+  if (internal::isdigit(input))
+    return input - '0';
+  if (internal::isalpha(input))
+    return to_lower(input) + 10 - 'a';
+  return 0;
+}
+
+LIBC_INLINE void write_int_with_length(uintmax_t output_val,
+                                       const FormatSection &to_conv) {
+  if ((to_conv.flags & NO_WRITE) != 0) {
+    return;
+  }
+  LengthModifier lm = to_conv.length_modifier;
+  void *output_ptr = to_conv.output_ptr;
+  switch (lm) {
+  case (LengthModifier::hh):
+    *reinterpret_cast<unsigned char *>(output_ptr) =
+        static_cast<unsigned char>(output_val);
+    break;
+  case (LengthModifier::h):
+    *reinterpret_cast<unsigned short *>(output_ptr) =
+        static_cast<unsigned short>(output_val);
+    break;
+  case (LengthModifier::NONE):
+    *reinterpret_cast<unsigned int *>(output_ptr) =
+        static_cast<unsigned int>(output_val);
+    break;
+  case (LengthModifier::l):
+    *reinterpret_cast<unsigned long *>(output_ptr) =
+        static_cast<unsigned long>(output_val);
+    break;
+  case (LengthModifier::ll):
+  case (LengthModifier::L):
+    *reinterpret_cast<unsigned long long *>(output_ptr) =
+        static_cast<unsigned long long>(output_val);
+    break;
+  case (LengthModifier::j):
+    *reinterpret_cast<uintmax_t *>(output_ptr) =
+        static_cast<uintmax_t>(output_val);
+    break;
+  case (LengthModifier::z):
+    *reinterpret_cast<size_t *>(output_ptr) = static_cast<size_t>(output_val);
+    break;
+  case (LengthModifier::t):
+    *reinterpret_cast<ptrdiff_t *>(output_ptr) =
+        static_cast<ptrdiff_t>(output_val);
+    break;
+  }
+}
+
+LIBC_INLINE void write_float_with_length(char *str,
+                                         const FormatSection &to_conv) {
+  if ((to_conv.flags & NO_WRITE) != 0) {
+    return;
+  }
+
+  void *output_ptr = to_conv.output_ptr;
+
+  LengthModifier lm = to_conv.length_modifier;
+  switch (lm) {
+  case (LengthModifier::l): {
+    auto value = internal::strtofloatingpoint<double>(str, nullptr);
+    *reinterpret_cast<double *>(output_ptr) = value;
+    break;
+  }
+  case (LengthModifier::L): {
+    auto value = internal::strtofloatingpoint<long double>(str, nullptr);
+    *reinterpret_cast<long double *>(output_ptr) = value;
+    break;
+  }
+  default: {
+    auto value = internal::strtofloatingpoint<float>(str, nullptr);
+    *reinterpret_cast<float *>(output_ptr) = value;
+    break;
+  }
+  }
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
diff --git a/libc/src/stdio/scanf_core/current_pos_converter.h b/libc/src/stdio/scanf_core/current_pos_converter.h
new file mode 100644
index 0000000000000..be4b6553b89dc
--- /dev/null
+++ b/libc/src/stdio/scanf_core/current_pos_converter.h
@@ -0,0 +1,31 @@
+//===-- Current position specifier converter for scanf ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
+
+#include "src/__support/common.h"
+#include "src/stdio/scanf_core/converter_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+LIBC_INLINE int convert_current_pos(Reader *reader,
+                                    const FormatSection &to_conv) {
+  write_int_with_length(reader->chars_read(), to_conv);
+  return READ_OK;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
diff --git a/libc/src/stdio/scanf_core/float_converter.cpp b/libc/src/stdio/scanf_core/float_converter.cpp
index 1a0ce42863f35..5ffb9b09fcfc2 100644
--- a/libc/src/stdio/scanf_core/float_converter.cpp
+++ b/libc/src/stdio/scanf_core/float_converter.cpp
@@ -11,7 +11,7 @@
 #include "src/__support/CPP/limits.h"
 #include "src/__support/char_vector.h"
 #include "src/__support/ctype_utils.h"
-#include "src/__support/str_to_float.h"
+#include "src/stdio/scanf_core/converter_utils.h"
 #include "src/stdio/scanf_core/core_structs.h"
 #include "src/stdio/scanf_core/reader.h"
 
@@ -20,35 +20,6 @@
 namespace __llvm_libc {
 namespace scanf_core {
 
-constexpr char inline to_lower(char a) { return a | 32; }
-
-void write_with_length(char *str, const FormatSection &to_conv) {
-  if ((to_conv.flags & NO_WRITE) != 0) {
-    return;
-  }
-
-  void *output_ptr = to_conv.output_ptr;
-
-  LengthModifier lm = to_conv.length_modifier;
-  switch (lm) {
-  case (LengthModifier::l): {
-    auto value = internal::strtofloatingpoint<double>(str, nullptr);
-    *reinterpret_cast<double *>(output_ptr) = value;
-    break;
-  }
-  case (LengthModifier::L): {
-    auto value = internal::strtofloatingpoint<long double>(str, nullptr);
-    *reinterpret_cast<long double *>(output_ptr) = value;
-    break;
-  }
-  default: {
-    auto value = internal::strtofloatingpoint<float>(str, nullptr);
-    *reinterpret_cast<float *>(output_ptr) = value;
-    break;
-  }
-  }
-}
-
 // All of the floating point conversions are the same for scanf, every name will
 // accept every style.
 int convert_float(Reader *reader, const FormatSection &to_conv) {
@@ -96,7 +67,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
     }
 
     if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
-      write_with_length(out_str.c_str(), to_conv);
+      write_float_with_length(out_str.c_str(), to_conv);
       return READ_OK;
     } else {
       return MATCHING_FAILURE;
@@ -119,7 +90,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
     }
 
     if (nan_index == sizeof(nan_string) - 1) {
-      write_with_length(out_str.c_str(), to_conv);
+      write_float_with_length(out_str.c_str(), to_conv);
       return READ_OK;
     } else {
       return MATCHING_FAILURE;
@@ -138,7 +109,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
     }
     // If we've hit the end, then this is "0", which is valid.
     if (out_str.length() == max_width) {
-      write_with_length(out_str.c_str(), to_conv);
+      write_float_with_length(out_str.c_str(), to_conv);
       return READ_OK;
     } else {
       cur_char = reader->getc();
@@ -154,7 +125,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
       // If we've hit the end here, we have "0x" which is a valid prefix to a
       // floating point number, and will be evaluated to 0.
if (out_str.length() == max_width) { - write_with_length(out_str.c_str(), to_conv); + write_float_with_length(out_str.c_str(), to_conv); return READ_OK; } else { cur_char = reader->getc(); @@ -246,7 +217,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) { if (!is_number) { return MATCHING_FAILURE; } - write_with_length(out_str.c_str(), to_conv); + write_float_with_length(out_str.c_str(), to_conv); return READ_OK; } diff --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp index be88a01f942d7..ce23f5d3fed8d 100644 --- a/libc/src/stdio/scanf_core/int_converter.cpp +++ b/libc/src/stdio/scanf_core/int_converter.cpp @@ -10,6 +10,7 @@ #include "src/__support/CPP/limits.h" #include "src/__support/ctype_utils.h" +#include "src/stdio/scanf_core/converter_utils.h" #include "src/stdio/scanf_core/core_structs.h" #include "src/stdio/scanf_core/reader.h" @@ -18,58 +19,6 @@ namespace __llvm_libc { namespace scanf_core { -constexpr char inline to_lower(char a) { return a | 32; } - -constexpr inline int b36_char_to_int(char input) { - if (internal::isdigit(input)) - return input - '0'; - if (internal::isalpha(input)) - return to_lower(input) + 10 - 'a'; - return 0; -} - -void write_with_length(uintmax_t output_val, const FormatSection &to_conv) { - if ((to_conv.flags & NO_WRITE) != 0) { - return; - } - LengthModifier lm = to_conv.length_modifier; - void *output_ptr = to_conv.output_ptr; - switch (lm) { - case (LengthModifier::hh): - *reinterpret_cast(output_ptr) = - static_cast(output_val); - break; - case (LengthModifier::h): - *reinterpret_cast(output_ptr) = - static_cast(output_val); - break; - case (LengthModifier::NONE): - *reinterpret_cast(output_ptr) = - static_cast(output_val); - break; - case (LengthModifier::l): - *reinterpret_cast(output_ptr) = - static_cast(output_val); - break; - case (LengthModifier::ll): - case (LengthModifier::L): - *reinterpret_cast(output_ptr) = - static_cast(output_val); - break; - 
case (LengthModifier::j): - *reinterpret_cast(output_ptr) = - static_cast(output_val); - break; - case (LengthModifier::z): - *reinterpret_cast(output_ptr) = static_cast(output_val); - break; - case (LengthModifier::t): - *reinterpret_cast(output_ptr) = - static_cast(output_val); - break; - } -} - // This code is very similar to the code in __support/str_to_integer.h but is // not quite the same. Here is the list of differences and why they exist: // 1) This takes a reader and a format section instead of a char* and the base. @@ -150,7 +99,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { } else { // If the max width has been hit already, then the return value must be 0 // since no actual digits of the number have been parsed yet. - write_with_length(0, to_conv); + write_int_with_length(0, to_conv); return MATCHING_FAILURE; } } @@ -168,7 +117,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { --max_width; cur_char = reader->getc(); } else { - write_with_length(0, to_conv); + write_int_with_length(0, to_conv); return READ_OK; } @@ -179,7 +128,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { --max_width; cur_char = reader->getc(); } else { - write_with_length(0, to_conv); + write_int_with_length(0, to_conv); return READ_OK; } @@ -196,7 +145,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { // If the first character isn't a valid digit, then there are no valid // digits at all. The number is 0. 
reader->ungetc(cur_char); - write_with_length(0, to_conv); + write_int_with_length(0, to_conv); return MATCHING_FAILURE; } } @@ -249,12 +198,12 @@ int convert_int(Reader *reader, const FormatSection &to_conv) { reader->ungetc(cur_char); if (has_overflow) { - write_with_length(MAX, to_conv); + write_int_with_length(MAX, to_conv); } else { if (is_negative) result = -result; - write_with_length(result, to_conv); + write_int_with_length(result, to_conv); } if (!is_number) diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp index ed509eca4c66f..5a79d2e624ab0 100644 --- a/libc/src/stdio/scanf_core/scanf_main.cpp +++ b/libc/src/stdio/scanf_core/scanf_main.cpp @@ -29,7 +29,10 @@ int scanf_main(Reader *reader, const char *__restrict str, cur_section = parser.get_next_section()) { if (cur_section.has_conv) { ret_val = convert(reader, cur_section); - conversions += ret_val == READ_OK ? 1 : 0; + // The %n (current position) conversion doesn't increment the number of + // assignments. + if (cur_section.conv_name != 'n') + conversions += ret_val == READ_OK ? 
1 : 0; } else { ret_val = raw_match(reader, cur_section.raw_string); } diff --git a/libc/test/src/stdio/sscanf_test.cpp b/libc/test/src/stdio/sscanf_test.cpp index fc67593f57f0b..55671279e1b4d 100644 --- a/libc/test/src/stdio/sscanf_test.cpp +++ b/libc/test/src/stdio/sscanf_test.cpp @@ -59,6 +59,20 @@ TEST(LlvmLibcSScanfTest, IntConvSimple) { EXPECT_EQ(ret_val, 1); EXPECT_EQ(result, 345); + // 288 characters + ret_val = __llvm_libc::sscanf("10000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000", + "%d", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, int(__llvm_libc::cpp::numeric_limits::max())); + ret_val = __llvm_libc::sscanf("Not an integer", "%d", &result); EXPECT_EQ(ret_val, 0); } @@ -445,11 +459,6 @@ TEST(LlvmLibcSScanfTest, FloatConvComplexParsing) { EXPECT_FP_EQ(result, 1.2); } -/* -TODO: - Max width tests -*/ - TEST(LlvmLibcSScanfTest, FloatConvMaxWidth) { int ret_val; float result = 0; @@ -572,6 +581,54 @@ TEST(LlvmLibcSScanfTest, FloatConvNoWrite) { EXPECT_EQ(ret_val, 0); } +TEST(LlvmLibcSScanfTest, CurPosCombined) { + int ret_val; + int result = -1; + char c_result = 0; + + ret_val = __llvm_libc::sscanf("some text", "%n", &result); + // %n doesn't count as a conversion for the return value. 
+ EXPECT_EQ(ret_val, 0); + EXPECT_EQ(result, 0); + + ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result); + EXPECT_EQ(ret_val, 0); + EXPECT_EQ(result, 5); + + ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result); + EXPECT_EQ(ret_val, 0); + EXPECT_EQ(result, 5); + + // 288 characters + ret_val = __llvm_libc::sscanf("10000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000", + "%*d%hhn", &c_result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(c_result, char(288)); // Overflow is handled by casting. + + // 320 characters + ret_val = __llvm_libc::sscanf("10000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000", + "%*d%n", &result); + EXPECT_EQ(ret_val, 1); + EXPECT_EQ(result, 320); +} + TEST(LlvmLibcSScanfTest, CombinedConv) { int ret_val; int result = 0;