99 changes: 99 additions & 0 deletions libc/src/stdio/scanf_core/parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
//===-- Format string parser for scanf -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H

#include "src/__support/arg_list.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/scanf_config.h"

#include <stddef.h>

namespace __llvm_libc {
namespace scanf_core {

// Parser walks a scanf format string and hands it back one FormatSection at a
// time through get_next_section. It keeps its own copy of the caller's
// argument list so that arguments can be pulled as sections are produced.
class Parser {
  // The format string supplied at construction.
  const char *__restrict str;

  // Presumably the offset into str at which the next section begins; it is
  // only manipulated by the out-of-line implementation.
  size_t cur_pos = 0;
  // Copy of the argument list from which the next argument is read.
  internal::ArgList args_cur;

#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
  // A second copy of the argument list that stays at the beginning. The list
  // cannot be read backwards, so reaching an earlier argument means starting
  // over from this copy.
  internal::ArgList args_start;
  // One-based index used to decide whether a requested argument is simply the
  // next one in args_cur.
  size_t args_index = 1;
#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE

public:
  // Creates a parser over new_str that reads its arguments from copies of
  // args. In index mode an extra copy is kept as the rewind point.
  Parser(const char *__restrict new_str, internal::ArgList &args)
      : str(new_str), args_cur(args)
#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
        ,
        args_start(args)
#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
  {
  }

  // Parses the format string until one fully specified section is available
  // and returns it. The section is either raw text with no conversion, or a
  // conversion whose variables are all stored in the returned FormatSection.
  FormatSection get_next_section();

private:
  // Parses the length modifier of a format specifier. str[*local_pos] must be
  // inside a format specifier. Returns the LengthModifier that was found; if a
  // length modifier is found, local_pos is advanced past it.
  LengthModifier parse_length_modifier(size_t *local_pos);

  // Reads the next value from the argument list as a T.
  template <class T> inline T get_next_arg_value() {
    return args_cur.next_var<T>();
  }

  //----------------------------------------------------
  // INDEX MODE ONLY FUNCTIONS AFTER HERE:
  //----------------------------------------------------

#ifndef LLVM_LIBC_SCANF_DISABLE_INDEX_MODE

  // Parses an argument index inside the format string. str[*local_pos] must be
  // the character right after a '%' or '*'. Returns 0 when no number is found
  // or when the closing '$' is missing, and leaves local_pos untouched in that
  // case. When a number is found, local_pos is moved past the '$'.
  size_t parse_index(size_t *local_pos);

  // Reads the argument at the given one-based index as a T. An index of 0
  // means "the next argument". Fetching an argument that is not the next one
  // may require parsing the format string.
  template <class T> inline T get_arg_value(size_t index) {
    // Only reposition the list when an explicit index was given and it is not
    // already the next argument to be read.
    const bool needs_seek = (index != 0) && (index != args_index);
    if (needs_seek)
      args_to_index(index);

    ++args_index;
    return get_next_arg_value<T>();
  }

  // ArgList can only hand out the next item. In index mode, when the wanted
  // item is not the next one, this moves args_cur to the requested index so
  // that the appropriate value can be read. Doing so may involve parsing the
  // format string and is O(n^2) in the worst case.
  void args_to_index(size_t index);

#endif // LLVM_LIBC_SCANF_DISABLE_INDEX_MODE
};

} // namespace scanf_core
} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_PARSER_H
24 changes: 24 additions & 0 deletions libc/src/stdio/scanf_core/scanf_config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===-- Scanf Configuration Handler ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_CONFIG_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_CONFIG_H

// These macros can be set or unset to adjust scanf behavior at compile time.

// This flag disables all functionality relating to floating point numbers. This
// can be useful for embedded systems or other situations where binary size is
// important.
// #define LLVM_LIBC_SCANF_DISABLE_FLOAT

// This flag disables index mode, a POSIX extension often used for
// internationalization of format strings. Supporting it takes up additional
// memory and parsing time, so it can be disabled if it's not used.
// #define LLVM_LIBC_SCANF_DISABLE_INDEX_MODE

#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_SCANF_CONFIG_H
1 change: 1 addition & 0 deletions libc/test/src/stdio/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,5 @@ add_libc_unittest(
)

add_subdirectory(printf_core)
add_subdirectory(scanf_core)
add_subdirectory(testdata)
66 changes: 33 additions & 33 deletions libc/test/src/stdio/printf_core/parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ TEST(LlvmLibcPrintfParserTest, EvalRaw) {

expected.raw_string = {str, 4};

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
// TODO: add checks that the format_arr after the last one has length 0
}

Expand All @@ -70,20 +70,20 @@ TEST(LlvmLibcPrintfParserTest, EvalSimple) {

expected0.raw_string = {str, 5};

ASSERT_FORMAT_EQ(expected0, format_arr[0]);
ASSERT_PFORMAT_EQ(expected0, format_arr[0]);

expected1.has_conv = true;

expected1.raw_string = {str + 5, 2};
expected1.conv_name = '%';

ASSERT_FORMAT_EQ(expected1, format_arr[1]);
ASSERT_PFORMAT_EQ(expected1, format_arr[1]);

expected2.has_conv = false;

expected2.raw_string = {str + 7, 5};

ASSERT_FORMAT_EQ(expected2, format_arr[2]);
ASSERT_PFORMAT_EQ(expected2, format_arr[2]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArg) {
Expand All @@ -99,7 +99,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArg) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFlags) {
Expand All @@ -121,7 +121,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithFlags) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithWidth) {
Expand All @@ -138,7 +138,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithWidth) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithPrecision) {
Expand All @@ -155,7 +155,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithPrecision) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithTrivialPrecision) {
Expand All @@ -172,7 +172,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithTrivialPrecision) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithShortLengthModifier) {
Expand All @@ -189,7 +189,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithShortLengthModifier) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
Expand All @@ -206,7 +206,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithLongLengthModifier) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
Expand All @@ -229,7 +229,7 @@ TEST(LlvmLibcPrintfParserTest, EvalOneArgWithAllOptions) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, EvalThreeArgs) {
Expand All @@ -247,23 +247,23 @@ TEST(LlvmLibcPrintfParserTest, EvalThreeArgs) {
expected0.conv_val_raw = arg1;
expected0.conv_name = 'd';

ASSERT_FORMAT_EQ(expected0, format_arr[0]);
ASSERT_PFORMAT_EQ(expected0, format_arr[0]);

expected1.has_conv = true;

expected1.raw_string = {str + 2, 2};
expected1.conv_val_raw = __llvm_libc::cpp::bit_cast<uint64_t>(arg2);
expected1.conv_name = 'f';

ASSERT_FORMAT_EQ(expected1, format_arr[1]);
ASSERT_PFORMAT_EQ(expected1, format_arr[1]);

expected2.has_conv = true;

expected2.raw_string = {str + 4, 2};
expected2.conv_val_ptr = const_cast<char *>(arg3);
expected2.conv_name = 's';

ASSERT_FORMAT_EQ(expected2, format_arr[2]);
ASSERT_PFORMAT_EQ(expected2, format_arr[2]);
}

#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
Expand All @@ -281,7 +281,7 @@ TEST(LlvmLibcPrintfParserTest, IndexModeOneArg) {
expected.conv_val_raw = arg1;
expected.conv_name = 'd';

ASSERT_FORMAT_EQ(expected, format_arr[0]);
ASSERT_PFORMAT_EQ(expected, format_arr[0]);
}

TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsSequential) {
Expand All @@ -299,23 +299,23 @@ TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsSequential) {
expected0.conv_val_raw = arg1;
expected0.conv_name = 'd';

ASSERT_FORMAT_EQ(expected0, format_arr[0]);
ASSERT_PFORMAT_EQ(expected0, format_arr[0]);

expected1.has_conv = true;

expected1.raw_string = {str + 4, 4};
expected1.conv_val_raw = __llvm_libc::cpp::bit_cast<uint64_t>(arg2);
expected1.conv_name = 'f';

ASSERT_FORMAT_EQ(expected1, format_arr[1]);
ASSERT_PFORMAT_EQ(expected1, format_arr[1]);

expected2.has_conv = true;

expected2.raw_string = {str + 8, 4};
expected2.conv_val_ptr = const_cast<char *>(arg3);
expected2.conv_name = 's';

ASSERT_FORMAT_EQ(expected2, format_arr[2]);
ASSERT_PFORMAT_EQ(expected2, format_arr[2]);
}

TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsReverse) {
Expand All @@ -333,23 +333,23 @@ TEST(LlvmLibcPrintfParserTest, IndexModeThreeArgsReverse) {
expected0.conv_val_raw = arg1;
expected0.conv_name = 'd';

ASSERT_FORMAT_EQ(expected0, format_arr[0]);
ASSERT_PFORMAT_EQ(expected0, format_arr[0]);

expected1.has_conv = true;

expected1.raw_string = {str + 4, 4};
expected1.conv_val_raw = __llvm_libc::cpp::bit_cast<uint64_t>(arg2);
expected1.conv_name = 'f';

ASSERT_FORMAT_EQ(expected1, format_arr[1]);
ASSERT_PFORMAT_EQ(expected1, format_arr[1]);

expected2.has_conv = true;

expected2.raw_string = {str + 8, 4};
expected2.conv_val_ptr = const_cast<char *>(arg3);
expected2.conv_name = 's';

ASSERT_FORMAT_EQ(expected2, format_arr[2]);
ASSERT_PFORMAT_EQ(expected2, format_arr[2]);
}

TEST(LlvmLibcPrintfParserTest, IndexModeTenArgsRandom) {
Expand All @@ -367,7 +367,7 @@ TEST(LlvmLibcPrintfParserTest, IndexModeTenArgsRandom) {
static_cast<size_t>(4 + (i >= 9 ? 1 : 0))};
expected.conv_val_raw = i + 1;
expected.conv_name = 'd';
EXPECT_FORMAT_EQ(expected, format_arr[i]);
EXPECT_PFORMAT_EQ(expected, format_arr[i]);
}
}

Expand All @@ -388,7 +388,7 @@ TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) {

expected0.raw_string = {str, 12};

EXPECT_FORMAT_EQ(expected0, format_arr[0]);
EXPECT_PFORMAT_EQ(expected0, format_arr[0]);

expected1.has_conv = true;

Expand All @@ -397,26 +397,26 @@ TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) {
expected1.conv_val_raw = arg3;
expected1.conv_name = 'u';

EXPECT_FORMAT_EQ(expected1, format_arr[1]);
EXPECT_PFORMAT_EQ(expected1, format_arr[1]);

expected2.has_conv = false;

expected2.raw_string = {str + 18, 1};

EXPECT_FORMAT_EQ(expected2, format_arr[2]);
EXPECT_PFORMAT_EQ(expected2, format_arr[2]);

expected3.has_conv = true;

expected3.raw_string = {str + 19, 2};
expected3.conv_name = '%';

EXPECT_FORMAT_EQ(expected3, format_arr[3]);
EXPECT_PFORMAT_EQ(expected3, format_arr[3]);

expected4.has_conv = false;

expected4.raw_string = {str + 21, 1};

EXPECT_FORMAT_EQ(expected4, format_arr[4]);
EXPECT_PFORMAT_EQ(expected4, format_arr[4]);

expected5.has_conv = true;

Expand All @@ -426,13 +426,13 @@ TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) {
expected5.conv_val_raw = __llvm_libc::cpp::bit_cast<uint64_t>(arg2);
expected5.conv_name = 'f';

EXPECT_FORMAT_EQ(expected5, format_arr[5]);
EXPECT_PFORMAT_EQ(expected5, format_arr[5]);

expected6.has_conv = false;

expected6.raw_string = {str + 30, 1};

EXPECT_FORMAT_EQ(expected6, format_arr[6]);
EXPECT_PFORMAT_EQ(expected6, format_arr[6]);

expected7.has_conv = true;

Expand All @@ -442,13 +442,13 @@ TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) {
expected7.conv_val_raw = __llvm_libc::cpp::bit_cast<uint64_t>(arg2);
expected7.conv_name = 'f';

EXPECT_FORMAT_EQ(expected7, format_arr[7]);
EXPECT_PFORMAT_EQ(expected7, format_arr[7]);

expected8.has_conv = false;

expected8.raw_string = {str + 40, 1};

EXPECT_FORMAT_EQ(expected8, format_arr[8]);
EXPECT_PFORMAT_EQ(expected8, format_arr[8]);

expected9.has_conv = true;

Expand All @@ -458,7 +458,7 @@ TEST(LlvmLibcPrintfParserTest, IndexModeComplexParsing) {
expected9.conv_val_raw = arg1;
expected9.conv_name = 'c';

EXPECT_FORMAT_EQ(expected9, format_arr[9]);
EXPECT_PFORMAT_EQ(expected9, format_arr[9]);
}

#endif // LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
14 changes: 14 additions & 0 deletions libc/test/src/stdio/scanf_core/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Unit test target for the scanf format string parser. It is part of the
# libc_stdio_unittests suite and links LibcScanfHelpers in addition to the
# listed libc dependencies.
add_libc_unittest(
parser_test
SUITE
libc_stdio_unittests
SRCS
parser_test.cpp
LINK_LIBRARIES
LibcScanfHelpers
DEPENDS
libc.src.stdio.scanf_core.parser
libc.src.stdio.scanf_core.core_structs
libc.src.__support.CPP.string_view
libc.src.__support.arg_list
)
754 changes: 754 additions & 0 deletions libc/test/src/stdio/scanf_core/parser_test.cpp

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions libc/utils/UnitTest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,18 @@ add_dependencies(
libc.src.stdio.printf_core.core_structs
libc.utils.UnitTest.string_utils
)

# Helper library built from ScanfMatcher.h/.cpp. It exposes LIBC_SOURCE_DIR as
# a public include directory and links against LibcUnitTest.
add_library(
LibcScanfHelpers
ScanfMatcher.h
ScanfMatcher.cpp
)
target_include_directories(LibcScanfHelpers PUBLIC ${LIBC_SOURCE_DIR})
target_link_libraries(LibcScanfHelpers LibcUnitTest)
# Make sure the targets the matcher relies on are built before the helper
# library itself.
add_dependencies(
LibcScanfHelpers
LibcUnitTest
libc.src.__support.FPUtil.fp_bits
libc.src.stdio.scanf_core.core_structs
libc.utils.UnitTest.string_utils
)
4 changes: 2 additions & 2 deletions libc/utils/UnitTest/PrintfMatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,11 @@ class FormatSectionMatcher
} // namespace printf_core
} // namespace __llvm_libc

#define EXPECT_FORMAT_EQ(expected, actual) \
#define EXPECT_PFORMAT_EQ(expected, actual) \
EXPECT_THAT(actual, __llvm_libc::printf_core::testing::FormatSectionMatcher( \
expected))

#define ASSERT_FORMAT_EQ(expected, actual) \
#define ASSERT_PFORMAT_EQ(expected, actual) \
ASSERT_THAT(actual, __llvm_libc::printf_core::testing::FormatSectionMatcher( \
expected))

Expand Down
99 changes: 99 additions & 0 deletions libc/utils/UnitTest/ScanfMatcher.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
//===-- ScanfMatcher.cpp ----------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ScanfMatcher.h"

#include "src/__support/FPUtil/FPBits.h"
#include "src/stdio/scanf_core/core_structs.h"

#include "utils/UnitTest/StringUtils.h"

#include <stdint.h>

namespace __llvm_libc {
namespace scanf_core {
namespace testing {

// Stores the section under test so explainError can print it later, then
// reports whether it compares equal to the expected section.
bool FormatSectionMatcher::match(FormatSection actualValue) {
  this->actual = actualValue;
  const bool sections_equal = (expected == actual);
  return sections_equal;
}

namespace {

// Streams the name of flag_name, on its own indented line, when all of that
// flag's bits are set in form.flags. Expects `form` and `stream` to be in
// scope at the point of use.
#define IF_FLAG_SHOW_FLAG(flag_name)                                           \
  do {                                                                         \
    if ((form.flags & FormatFlags::flag_name) == FormatFlags::flag_name)       \
      stream << "\n\t\t" << #flag_name;                                        \
  } while (false)

// Expands to a switch case that streams the spelling of the length modifier
// `lm`. Expects `stream` to be in scope at the point of use.
#define CASE_LM(lm)                                                            \
  case (LengthModifier::lm):                                                   \
    stream << #lm;                                                             \
    break

// Writes a human readable description of `form` to `stream`: always the raw
// string, and for sections that carry a conversion also the flags, max width,
// length modifier, output pointer (when it is used), conversion name and, for
// '[' conversions, the characters in the scan set.
void display(testutils::StreamWrapper &stream, FormatSection form) {
  const size_t raw_len = form.raw_string.size();

  stream << "Raw String (len " << form.raw_string.size() << "): \"";
  size_t raw_pos = 0;
  while (raw_pos < raw_len) {
    stream << form.raw_string[raw_pos];
    ++raw_pos;
  }
  stream << "\"";

  // Raw sections have nothing else worth printing.
  if (!form.has_conv)
    return;

  stream << "\n\tHas Conv\n\tFlags:";
  IF_FLAG_SHOW_FLAG(NO_WRITE);
  IF_FLAG_SHOW_FLAG(ALLOCATE);
  stream << "\n"
         << "\tmax width: " << form.max_width << "\n"
         << "\tlength modifier: ";
  switch (form.length_modifier) {
    CASE_LM(NONE);
    CASE_LM(l);
    CASE_LM(ll);
    CASE_LM(h);
    CASE_LM(hh);
    CASE_LM(j);
    CASE_LM(z);
    CASE_LM(t);
    CASE_LM(L);
  }
  stream << "\n";

  // The pointer is only used when NO_WRITE is not set and the conversion
  // isn't %.
  const bool writes_result = (form.flags & FormatFlags::NO_WRITE) == 0;
  const bool is_percent_conv = (form.conv_name == '%');
  if (writes_result && !is_percent_conv) {
    const uintptr_t address = reinterpret_cast<uintptr_t>(form.output_ptr);
    stream << "\tpointer value: " << int_to_hex<uintptr_t>(address) << "\n";
  }

  stream << "\tconversion name: " << form.conv_name << "\n";

  // Only scan set conversions have a set of characters to list.
  if (form.conv_name != '[')
    return;

  stream << "\t\t";
  constexpr size_t SCAN_SET_SIZE = 256; // char max
  for (size_t ch = 0; ch < SCAN_SET_SIZE; ++ch) {
    if (form.scan_set.test(ch))
      stream << static_cast<char>(ch);
  }
  stream << "\n\t]\n";
}
} // anonymous namespace

// Prints the expected section followed by the section recorded by the last
// call to match, each under its own label.
void FormatSectionMatcher::explainError(testutils::StreamWrapper &stream) {
  stream << "expected format section: ";
  display(stream, expected);

  stream << '\n' << "actual format section : ";
  display(stream, actual);

  stream << '\n';
}

} // namespace testing
} // namespace scanf_core
} // namespace __llvm_libc
46 changes: 46 additions & 0 deletions libc/utils/UnitTest/ScanfMatcher.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===-- ScanfMatcher.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_UTILS_UNITTEST_SCANF_MATCHER_H
#define LLVM_LIBC_UTILS_UNITTEST_SCANF_MATCHER_H

#include "src/stdio/scanf_core/core_structs.h"
#include "utils/UnitTest/Test.h"

#include <errno.h>

namespace __llvm_libc {
namespace scanf_core {
namespace testing {

// Matcher that compares a scanf FormatSection against an expected one and can
// describe both when they differ.
class FormatSectionMatcher
    : public __llvm_libc::testing::Matcher<FormatSection> {
public:
  // Remembers expectedValue as the section every later match is compared to.
  FormatSectionMatcher(FormatSection expectedValue) : expected(expectedValue) {}

  // Records actualValue and returns whether it equals the expected section.
  bool match(FormatSection actualValue);

  // Writes a description of the expected and the recorded actual section to
  // stream.
  void explainError(testutils::StreamWrapper &stream) override;

private:
  // The section supplied at construction.
  FormatSection expected;
  // The section passed to the most recent match call.
  FormatSection actual;
};

} // namespace testing
} // namespace scanf_core
} // namespace __llvm_libc

// Convenience wrappers that hand `actual_section` to EXPECT_THAT / ASSERT_THAT
// together with a scanf FormatSectionMatcher built from `expected_section`.
#define EXPECT_SFORMAT_EQ(expected_section, actual_section)                    \
  EXPECT_THAT(actual_section,                                                  \
              __llvm_libc::scanf_core::testing::FormatSectionMatcher(          \
                  expected_section))

#define ASSERT_SFORMAT_EQ(expected_section, actual_section)                    \
  ASSERT_THAT(actual_section,                                                  \
              __llvm_libc::scanf_core::testing::FormatSectionMatcher(          \
                  expected_section))

#endif // LLVM_LIBC_UTILS_UNITTEST_SCANF_MATCHER_H