466 changes: 0 additions & 466 deletions libc/src/stdio/printf_core/parser.cpp

This file was deleted.

441 changes: 428 additions & 13 deletions libc/src/stdio/printf_core/parser.h

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion libc/src/stdio/printf_core/printf_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace printf_core {

int printf_main(Writer *writer, const char *__restrict str,
internal::ArgList &args) {
Parser parser(str, args);
Parser<internal::ArgList> parser(str, args);
int result = 0;
for (FormatSection cur_section = parser.get_next_section();
!cur_section.raw_string.empty();
Expand Down
4 changes: 1 addition & 3 deletions libc/src/stdio/scanf_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,8 @@ add_header_library(
libc.src.__support.FPUtil.fp_bits
)

add_object_library(
add_header_library(
parser
SRCS
parser.cpp
HDRS
parser.h
DEPENDS
Expand Down
225 changes: 0 additions & 225 deletions libc/src/stdio/scanf_core/parser.cpp

This file was deleted.

201 changes: 193 additions & 8 deletions libc/src/stdio/scanf_core/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

#include "src/__support/arg_list.h"
#include "src/__support/common.h"
#include "src/__support/ctype_utils.h"
#include "src/__support/str_to_integer.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/scanf_config.h"

Expand All @@ -19,17 +21,23 @@
namespace __llvm_libc {
namespace scanf_core {

class Parser {
// GET_ARG_VAL_SIMPLEST(arg_type, index) expands to the call used to fetch one
// argument of type arg_type. When index mode is available it expands to
// get_arg_value<arg_type>(index). When LIBC_COPT_SCANF_DISABLE_INDEX_MODE is
// defined, the second macro argument is discarded and it expands to
// get_next_arg_value<arg_type>() instead.
#ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST(arg_type, index) get_arg_value<arg_type>(index)
#else
#define GET_ARG_VAL_SIMPLEST(arg_type, _) get_next_arg_value<arg_type>()
#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE

template <typename ArgProvider> class Parser {
const char *__restrict str;

size_t cur_pos = 0;
internal::ArgList args_cur;
ArgProvider args_cur;

#ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
// args_start stores the start of the va_args, which is used when a previous
// argument is needed. In that case, we have to read the arguments from the
// beginning since they don't support reading backwards.
internal::ArgList args_start;
ArgProvider args_start;
size_t args_index = 1;
#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE

Expand All @@ -46,18 +54,173 @@ class Parser {
// specified format section. This can either be a raw format section with no
// conversion, or a format section with a conversion that has all of its
// variables stored in the format section.
FormatSection get_next_section();
LIBC_INLINE FormatSection get_next_section() {
  // Parses the section of the format string that starts at cur_pos and
  // advances cur_pos past it. A section is either a run of raw characters (up
  // to the next '%' or the end of the string) or a single '%' conversion.
  // The returned raw_string always covers exactly the characters consumed, so
  // it is empty once the end of the format string has been reached.
  FormatSection section;
  size_t starting_pos = cur_pos;
  if (str[cur_pos] == '%') {
    // format section
    section.has_conv = true;

    ++cur_pos;
    [[maybe_unused]] size_t conv_index = 0;

#ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
    conv_index = parse_index(&cur_pos);
#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE

    // A '*' suppresses assignment: the conversion is performed but no
    // argument is consumed for it.
    if (str[cur_pos] == '*') {
      ++cur_pos;
      section.flags = FormatFlags::NO_WRITE;
    }

    // handle width; -1 means that no maximum width was given.
    section.max_width = -1;
    if (internal::isdigit(str[cur_pos])) {
      auto result = internal::strtointeger<int>(str + cur_pos, 10);
      section.max_width = result.value;
      cur_pos = cur_pos + result.parsed_len;
    }

    // TODO(michaelrj): add posix allocate flag support.
    // if (str[cur_pos] == 'm') {
    //   ++cur_pos;
    //   section.flags = FormatFlags::ALLOCATE;
    // }

    LengthModifier lm = parse_length_modifier(&cur_pos);
    section.length_modifier = lm;

    section.conv_name = str[cur_pos];

    // If NO_WRITE is not set, then read the next arg as the output pointer.
    // NOTE(review): this also consumes an argument for "%%" and for a
    // specifier that is cut short by the end of the string; confirm that
    // this is intended.
    if ((section.flags & FormatFlags::NO_WRITE) == 0) {
      // Since all outputs are pointers, there's no need to distinguish when
      // reading from va_args. They're all the same size and stored the same.
      section.output_ptr = GET_ARG_VAL_SIMPLEST(void *, conv_index);
    }

    // If the end of the format section is on the '\0', we must not advance
    // cur_pos and we should not count this as having a conversion.
    if (str[cur_pos] != '\0') {
      ++cur_pos;
    } else {
      section.has_conv = false;
    }

    // If the format is a bracketed one, then we need to parse out the insides
    // of the brackets.
    if (section.conv_name == '[') {
      constexpr char CLOSING_BRACKET = ']';
      constexpr char INVERT_FLAG = '^';
      constexpr char RANGE_OPERATOR = '-';

      cpp::bitset<256> scan_set;
      bool invert = false;

      // The circumflex in the first position represents the inversion flag,
      // but it's easier to apply that at the end so we just store it for now.
      if (str[cur_pos] == INVERT_FLAG) {
        invert = true;
        ++cur_pos;
      }

      // This is used to determine if a hyphen is being used as a literal or
      // as a range operator.
      size_t set_start_pos = cur_pos;

      // Normally the right bracket closes the set, but if it's the first
      // character (possibly after the inversion flag) then it's instead
      // included as a character in the set and the second right bracket
      // closes the set.
      if (str[cur_pos] == CLOSING_BRACKET) {
        scan_set.set(CLOSING_BRACKET);
        ++cur_pos;
      }

      while (str[cur_pos] != '\0' && str[cur_pos] != CLOSING_BRACKET) {
        // If a hyphen is being used as a range operator, since it's neither
        // at the beginning nor end of the set.
        if (str[cur_pos] == RANGE_OPERATOR && cur_pos != set_start_pos &&
            str[cur_pos + 1] != CLOSING_BRACKET && str[cur_pos + 1] != '\0') {
          // Technically there is no requirement to correct the ordering of
          // the range, but since the range operator is entirely
          // implementation defined it seems like a good convenience.
          //
          // The endpoints are converted to unsigned char before they are
          // compared or used as positions in the 256-entry set. Where plain
          // char is signed, a byte >= 0x80 would otherwise be a negative
          // value and would not map to a position inside the set.
          unsigned char a = static_cast<unsigned char>(str[cur_pos - 1]);
          unsigned char b = static_cast<unsigned char>(str[cur_pos + 1]);
          unsigned char start = (a < b ? a : b);
          unsigned char end = (a < b ? b : a);
          scan_set.set_range(start, end);
          cur_pos += 2;
        } else {
          // Same conversion as above so that every byte value selects a
          // position in [0, 255].
          scan_set.set(static_cast<unsigned char>(str[cur_pos]));
          ++cur_pos;
        }
      }
      if (invert)
        scan_set.flip();

      if (str[cur_pos] == CLOSING_BRACKET) {
        ++cur_pos;
        section.scan_set = scan_set;
      } else {
        // if the end of the string was encountered, this is not a valid set.
        section.has_conv = false;
      }
    }
  } else {
    // raw section
    section.has_conv = false;
    while (str[cur_pos] != '%' && str[cur_pos] != '\0')
      ++cur_pos;
  }
  section.raw_string = {str + starting_pos, cur_pos - starting_pos};
  return section;
}

private:
// parse_length_modifier parses the length modifier inside a format string. It
// assumes that str[*local_pos] is inside a format specifier. It returns a
// LengthModifier with the length modifier it found. It will advance local_pos
// after the format specifier if one is found.
LengthModifier parse_length_modifier(size_t *local_pos);
LIBC_INLINE LengthModifier parse_length_modifier(size_t *local_pos) {
  const char first = str[*local_pos];

  // 'l' and 'h' are the only modifiers that may be doubled ("ll", "hh"), so
  // they are the only ones for which the following character is inspected.
  if (first == 'l' || first == 'h') {
    const bool doubled = (str[*local_pos + 1] == first);
    *local_pos += (doubled ? 2 : 1);
    if (first == 'l')
      return doubled ? LengthModifier::ll : LengthModifier::l;
    return doubled ? LengthModifier::hh : LengthModifier::h;
  }

  // Every remaining modifier is exactly one character long.
  LengthModifier found = LengthModifier::NONE;
  switch (first) {
  case ('L'):
    found = LengthModifier::L;
    break;
  case ('j'):
    found = LengthModifier::j;
    break;
  case ('z'):
    found = LengthModifier::z;
    break;
  case ('t'):
    found = LengthModifier::t;
    break;
  default:
    // Not a length modifier: leave local_pos where it was.
    return LengthModifier::NONE;
  }
  ++*local_pos;
  return found;
}

// get_next_arg_value gets the next value from the arg list as type T.
template <class T> LIBC_INLINE T get_next_arg_value() {
  // The type of args_cur depends on the ArgProvider template parameter, so
  // the `template` keyword is required for next_var<T> to be parsed as a
  // member template call.
  return args_cur.template next_var<T>();
}

//----------------------------------------------------
Expand All @@ -71,7 +234,17 @@ class Parser {
// returns 0 if there is no closing $, or if it finds no number. If it finds a
// number, it will move local_pos past the end of the $, else it will not move
// local_pos.
size_t parse_index(size_t *local_pos);
LIBC_INLINE size_t parse_index(size_t *local_pos) {
  // An index has to start with a digit; anything else means "no index".
  if (!internal::isdigit(str[*local_pos]))
    return 0;

  const auto parsed = internal::strtointeger<int>(str + *local_pos, 10);
  const size_t index = parsed.value;

  // The number only counts as an index when it is immediately followed by
  // '$'. Otherwise local_pos is left untouched so the caller can reparse the
  // same digits.
  const size_t after_number = *local_pos + parsed.parsed_len;
  if (str[after_number] != '$')
    return 0;

  // Step over the '$' as well.
  *local_pos = after_number + 1;
  return index;
}

// get_arg_value gets the value from the arg list at index (starting at 1).
// This may require parsing the format string. An index of 0 is interpreted as
Expand All @@ -89,7 +262,19 @@ class Parser {
// It moves cur_args to the index requested so the appropriate value may
// be read. This may involve parsing the format string, and is in the worst
// case an O(n^2) operation.
void args_to_index(size_t index);
LIBC_INLINE void args_to_index(size_t index) {
  // The requested argument is before the current one, so start over from the
  // saved copy of the argument list.
  if (index < args_index) {
    args_cur = args_start;
    args_index = 1;
  }

  // Step forward one argument at a time until the cursor is at the requested
  // index. Since all arguments must be pointers, each one is read as void *
  // without worrying about type issues.
  for (; args_index < index; ++args_index)
    args_cur.template next_var<void *>();
}

#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
};
Expand Down
2 changes: 1 addition & 1 deletion libc/src/stdio/scanf_core/scanf_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ namespace scanf_core {

int scanf_main(Reader *reader, const char *__restrict str,
internal::ArgList &args) {
Parser parser(str, args);
Parser<internal::ArgList> parser(str, args);
int ret_val = READ_OK;
int conversions = 0;
for (FormatSection cur_section = parser.get_next_section();
Expand Down
9 changes: 5 additions & 4 deletions libc/test/src/stdio/printf_core/parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,24 +17,25 @@
#include "test/UnitTest/Test.h"

using __llvm_libc::cpp::string_view;
using __llvm_libc::internal::ArgList;

// Builds an ArgList from the variadic arguments and constructs a printf
// Parser over str with it. The parser is not used after construction;
// presumably this exists so a test can exercise construction alone.
void init(const char *__restrict str, ...) {
  va_list vlist;
  va_start(vlist, str);
  ArgList v(vlist);
  va_end(vlist);

  __llvm_libc::printf_core::Parser<ArgList> parser(str, v);
}

void evaluate(__llvm_libc::printf_core::FormatSection *format_arr,
const char *__restrict str, ...) {
va_list vlist;
va_start(vlist, str);
__llvm_libc::internal::ArgList v(vlist);
ArgList v(vlist);
va_end(vlist);

__llvm_libc::printf_core::Parser parser(str, v);
__llvm_libc::printf_core::Parser<ArgList> parser(str, v);

for (auto cur_section = parser.get_next_section();
!cur_section.raw_string.empty();
Expand Down
7 changes: 4 additions & 3 deletions libc/test/src/stdio/scanf_core/parser_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
#include "test/UnitTest/Test.h"

using __llvm_libc::cpp::string_view;
using __llvm_libc::internal::ArgList;

// Builds an ArgList from the variadic arguments and constructs a scanf
// Parser over str with it. The parser is not used after construction;
// presumably this exists so a test can exercise construction alone.
void init(const char *__restrict str, ...) {
  va_list vlist;
  va_start(vlist, str);
  ArgList v(vlist);
  va_end(vlist);

  __llvm_libc::scanf_core::Parser<ArgList> parser(str, v);
}

void evaluate(__llvm_libc::scanf_core::FormatSection *format_arr,
Expand All @@ -35,7 +36,7 @@ void evaluate(__llvm_libc::scanf_core::FormatSection *format_arr,
__llvm_libc::internal::ArgList v(vlist);
va_end(vlist);

__llvm_libc::scanf_core::Parser parser(str, v);
__llvm_libc::scanf_core::Parser<ArgList> parser(str, v);

for (auto cur_section = parser.get_next_section();
!cur_section.raw_string.empty();
Expand Down
2 changes: 0 additions & 2 deletions utils/bazel/llvm-project-overlay/libc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -2692,7 +2692,6 @@ libc_support_library(

libc_support_library(
name = "printf_parser",
srcs = ["src/stdio/printf_core/parser.cpp"],
hdrs = ["src/stdio/printf_core/parser.h"],
defines = PRINTF_COPTS,
deps = [
Expand All @@ -2714,7 +2713,6 @@ libc_support_library(
# Only used for testing.
libc_support_library(
name = "printf_mock_parser",
srcs = ["src/stdio/printf_core/parser.cpp"],
hdrs = ["src/stdio/printf_core/parser.h"],
defines = PRINTF_COPTS + ["LIBC_COPT_MOCK_ARG_LIST"],
deps = [
Expand Down