Expand Up
@@ -11,6 +11,8 @@
#include " src/__support/arg_list.h"
#include " src/__support/common.h"
#include " src/__support/ctype_utils.h"
#include " src/__support/str_to_integer.h"
#include " src/stdio/scanf_core/core_structs.h"
#include " src/stdio/scanf_core/scanf_config.h"
Expand All
@@ -19,17 +21,23 @@
namespace __llvm_libc {
namespace scanf_core {
class Parser {
#ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
#define GET_ARG_VAL_SIMPLEST (arg_type, index ) get_arg_value<arg_type>(index)
#else
#define GET_ARG_VAL_SIMPLEST (arg_type, _ ) get_next_arg_value<arg_type>()
#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
template <typename ArgProvider> class Parser {
const char *__restrict str;
size_t cur_pos = 0 ;
internal::ArgList args_cur;
ArgProvider args_cur;
#ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
// args_start stores the start of the va_args, which is used when a previous
// argument is needed. In that case, we have to read the arguments from the
// beginning since they don't support reading backwards.
internal::ArgList args_start;
ArgProvider args_start;
size_t args_index = 1 ;
#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
Expand All
@@ -46,18 +54,173 @@ class Parser {
// specified format section. This can either be a raw format section with no
// conversion, or a format section with a conversion that has all of its
// variables stored in the format section.
FormatSection get_next_section ();
LIBC_INLINE FormatSection get_next_section () {
FormatSection section;
size_t starting_pos = cur_pos;
if (str[cur_pos] == ' %' ) {
// format section
section.has_conv = true ;
++cur_pos;
[[maybe_unused]] size_t conv_index = 0 ;
#ifndef LIBC_COPT_SCANF_DISABLE_INDEX_MODE
conv_index = parse_index (&cur_pos);
#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
if (str[cur_pos] == ' *' ) {
++cur_pos;
section.flags = FormatFlags::NO_WRITE;
}
// handle width
section.max_width = -1 ;
if (internal::isdigit (str[cur_pos])) {
auto result = internal::strtointeger<int >(str + cur_pos, 10 );
section.max_width = result.value ;
cur_pos = cur_pos + result.parsed_len ;
}
// TODO(michaelrj): add posix allocate flag support.
// if (str[cur_pos] == 'm') {
// ++cur_pos;
// section.flags = FormatFlags::ALLOCATE;
// }
LengthModifier lm = parse_length_modifier (&cur_pos);
section.length_modifier = lm;
section.conv_name = str[cur_pos];
// If NO_WRITE is not set, then read the next arg as the output pointer.
if ((section.flags & FormatFlags::NO_WRITE) == 0 ) {
// Since all outputs are pointers, there's no need to distinguish when
// reading from va_args. They're all the same size and stored the same.
section.output_ptr = GET_ARG_VAL_SIMPLEST (void *, conv_index);
}
// If the end of the format section is on the '\0'. This means we need to
// not advance the cur_pos and we should not count this has having a
// conversion.
if (str[cur_pos] != ' \0 ' ) {
++cur_pos;
} else {
section.has_conv = false ;
}
// If the format is a bracketed one, then we need to parse out the insides
// of the brackets.
if (section.conv_name == ' [' ) {
constexpr char CLOSING_BRACKET = ' ]' ;
constexpr char INVERT_FLAG = ' ^' ;
constexpr char RANGE_OPERATOR = ' -' ;
cpp::bitset<256 > scan_set;
bool invert = false ;
// The circumflex in the first position represents the inversion flag,
// but it's easier to apply that at the end so we just store it for now.
if (str[cur_pos] == INVERT_FLAG) {
invert = true ;
++cur_pos;
}
// This is used to determine if a hyphen is being used as a literal or
// as a range operator.
size_t set_start_pos = cur_pos;
// Normally the right bracket closes the set, but if it's the first
// character (possibly after the inversion flag) then it's instead
// included as a character in the set and the second right bracket
// closes the set.
if (str[cur_pos] == CLOSING_BRACKET) {
scan_set.set (CLOSING_BRACKET);
++cur_pos;
}
while (str[cur_pos] != ' \0 ' && str[cur_pos] != CLOSING_BRACKET) {
// If a hyphen is being used as a range operator, since it's neither
// at the beginning nor end of the set.
if (str[cur_pos] == RANGE_OPERATOR && cur_pos != set_start_pos &&
str[cur_pos + 1 ] != CLOSING_BRACKET && str[cur_pos + 1 ] != ' \0 ' ) {
// Technically there is no requirement to correct the ordering of
// the range, but since the range operator is entirely
// implementation defined it seems like a good convenience.
char a = str[cur_pos - 1 ];
char b = str[cur_pos + 1 ];
char start = (a < b ? a : b);
char end = (a < b ? b : a);
scan_set.set_range (start, end);
cur_pos += 2 ;
} else {
scan_set.set (str[cur_pos]);
++cur_pos;
}
}
if (invert)
scan_set.flip ();
if (str[cur_pos] == CLOSING_BRACKET) {
++cur_pos;
section.scan_set = scan_set;
} else {
// if the end of the string was encountered, this is not a valid set.
section.has_conv = false ;
}
}
} else {
// raw section
section.has_conv = false ;
while (str[cur_pos] != ' %' && str[cur_pos] != ' \0 ' )
++cur_pos;
}
section.raw_string = {str + starting_pos, cur_pos - starting_pos};
return section;
}
private:
// parse_length_modifier parses the length modifier inside a format string. It
// assumes that str[*local_pos] is inside a format specifier. It returns a
// LengthModifier with the length modifier it found. It will advance local_pos
// after the format specifier if one is found.
LengthModifier parse_length_modifier (size_t *local_pos);
LIBC_INLINE LengthModifier parse_length_modifier (size_t *local_pos) {
switch (str[*local_pos]) {
case (' l' ):
if (str[*local_pos + 1 ] == ' l' ) {
*local_pos += 2 ;
return LengthModifier::ll;
} else {
++*local_pos;
return LengthModifier::l;
}
case (' h' ):
if (str[*local_pos + 1 ] == ' h' ) {
*local_pos += 2 ;
return LengthModifier::hh;
} else {
++*local_pos;
return LengthModifier::h;
}
case (' L' ):
++*local_pos;
return LengthModifier::L;
case (' j' ):
++*local_pos;
return LengthModifier::j;
case (' z' ):
++*local_pos;
return LengthModifier::z;
case (' t' ):
++*local_pos;
return LengthModifier::t;
default :
return LengthModifier::NONE;
}
}
// get_next_arg_value gets the next value from the arg list as type T.
template <class T > LIBC_INLINE T get_next_arg_value () {
return args_cur.next_var <T>();
return args_cur.template next_var <T>();
}
// ----------------------------------------------------
Expand All
@@ -71,7 +234,17 @@ class Parser {
// returns 0 if there is no closing $, or if it finds no number. If it finds a
// number, it will move local_pos past the end of the $, else it will not move
// local_pos.
size_t parse_index (size_t *local_pos);
LIBC_INLINE size_t parse_index (size_t *local_pos) {
if (internal::isdigit (str[*local_pos])) {
auto result = internal::strtointeger<int >(str + *local_pos, 10 );
size_t index = result.value ;
if (str[*local_pos + result.parsed_len ] != ' $' )
return 0 ;
*local_pos = 1 + result.parsed_len + *local_pos;
return index ;
}
return 0 ;
}
// get_arg_value gets the value from the arg list at index (starting at 1).
// This may require parsing the format string. An index of 0 is interpreted as
Expand All
@@ -89,7 +262,19 @@ class Parser {
// It moves cur_args to the index requested so the appropriate value may
// be read. This may involve parsing the format string, and is in the worst
// case an O(n^2) operation.
void args_to_index (size_t index);
LIBC_INLINE void args_to_index (size_t index) {
if (args_index > index ) {
args_index = 1 ;
args_cur = args_start;
}
while (args_index < index ) {
// Since all arguments must be pointers, we can just read all of them as
// void * and not worry about type issues.
args_cur.template next_var <void *>();
++args_index;
}
}
#endif // LIBC_COPT_SCANF_DISABLE_INDEX_MODE
};
Expand Down