67 changes: 8 additions & 59 deletions libc/src/stdio/scanf_core/int_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "src/__support/CPP/limits.h"
#include "src/__support/ctype_utils.h"
#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"

Expand All @@ -18,58 +19,6 @@
namespace __llvm_libc {
namespace scanf_core {

// Converts an ASCII uppercase letter to its lowercase form. Every other
// character is returned unchanged: unconditionally setting bit 5 would also
// rewrite non-letters (e.g. '\t' -> ')' and '@' -> '`'), which lets callers that
// compare against punctuation see matches that are not there.
constexpr char inline to_lower(char a) {
  return (a >= 'A' && a <= 'Z') ? static_cast<char>(a | 32) : a;
}

// Maps one character to its value as a digit in bases up to 36: '0'-'9' give
// 0-9 and letters of either case give 10-35. Any other character gives 0, so
// callers have to check validity themselves before trusting the result.
constexpr inline int b36_char_to_int(char input) {
  if (internal::isalpha(input))
    return to_lower(input) - 'a' + 10;
  return internal::isdigit(input) ? input - '0' : 0;
}

void write_with_length(uintmax_t output_val, const FormatSection &to_conv) {
if ((to_conv.flags & NO_WRITE) != 0) {
return;
}
LengthModifier lm = to_conv.length_modifier;
void *output_ptr = to_conv.output_ptr;
switch (lm) {
case (LengthModifier::hh):
*reinterpret_cast<unsigned char *>(output_ptr) =
static_cast<unsigned char>(output_val);
break;
case (LengthModifier::h):
*reinterpret_cast<unsigned short *>(output_ptr) =
static_cast<unsigned short>(output_val);
break;
case (LengthModifier::NONE):
*reinterpret_cast<unsigned int *>(output_ptr) =
static_cast<unsigned int>(output_val);
break;
case (LengthModifier::l):
*reinterpret_cast<unsigned long *>(output_ptr) =
static_cast<unsigned long>(output_val);
break;
case (LengthModifier::ll):
case (LengthModifier::L):
*reinterpret_cast<unsigned long long *>(output_ptr) =
static_cast<unsigned long long>(output_val);
break;
case (LengthModifier::j):
*reinterpret_cast<uintmax_t *>(output_ptr) =
static_cast<uintmax_t>(output_val);
break;
case (LengthModifier::z):
*reinterpret_cast<size_t *>(output_ptr) = static_cast<size_t>(output_val);
break;
case (LengthModifier::t):
*reinterpret_cast<ptrdiff_t *>(output_ptr) =
static_cast<ptrdiff_t>(output_val);
break;
}
}

// This code is very similar to the code in __support/str_to_integer.h but is
// not quite the same. Here is the list of differences and why they exist:
// 1) This takes a reader and a format section instead of a char* and the base.
Expand Down Expand Up @@ -130,7 +79,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
is_signed = true;
} else if (to_conv.conv_name == 'o') {
base = 8;
} else if (to_lower(to_conv.conv_name) == 'x') {
} else if (to_lower(to_conv.conv_name) == 'x' || to_conv.conv_name == 'p') {
base = 16;
} else if (to_conv.conv_name == 'd') {
base = 10;
Expand All @@ -150,7 +99,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
} else {
// If the max width has been hit already, then the return value must be 0
// since no actual digits of the number have been parsed yet.
write_with_length(0, to_conv);
write_int_with_length(0, to_conv);
return MATCHING_FAILURE;
}
}
Expand All @@ -168,7 +117,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
--max_width;
cur_char = reader->getc();
} else {
write_with_length(0, to_conv);
write_int_with_length(0, to_conv);
return READ_OK;
}

Expand All @@ -179,7 +128,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
--max_width;
cur_char = reader->getc();
} else {
write_with_length(0, to_conv);
write_int_with_length(0, to_conv);
return READ_OK;
}

Expand All @@ -196,7 +145,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
// If the first character isn't a valid digit, then there are no valid
// digits at all. The number is 0.
reader->ungetc(cur_char);
write_with_length(0, to_conv);
write_int_with_length(0, to_conv);
return MATCHING_FAILURE;
}
}
Expand Down Expand Up @@ -249,12 +198,12 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
reader->ungetc(cur_char);

if (has_overflow) {
write_with_length(MAX, to_conv);
write_int_with_length(MAX, to_conv);
} else {
if (is_negative)
result = -result;

write_with_length(result, to_conv);
write_int_with_length(result, to_conv);
}

if (!is_number)
Expand Down
43 changes: 43 additions & 0 deletions libc/src/stdio/scanf_core/ptr_converter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
//===-- Pointer specifier converter for scanf -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/stdio/scanf_core/ptr_converter.h"

#include "src/stdio/scanf_core/converter_utils.h"
#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/int_converter.h"
#include "src/stdio/scanf_core/reader.h"

#include <stddef.h>

namespace __llvm_libc {
namespace scanf_core {
// Converts a %p section.
//
// The input is first compared against the literal "(nullptr)", ignoring the
// case of its letters:
//  - a complete match stores a null pointer through to_conv.output_ptr (unless
//    the section has the NO_WRITE flag) and returns READ_OK;
//  - a match of only a leading part of the literal returns MATCHING_FAILURE;
//  - if not even the first character matches, the input is handed to
//    convert_int and its result is returned.
// In every case the one character read past the matched text is given back to
// the reader so that it is not lost to whatever reads next.
int convert_pointer(Reader *reader, const FormatSection &to_conv) {
  static const char nullptr_string[] = "(nullptr)";
  // Characters to match; sizeof also counts the terminating '\0', which is not
  // part of the text being looked for.
  constexpr size_t nullptr_len = sizeof(nullptr_string) - 1;

  // Check if it's exactly the nullptr string, if so then it's a nullptr.
  char cur_char = reader->getc();
  size_t i = 0;
  for (; i < nullptr_len; ++i) {
    const char expected = nullptr_string[i];
    // Only the letters are case-insensitive. The parentheses are compared
    // exactly so that case folding can never make some other character look
    // like '(' or ')'.
    const bool is_letter = expected >= 'a' && expected <= 'z';
    const bool matches =
        is_letter ? to_lower(cur_char) == expected : cur_char == expected;
    if (!matches)
      break;
    cur_char = reader->getc();
  }

  // cur_char is always one character beyond what was matched (the loop reads
  // ahead after every successful comparison), so give it back.
  reader->ungetc(cur_char);

  if (i == nullptr_len) {
    // Honor assignment suppression ("%*p"): there is no destination to write.
    if ((to_conv.flags & NO_WRITE) == 0)
      *reinterpret_cast<void **>(to_conv.output_ptr) = nullptr;
    return READ_OK;
  }
  if (i > 0)
    return MATCHING_FAILURE;

  // Else treat it as a hex int
  return convert_int(reader, to_conv);
}
} // namespace scanf_core
} // namespace __llvm_libc
25 changes: 25 additions & 0 deletions libc/src/stdio/scanf_core/ptr_converter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===-- Pointer specifier converter for scanf -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_PTR_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_PTR_CONVERTER_H

#include "src/stdio/scanf_core/core_structs.h"
#include "src/stdio/scanf_core/reader.h"

#include <stddef.h>

namespace __llvm_libc {
namespace scanf_core {

// Reads the input for a %p conversion from reader. The literal "(nullptr)",
// matched without regard to letter case, stores a null pointer through
// to_conv.output_ptr and yields READ_OK; input that matches only a leading part
// of that literal yields MATCHING_FAILURE. Any other input is passed on to
// convert_int and that function's result is returned.
int convert_pointer(Reader *reader, const FormatSection &to_conv);

} // namespace scanf_core
} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_PTR_CONVERTER_H
5 changes: 4 additions & 1 deletion libc/src/stdio/scanf_core/scanf_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ int scanf_main(Reader *reader, const char *__restrict str,
cur_section = parser.get_next_section()) {
if (cur_section.has_conv) {
ret_val = convert(reader, cur_section);
conversions += ret_val == READ_OK ? 1 : 0;
// The %n (current position) conversion doesn't increment the number of
// assignments.
if (cur_section.conv_name != 'n')
conversions += ret_val == READ_OK ? 1 : 0;
} else {
ret_val = raw_match(reader, cur_section.raw_string);
}
Expand Down
106 changes: 101 additions & 5 deletions libc/test/src/stdio/sscanf_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ TEST(LlvmLibcSScanfTest, IntConvSimple) {
EXPECT_EQ(ret_val, 1);
EXPECT_EQ(result, 345);

// 288 characters
ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
"00000000000000000000000000000000"
"00000000000000000000000000000000"
"00000000000000000000000000000000"
"00000000000000000000000000000000"
"00000000000000000000000000000000"
"00000000000000000000000000000000"
"00000000000000000000000000000000"
"00000000000000000000000000000000",
"%d", &result);
EXPECT_EQ(ret_val, 1);
EXPECT_EQ(result, int(__llvm_libc::cpp::numeric_limits<intmax_t>::max()));

ret_val = __llvm_libc::sscanf("Not an integer", "%d", &result);
EXPECT_EQ(ret_val, 0);
}
Expand Down Expand Up @@ -445,11 +459,6 @@ TEST(LlvmLibcSScanfTest, FloatConvComplexParsing) {
EXPECT_FP_EQ(result, 1.2);
}

/*
TODO:
Max width tests
*/

TEST(LlvmLibcSScanfTest, FloatConvMaxWidth) {
int ret_val;
float result = 0;
Expand Down Expand Up @@ -572,6 +581,93 @@ TEST(LlvmLibcSScanfTest, FloatConvNoWrite) {
EXPECT_EQ(ret_val, 0);
}

// Checks the %n (current position) conversion: it must store the number of
// characters consumed so far and must not add to the returned count.
// The outputs are reset before each call so that a value left behind by an
// earlier call can never satisfy a later expectation.
TEST(LlvmLibcSScanfTest, CurPosCombined) {
  int ret_val;
  int result = -1;
  char c_result = 0;

  ret_val = __llvm_libc::sscanf("some text", "%n", &result);
  // %n doesn't count as a conversion for the return value.
  EXPECT_EQ(ret_val, 0);
  EXPECT_EQ(result, 0);

  result = -1;
  ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result);
  EXPECT_EQ(ret_val, 0);
  EXPECT_EQ(result, 5);

  // Scanning the same input again must store the position again.
  result = -1;
  ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result);
  EXPECT_EQ(ret_val, 0);
  EXPECT_EQ(result, 5);

  // 288 characters
  c_result = 0;
  ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000",
                                "%*d%hhn", &c_result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(c_result, char(288)); // Overflow is handled by casting.

  // 320 characters
  result = -1;
  ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000"
                                "00000000000000000000000000000000",
                                "%*d%n", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, 320);
}

// Checks the %p conversion: the "(nullptr)" literal in any letter case, inputs
// that only partly match that literal, and plain hexadecimal values.
// result starts at, and is reset to, a sentinel that none of the expectations
// use, so a check can only pass if that particular call stored the value.
TEST(LlvmLibcSScanfTest, PointerConvCombined) {
  int ret_val;
  // The address of a local object differs from every pointer value expected
  // below.
  void *const unset = &ret_val;
  void *result = unset;

  ret_val = __llvm_libc::sscanf("(nullptr)", "%p", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, static_cast<void *>(nullptr));

  result = unset;
  ret_val = __llvm_libc::sscanf("(NuLlPtR)", "%p", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, static_cast<void *>(nullptr));

  result = unset;
  ret_val = __llvm_libc::sscanf("(NULLPTR)", "%p", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, static_cast<void *>(nullptr));

  result = unset;
  ret_val = __llvm_libc::sscanf("(null)", "%p", &result);
  EXPECT_EQ(ret_val, 0);

  result = unset;
  ret_val = __llvm_libc::sscanf("(nullptr2", "%p", &result);
  EXPECT_EQ(ret_val, 0);

  result = unset;
  ret_val = __llvm_libc::sscanf("0", "%p", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, reinterpret_cast<void *>(0));

  result = unset;
  ret_val = __llvm_libc::sscanf("100", "%p", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, reinterpret_cast<void *>(0x100));

  result = unset;
  ret_val = __llvm_libc::sscanf("-1", "%p", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, reinterpret_cast<void *>(-1));

  result = unset;
  ret_val = __llvm_libc::sscanf("0xabcDEFG", "%p", &result);
  EXPECT_EQ(ret_val, 1);
  EXPECT_EQ(result, reinterpret_cast<void *>(0xabcdef));
}

TEST(LlvmLibcSScanfTest, CombinedConv) {
int ret_val;
int result = 0;
Expand Down