Skip to content

Commit

Permalink
[libc] add printf base 10 integer conversion
Browse files Browse the repository at this point in the history
This patch adds support for d, i, and u conversions in printf, as well
as comprehensive unit tests.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D125929
  • Loading branch information
michaelrj-google committed Jun 9, 2022
1 parent 214be9d commit 1be3669
Show file tree
Hide file tree
Showing 9 changed files with 369 additions and 3 deletions.
20 changes: 20 additions & 0 deletions libc/src/__support/CPP/Limits.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,26 @@ template <> class NumericLimits<unsigned long long> {
static constexpr unsigned long long max() { return ULLONG_MAX; }
static constexpr unsigned long long min() { return 0; }
};
// Specialization of NumericLimits giving the bounds of `short`.
template <> class NumericLimits<short> {
public:
  // Smallest (most negative) value a `short` can hold.
  static constexpr short min() { return SHRT_MIN; }
  // Largest value a `short` can hold.
  static constexpr short max() { return SHRT_MAX; }
};
// Specialization of NumericLimits giving the bounds of `unsigned short`.
template <> class NumericLimits<unsigned short> {
public:
  // Unsigned types bottom out at zero.
  static constexpr unsigned short min() { return 0; }
  // Largest value an `unsigned short` can hold.
  static constexpr unsigned short max() { return USHRT_MAX; }
};
// Specialization of NumericLimits giving the bounds of plain `char`, as
// reported by the CHAR_MIN and CHAR_MAX macros.
template <> class NumericLimits<char> {
public:
  // Smallest value a plain `char` can hold.
  static constexpr char min() { return CHAR_MIN; }
  // Largest value a plain `char` can hold.
  static constexpr char max() { return CHAR_MAX; }
};
// Specialization of NumericLimits giving the bounds of `unsigned char`.
template <> class NumericLimits<unsigned char> {
public:
  // Unsigned types bottom out at zero.
  static constexpr unsigned char min() { return 0; }
  // Largest value an `unsigned char` can hold.
  static constexpr unsigned char max() { return UCHAR_MAX; }
};
#ifdef __SIZEOF_INT128__
template <> class NumericLimits<__uint128_t> {
public:
Expand Down
1 change: 1 addition & 0 deletions libc/src/stdio/printf_core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ add_object_library(
converter_atlas.h
string_converter.h
char_converter.h
int_converter.h
DEPENDS
.writer
.core_structs
Expand Down
7 changes: 6 additions & 1 deletion libc/src/stdio/printf_core/char_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H

#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"

namespace __llvm_libc {
namespace printf_core {

void convert_char(Writer *writer, const FormatSection &to_conv) {
void inline convert_char(Writer *writer, const FormatSection &to_conv) {
char c = to_conv.conv_val_raw;

if (to_conv.min_width > 1) {
Expand All @@ -31,3 +34,5 @@ void convert_char(Writer *writer, const FormatSection &to_conv) {

} // namespace printf_core
} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
2 changes: 1 addition & 1 deletion libc/src/stdio/printf_core/converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void convert(Writer *writer, const FormatSection &to_conv) {
case 'd':
case 'i':
case 'u':
// convert_int(writer, to_conv);
convert_int(writer, to_conv);
return;
case 'o':
// convert_oct(writer, to_conv);
Expand Down
2 changes: 2 additions & 0 deletions libc/src/stdio/printf_core/converter_atlas.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include "src/stdio/printf_core/char_converter.h"

// defines convert_int
#include "src/stdio/printf_core/int_converter.h"

// defines convert_oct
// defines convert_hex

Expand Down
174 changes: 174 additions & 0 deletions libc/src/stdio/printf_core/int_converter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
//===-- Integer Converter for printf ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H

#include "src/__support/CPP/Limits.h"
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"

#include <inttypes.h>
#include <stddef.h>

namespace __llvm_libc {
namespace printf_core {

// Writes the base 10 representation of an integer conversion ('d', 'i', or
// 'u') to the given writer.
//
// writer:  destination for the formatted characters.
// to_conv: the parsed format section. This function reads conv_val_raw (the
//          value to print), conv_name, length_modifier, flags, min_width, and
//          precision from it.
//
// The output consists of an optional sign character, zero padding, the digits
// of the number, and space padding, ordered according to the LEFT_JUSTIFIED
// flag.
void inline convert_int(Writer *writer, const FormatSection &to_conv) {
  static constexpr size_t BITS_IN_BYTE = 8;
  static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;

  // This approximates the number of digits it takes to represent an integer of
  // a certain number of bits. The calculation is floor((bits * 5) / 16)
  // 32 -> 10 (actually needs 10)
  // 64 -> 20 (actually needs 20)
  // 128 -> 40 (actually needs 39)
  // This estimation grows slightly faster than the actual value, but is close
  // enough.
  static constexpr size_t BUFF_LEN = (BITS_IN_NUM * 5) / 16;

  uintmax_t num = to_conv.conv_val_raw;
  char buffer[BUFF_LEN];
  bool is_negative = false;
  FormatFlags flags = to_conv.flags;

  if (to_conv.conv_name == 'u') {
    // These flags are only for signed conversions, so this removes them if the
    // conversion is unsigned.
    flags = FormatFlags(flags &
                        ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
  } else {
    // Check if the number is negative by checking the high bit. This works even
    // for smaller numbers because they're sign extended by default.
    // NOTE(review): the sign is taken from the full-width value before the
    // length modifier narrows it below, so a value that does not fit in the
    // narrow type (e.g. 384 with hh) is not reinterpreted as that type.
    // Confirm that this is the intended behavior.
    if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) {
      is_negative = true;
      num = -num;
    }
  }

  // Truncate the magnitude to the width requested by the length modifier.
  switch (to_conv.length_modifier) {
  case LengthModifier::none:
    num = num & cpp::NumericLimits<unsigned int>::max();
    break;

  case LengthModifier::l:
    num = num & cpp::NumericLimits<unsigned long>::max();
    break;
  case LengthModifier::ll:
  case LengthModifier::L:
    num = num & cpp::NumericLimits<unsigned long long>::max();
    break;
  case LengthModifier::h:
    num = num & cpp::NumericLimits<unsigned short>::max();
    break;
  case LengthModifier::hh:
    num = num & cpp::NumericLimits<unsigned char>::max();
    break;
  case LengthModifier::z:
    num = num & cpp::NumericLimits<size_t>::max();
    break;
  case LengthModifier::t:
    // We don't have unsigned ptrdiff so uintptr_t is used, since we need an
    // unsigned type and ptrdiff is usually the same size as a pointer.
    static_assert(sizeof(ptrdiff_t) == sizeof(uintptr_t));
    num = num & cpp::NumericLimits<uintptr_t>::max();
    break;
  case LengthModifier::j:
    // j is intmax, so no mask is necessary.
    break;
  }

  // The digits are generated least significant first, filling the buffer from
  // its end towards its start. buff_cur can reach 0 (when the number uses
  // every slot) but can never go below it, since the buffer is sized to always
  // be able to contain the whole integer. This means that bounds checking it
  // should be unnecessary.
  size_t buff_cur = BUFF_LEN;
  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 10)
    buffer[buff_cur - 1] = static_cast<char>((num % 10) + '0');

  // A value of 0 produces no digits here; the padding logic below is
  // responsible for emitting the "0" when one is required.
  const size_t digits_written = BUFF_LEN - buff_cur;

  char sign_char = 0;

  if (is_negative)
    sign_char = '-';
  else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
    sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
  else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
    sign_char = ' ';

  const int sign_char_len = (sign_char == 0 ? 0 : 1);

  // The padding math is done entirely in int so that intermediate negative
  // results are ordinary negative numbers instead of wrapped unsigned values.
  // digits_written is at most BUFF_LEN, so the conversion cannot overflow.
  const int digits_len = static_cast<int>(digits_written);

  // The eventual values of these will always be non-negative.
  int zeroes;
  int spaces;

  // Negative precision indicates that it was not specified.
  if (to_conv.precision < 0) {
    // If no precision is specified, precision defaults to 1. This means that
    // if the integer passed to the conversion is 0, a 0 must still be printed,
    // regardless of which padding style is in use.
    // Example: ("%3d", 0) -> "  0", ("%0d", 0) -> "0"
    const int min_zeroes = (digits_len < 1) ? 1 : 0;

    if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
        FormatFlags::LEADING_ZEROES) {
      // If this conv has flag 0 but not - and no specified precision, it's
      // padded with 0's instead of spaces identically to if precision =
      // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
      zeroes = to_conv.min_width - digits_len - sign_char_len;
      if (zeroes < min_zeroes)
        zeroes = min_zeroes;
      spaces = 0;
    } else {
      // Otherwise just write the number (or the single default 0), padded by
      // spaces.
      zeroes = min_zeroes;
      spaces = to_conv.min_width - zeroes - digits_len - sign_char_len;
    }
  } else {
    // If precision was specified, possibly write zeroes, and possibly write
    // spaces. Example: ("%5.4d", 10000) -> "10000"
    // If the check for if zeroes is negative was not there, spaces would be
    // incorrectly evaluated as 1.
    zeroes = to_conv.precision - digits_len; // a negative value means 0
    if (zeroes < 0)
      zeroes = 0;
    spaces = to_conv.min_width - zeroes - digits_len - sign_char_len;
  }
  if (spaces < 0)
    spaces = 0;

  if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
    // If left justified it goes sign zeroes digits spaces
    if (sign_char != 0)
      writer->write(&sign_char, 1);
    if (zeroes > 0)
      writer->write_chars('0', zeroes);
    if (digits_written > 0)
      writer->write(buffer + buff_cur, digits_written);
    if (spaces > 0)
      writer->write_chars(' ', spaces);
  } else {
    // Else it goes spaces sign zeroes digits
    if (spaces > 0)
      writer->write_chars(' ', spaces);
    if (sign_char != 0)
      writer->write(&sign_char, 1);
    if (zeroes > 0)
      writer->write_chars('0', zeroes);
    if (digits_written > 0)
      writer->write(buffer + buff_cur, digits_written);
  }
}

} // namespace printf_core
} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
7 changes: 6 additions & 1 deletion libc/src/stdio/printf_core/string_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H

#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"

Expand All @@ -14,7 +17,7 @@
namespace __llvm_libc {
namespace printf_core {

void convert_string(Writer *writer, const FormatSection &to_conv) {
void inline convert_string(Writer *writer, const FormatSection &to_conv) {
int string_len = 0;

for (char *cur_str = reinterpret_cast<char *>(to_conv.conv_val_ptr);
Expand Down Expand Up @@ -44,3 +47,5 @@ void convert_string(Writer *writer, const FormatSection &to_conv) {

} // namespace printf_core
} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
14 changes: 14 additions & 0 deletions libc/test/src/stdio/printf_core/converter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,17 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionLeftJustified) {
ASSERT_STREQ(str, "ghi ");
ASSERT_EQ(writer.get_chars_written(), 4ull);
}

// Verifies that a plain "%d" conversion with no flags, width, or precision
// set writes exactly the decimal digits of the value.
TEST_F(LlvmLibcPrintfConverterTest, IntConversionSimple) {
  // Describe the conversion by hand rather than going through the parser.
  __llvm_libc::printf_core::FormatSection simple_conv;
  simple_conv.raw_string = "%d";
  simple_conv.has_conv = true;
  simple_conv.conv_val_raw = 12345;
  simple_conv.conv_name = 'd';

  __llvm_libc::printf_core::convert(&writer, simple_conv);

  // Terminate the output so that it can be compared as a C string.
  str_writer.terminate();

  ASSERT_STREQ(str, "12345");
  ASSERT_EQ(writer.get_chars_written(), 5ull);
}
Loading

0 comments on commit 1be3669

Please sign in to comment.