Skip to content

Commit

Permalink
[libc] add int to string for extended bases
Browse files Browse the repository at this point in the history
The default IntegerToString class only supports base 10. This patch adds
a version that supports any base between 2 and 36 inclusive. This will
be used in an upcoming patch.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D131301
  • Loading branch information
michaelrj-google committed Aug 10, 2022
1 parent 5dd8553 commit 23ace05
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 19 deletions.
98 changes: 79 additions & 19 deletions libc/src/__support/integer_to_string.h
Expand Up @@ -9,14 +9,23 @@
#ifndef LLVM_LIBC_SRC_SUPPORT_INTEGER_TO_STRING_H
#define LLVM_LIBC_SRC_SUPPORT_INTEGER_TO_STRING_H

#include "src/__support/CPP/ArrayRef.h"
#include "src/__support/CPP/StringView.h"
#include "src/__support/CPP/optional.h"
#include "src/__support/CPP/type_traits.h"

namespace __llvm_libc {

template <typename T> class IntegerToString {
template <typename T, uint8_t BASE = 10> class IntegerToString {
public:
// We size the string buffer using an approximation algorithm:
// Returns floor(log2(num)): the number of times |num| can be halved before it
// drops to 1 or below. Both 0 and 1 therefore yield 0.
static constexpr inline size_t floor_log_2(size_t num) {
  size_t exponent = 0;
  while (num > 1) {
    num >>= 1;
    ++exponent;
  }
  return exponent;
}
// We size the string buffer for base 10 using an approximation algorithm:
//
// size = ceil(sizeof(T) * 5 / 2)
//
Expand All @@ -34,8 +43,20 @@ template <typename T> class IntegerToString {
// overhead is small enough to tolerate. In the actual formula below, we
// add an additional byte to accommodate the '-' sign in case of signed
// integers.
static constexpr size_t BUFSIZE =
(sizeof(T) * 5 + 1) / 2 + (cpp::is_signed<T>() ? 1 : 0);
// For other bases, we approximate by rounding down to the nearest power of
// two base, since the space needed is easy to calculate and it won't
// overestimate by too much.

// Returns the number of characters reserved for a value of type T written in
// base STATIC_BASE, plus one character for the '-' sign when T is signed.
// Base 10 uses the dedicated estimate (sizeof(T) * 5 + 1) / 2. Every other
// base is sized as if each digit carried floor(log2(STATIC_BASE)) bits, i.e.
// ceil(bits in T / bits per digit) characters.
template <uint8_t STATIC_BASE> static constexpr size_t bufsize() {
  // Bases 0 and 1 give zero bits per digit; without this check they would only
  // surface as a division by zero inside the constant expression below.
  static_assert(STATIC_BASE >= 2,
                "IntegerToString requires a base of at least 2");
  constexpr size_t BITS_PER_DIGIT = floor_log_2(STATIC_BASE);
  constexpr size_t BUFSIZE_COMMON =
      ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT);
  constexpr size_t BUFSIZE_BASE10 = (sizeof(T) * 5 + 1) / 2;
  return (cpp::is_signed<T>() ? 1 : 0) +
         (STATIC_BASE == 10 ? BUFSIZE_BASE10 : BUFSIZE_COMMON);
}

// Buffer size for the base this class was instantiated with.
static constexpr size_t BUFSIZE = bufsize<BASE>();

private:
static_assert(cpp::is_integral_v<T>,
Expand All @@ -44,34 +65,73 @@ template <typename T> class IntegerToString {
using UnsignedType = cpp::make_unsigned_t<T>;

char strbuf[BUFSIZE] = {'\0'};
size_t len = 0;
cpp::StringView str_view;

constexpr void convert(UnsignedType val) {
size_t buffptr = BUFSIZE;
if (val == 0) {
strbuf[buffptr - 1] = '0';
// Writes the textual form of |val| in base |conv_base| into the tail of
// |buffer| and returns a view of the characters written. Digits above 9 are
// emitted as letters counted from 'a' when |lowercase| is true and from 'A'
// otherwise. A '-' is prepended when |val| is negative.
static inline constexpr cpp::StringView
convert_alpha_numeric(T val, cpp::MutableArrayRef<char> &buffer,
bool lowercase, const uint8_t conv_base) {
// NOTE(review): for a signed T at least as wide as int, -val overflows when
// val is the minimum value of T — confirm whether negating in the unsigned
// type is needed here.
UnsignedType uval = val < 0 ? UnsignedType(-val) : UnsignedType(val);

const char a = lowercase ? 'a' : 'A';

size_t len = 0;

size_t buffptr = buffer.size();
if (uval == 0) {
buffer[buffptr - 1] = '0';
--buffptr;
} else {
for (; val > 0; --buffptr, val /= 10)
strbuf[buffptr - 1] = (val % 10) + '0';
for (; uval > 0; --buffptr, uval /= conv_base) {
UnsignedType digit = (uval % conv_base);
buffer[buffptr - 1] = digit < 10 ? digit + '0' : digit + a - 10;
}
}
len = BUFSIZE - buffptr;
}
len = buffer.size() - buffptr;

public:
constexpr explicit IntegerToString(T val) {
convert(val < 0 ? UnsignedType(-val) : UnsignedType(val));
if (val < 0) {
// This branch will be taken only for negative signed values.
++len;
strbuf[BUFSIZE - len] = '-';
buffer[buffer.size() - len] = '-';
}
cpp::StringView buff_str(buffer.data() + buffer.size() - len, len);
return buff_str;
}

// Compile-time gate for the base: this overload only participates when
// STATIC_BASE lies in [2, 36]. It also turns the templated call into a
// non-templated one by passing the base as an ordinary argument, so the
// compiler may choose whether to strength-reduce and constant-fold on the
// base, depending on whether size or performance is required.
template <uint8_t STATIC_BASE = BASE,
          cpp::enable_if_t<(STATIC_BASE >= 2 && STATIC_BASE <= 36), int> = 0>
static inline constexpr cpp::StringView
convert_internal(T val, cpp::MutableArrayRef<char> &buffer, bool lowercase) {
  constexpr uint8_t CONV_BASE = STATIC_BASE;
  return convert_alpha_numeric(val, buffer, lowercase, CONV_BASE);
}

public:
// Converts |val| to text in base STATIC_BASE inside the caller-supplied
// |buffer|. Returns an empty optional when |buffer| is smaller than
// bufsize<STATIC_BASE>(); otherwise returns a view of the written characters.
template <uint8_t STATIC_BASE = BASE>
static inline cpp::optional<cpp::StringView>
convert(T val, cpp::MutableArrayRef<char> &buffer, bool lowercase) {
  constexpr size_t REQUIRED_SIZE = bufsize<STATIC_BASE>();
  // If this function can actually be a constexpr, then the size check below
  // will be optimized out.
  const bool buffer_fits = buffer.size() >= REQUIRED_SIZE;
  if (buffer_fits) {
    return cpp::optional<cpp::StringView>(
        convert_internal<STATIC_BASE>(val, buffer, lowercase));
  }
  return cpp::optional<cpp::StringView>();
}

cpp::StringView str() const {
return cpp::StringView(strbuf + BUFSIZE - len, len);
// Converts |val| into the internal buffer using lowercase letters for digits
// above 9; equivalent to the two-argument constructor with lowercase == true.
constexpr explicit IntegerToString(T val) : IntegerToString(val, true) {}

// Converts |val| into the internal buffer in base BASE and remembers the
// resulting view. |lowercase| selects the letter case of digits above 9.
constexpr explicit IntegerToString(T val, bool lowercase) {
  // Wrap the member array so the shared conversion routine can write into it.
  cpp::MutableArrayRef<char> storage(strbuf, BUFSIZE);
  this->str_view = convert_internal<BASE>(val, storage, lowercase);
}

// Returns the view of the converted text that the constructor stored.
// NOTE(review): the view appears to point into this object's own strbuf, so a
// copied object would keep referring to the source object's buffer — confirm.
cpp::StringView str() const { return str_view; }

// Implicit conversion to a string view; yields the same view as str().
operator cpp::StringView() const {
  return str_view;
}
};

Expand Down
58 changes: 58 additions & 0 deletions libc/test/src/__support/integer_to_string_test.cpp
Expand Up @@ -14,6 +14,7 @@
#include "limits.h"

using __llvm_libc::integer_to_string;
using __llvm_libc::IntegerToString;
using __llvm_libc::cpp::StringView;

TEST(LlvmLibcIntegerToStringTest, UINT8) {
Expand Down Expand Up @@ -183,3 +184,60 @@ TEST(LlvmLibcIntegerToStringTest, INT64) {
EXPECT_EQ(integer_to_string(int64_t(INT64_MIN)).str(),
(StringView("-9223372036854775808")));
}

// Base 10 output for unsigned 64-bit input: zero and a 19-digit value.
// Arguments are spelled as uint64_t so they match the type under test.
TEST(LlvmLibcIntegerToStringTest, UINT64_Base_10) {
  EXPECT_EQ((IntegerToString<uint64_t, 10>(uint64_t(0)).str()),
            StringView("0"));
  EXPECT_EQ(
      (IntegerToString<uint64_t, 10>(uint64_t(1234567890123456789)).str()),
      StringView("1234567890123456789"));
}

// Base 8 output for unsigned 64-bit input, up to the all-ones value.
// Arguments are spelled as uint64_t: the largest literal does not fit in
// int64_t, and converting it to a signed type first is implementation-defined
// before C++20.
TEST(LlvmLibcIntegerToStringTest, UINT64_Base_8) {
  EXPECT_EQ((IntegerToString<uint64_t, 8>(uint64_t(0)).str()), StringView("0"));
  EXPECT_EQ((IntegerToString<uint64_t, 8>(uint64_t(012345)).str()),
            StringView("12345"));
  EXPECT_EQ(
      (IntegerToString<uint64_t, 8>(uint64_t(0123456701234567012345)).str()),
      StringView("123456701234567012345"));
  EXPECT_EQ(
      (IntegerToString<uint64_t, 8>(uint64_t(01777777777777777777777)).str()),
      StringView("1777777777777777777777"));
}

// Base 16 output for unsigned 64-bit input, covering both letter cases and
// the all-ones value. Arguments are spelled as uint64_t: 0xffffffffffffffff
// does not fit in int64_t, and converting it to a signed type first is
// implementation-defined before C++20.
TEST(LlvmLibcIntegerToStringTest, UINT64_Base_16) {
  EXPECT_EQ((IntegerToString<uint64_t, 16>(uint64_t(0)).str()),
            StringView("0"));
  EXPECT_EQ((IntegerToString<uint64_t, 16>(uint64_t(0x12345)).str()),
            StringView("12345"));
  EXPECT_EQ((IntegerToString<uint64_t, 16>(uint64_t(0x123456789abcdef)).str()),
            StringView("123456789abcdef"));
  EXPECT_EQ(
      (IntegerToString<uint64_t, 16>(uint64_t(0x123456789abcdef), false).str()),
      StringView("123456789ABCDEF"));
  EXPECT_EQ((IntegerToString<uint64_t, 16>(uint64_t(0xffffffffffffffff)).str()),
            StringView("ffffffffffffffff"));
}

// Base 2 output for unsigned 64-bit input, up to the 64-digit all-ones value.
// Arguments are spelled as uint64_t: 0xffffffffffffffff does not fit in
// int64_t, and converting it to a signed type first is implementation-defined
// before C++20.
TEST(LlvmLibcIntegerToStringTest, UINT64_Base_2) {
  EXPECT_EQ((IntegerToString<uint64_t, 2>(uint64_t(0)).str()), StringView("0"));
  EXPECT_EQ((IntegerToString<uint64_t, 2>(uint64_t(0xf0c)).str()),
            StringView("111100001100"));
  EXPECT_EQ((IntegerToString<uint64_t, 2>(uint64_t(0x123abc)).str()),
            StringView("100100011101010111100"));
  EXPECT_EQ(
      (IntegerToString<uint64_t, 2>(uint64_t(0xffffffffffffffff)).str()),
      StringView(
          "1111111111111111111111111111111111111111111111111111111111111111"));
}

// Base 36 output for unsigned 64-bit input, exercising every letter digit in
// both cases plus the all-ones value. Arguments are spelled as uint64_t:
// 0xffffffffffffffff does not fit in int64_t, and converting it to a signed
// type first is implementation-defined before C++20.
TEST(LlvmLibcIntegerToStringTest, UINT64_Base_36) {
  EXPECT_EQ((IntegerToString<uint64_t, 36>(uint64_t(0)).str()),
            StringView("0"));
  EXPECT_EQ((IntegerToString<uint64_t, 36>(uint64_t(12345)).str()),
            StringView("9ix"));
  EXPECT_EQ((IntegerToString<uint64_t, 36>(uint64_t(1047601316295595)).str()),
            StringView("abcdefghij"));
  EXPECT_EQ((IntegerToString<uint64_t, 36>(uint64_t(2092218013456445)).str()),
            StringView("klmnopqrst"));
  EXPECT_EQ((IntegerToString<uint64_t, 36>(uint64_t(1867590395), false).str()),
            StringView("UVWXYZ"));
  EXPECT_EQ((IntegerToString<uint64_t, 36>(uint64_t(0xffffffffffffffff)).str()),
            StringView("3w5e11264sgsf"));
}

0 comments on commit 23ace05

Please sign in to comment.