[libc] add printf base 10 integer conversion

This patch adds support for the d, i, and u conversions in printf, as well as comprehensive unit tests.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D125929
llvm · Jun 9, 2022 · 1be3669 · 1be3669
1 parent 214be9d
commit 1be3669
Show file tree

Hide file tree

Showing 9 changed files with 369 additions and 3 deletions.
diff --git a/libc/src/__support/CPP/Limits.h b/libc/src/__support/CPP/Limits.h
@@ -52,6 +52,26 @@ template <> class NumericLimits<unsigned long long> {
   static constexpr unsigned long long max() { return ULLONG_MAX; }
   static constexpr unsigned long long min() { return 0; }
 };
+template <> class NumericLimits<short> { // bounds of short
+public:
+  static constexpr short max() { return SHRT_MAX; } // largest short value
+  static constexpr short min() { return SHRT_MIN; } // most negative short
+};
+template <> class NumericLimits<unsigned short> { // bounds of unsigned short
+public:
+  static constexpr unsigned short max() { return USHRT_MAX; } // all bits set
+  static constexpr unsigned short min() { return 0; } // unsigned minimum is 0
+};
+template <> class NumericLimits<char> { // bounds of plain char
+public:
+  static constexpr char max() { return CHAR_MAX; } // largest plain char value
+  static constexpr char min() { return CHAR_MIN; } // 0 if plain char is unsigned
+};
+template <> class NumericLimits<unsigned char> { // bounds of unsigned char
+public:
+  static constexpr unsigned char max() { return UCHAR_MAX; } // all bits set
+  static constexpr unsigned char min() { return 0; } // unsigned minimum is 0
+};
 #ifdef __SIZEOF_INT128__
 template <> class NumericLimits<__uint128_t> {
 public:

diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -55,6 +55,7 @@ add_object_library(
     converter_atlas.h
     string_converter.h
     char_converter.h
+    int_converter.h
   DEPENDS
     .writer
     .core_structs

diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
@@ -6,13 +6,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
 
 namespace __llvm_libc {
 namespace printf_core {
 
-void convert_char(Writer *writer, const FormatSection &to_conv) {
+void inline convert_char(Writer *writer, const FormatSection &to_conv) {
   char c = to_conv.conv_val_raw;
 
   if (to_conv.min_width > 1) {
@@ -31,3 +34,5 @@ void convert_char(Writer *writer, const FormatSection &to_conv) {
 
 } // namespace printf_core
 } // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
@@ -42,7 +42,7 @@ void convert(Writer *writer, const FormatSection &to_conv) {
   case 'd':
   case 'i':
   case 'u':
-    // convert_int(writer, to_conv);
+    convert_int(writer, to_conv);
     return;
   case 'o':
     // convert_oct(writer, to_conv);

diff --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h
@@ -20,6 +20,8 @@
 #include "src/stdio/printf_core/char_converter.h"
 
 // defines convert_int
+#include "src/stdio/printf_core/int_converter.h"
+
 // defines convert_oct
 // defines convert_hex
 

diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
@@ -0,0 +1,174 @@
+//===-- Integer Converter for printf ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+
+#include "src/__support/CPP/Limits.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+// Writes the base 10 form of to_conv.conv_val_raw to writer for the 'd', 'i'
+// and 'u' conversions, applying the flags, min_width, precision and
+// length_modifier recorded in to_conv.
+void inline convert_int(Writer *writer, const FormatSection &to_conv) {
+  static constexpr size_t BITS_IN_BYTE = 8;
+
+  // This approximates the number of digits it takes to represent an integer of
+  // a certain number of bits. The calculation is floor((bits * 5) / 16)
+  // 32 -> 10 (actually needs 10)
+  // 64 -> 20 (actually needs 20)
+  // 128 -> 40 (actually needs 39)
+  // This estimation grows slightly faster than the actual value, but is close
+  // enough.
+  static constexpr size_t BUFF_LEN =
+      ((sizeof(uintmax_t) * BITS_IN_BYTE * 5) / 16);
+  uintmax_t num = to_conv.conv_val_raw;
+  char buffer[BUFF_LEN];
+  bool is_negative = false;
+  FormatFlags flags = to_conv.flags;
+
+  // Pick the bits of the raw value that belong to the argument's actual type.
+  // The default of all ones covers j, which is intmax and needs no narrowing.
+  uintmax_t mask = ~uintmax_t(0);
+  switch (to_conv.length_modifier) {
+  case LengthModifier::none:
+    mask = cpp::NumericLimits<unsigned int>::max();
+    break;
+  case LengthModifier::l:
+    mask = cpp::NumericLimits<unsigned long>::max();
+    break;
+  case LengthModifier::ll:
+  case LengthModifier::L:
+    mask = cpp::NumericLimits<unsigned long long>::max();
+    break;
+  case LengthModifier::h:
+    mask = cpp::NumericLimits<unsigned short>::max();
+    break;
+  case LengthModifier::hh:
+    mask = cpp::NumericLimits<unsigned char>::max();
+    break;
+  case LengthModifier::z:
+    mask = cpp::NumericLimits<size_t>::max();
+    break;
+  case LengthModifier::t:
+    // We don't have unsigned ptrdiff so uintptr_t is used, since we need an
+    // unsigned type and ptrdiff is usually the same size as a pointer.
+    static_assert(sizeof(ptrdiff_t) == sizeof(uintptr_t));
+    mask = cpp::NumericLimits<uintptr_t>::max();
+    break;
+  case LengthModifier::j:
+    break;
+  }
+  num &= mask;
+
+  if (to_conv.conv_name == 'u') {
+    // These flags are only for signed conversions, so this removes them if the
+    // conversion is unsigned.
+    flags = FormatFlags(flags &
+                        ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
+  } else {
+    // The sign bit is the top bit of the narrowed type: the only bit set in
+    // mask but not in mask >> 1. Checking it after narrowing means the value
+    // is converted to the target type first, so ("%hhd", 255) -> "-1".
+    const uintmax_t sign_bit = mask ^ (mask >> 1);
+    if ((num & sign_bit) != 0) {
+      is_negative = true;
+      // Negate within the narrowed width; the minimum value of the type maps
+      // onto its own magnitude.
+      num = (uintmax_t(0) - num) & mask;
+    }
+  }
+
+  // buff_cur can never reach 0, since the buffer is sized to always be able to
+  // contain the whole integer. This means that bounds checking it should be
+  // unnecessary.
+  size_t buff_cur = BUFF_LEN;
+  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 10)
+    buffer[buff_cur - 1] = (num % 10) + '0';
+
+  // If no precision is specified, precision defaults to 1. This means that if
+  // the integer passed to the conversion is 0, a 0 will be printed, including
+  // when the 0 flag is set. Examples: ("%3d", 0) -> "  0", ("%0d", 0) -> "0"
+  // A specified precision of 0 still produces no digits for the value 0.
+  if (buff_cur == BUFF_LEN && to_conv.precision < 0)
+    buffer[--buff_cur] = '0';
+
+  // Stored as an int so that the padding arithmetic below is done signed.
+  const int digits_written = static_cast<int>(BUFF_LEN - buff_cur);
+
+  char sign_char = 0;
+  if (is_negative)
+    sign_char = '-';
+  else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
+    sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
+  else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
+    sign_char = ' ';
+
+  int sign_char_len = (sign_char == 0 ? 0 : 1);
+
+  // These are signed to prevent underflow due to negative values. The eventual
+  // values will always be non-negative.
+  int zeroes;
+  int spaces;
+
+  // Negative precision indicates that it was not specified.
+  if (to_conv.precision < 0) {
+    if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
+        FormatFlags::LEADING_ZEROES) {
+      // If this conv has flag 0 but not - and no specified precision, it's
+      // padded with 0's instead of spaces identically to if precision =
+      // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
+      zeroes = to_conv.min_width - digits_written - sign_char_len;
+      if (zeroes < 0)
+        zeroes = 0;
+      spaces = 0;
+    } else {
+      // Otherwise just write the number, which has at least one digit here,
+      // padded by spaces.
+      zeroes = 0;
+      spaces = to_conv.min_width - digits_written - sign_char_len;
+    }
+  } else {
+    // If precision was specified, possibly write zeroes, and possibly write
+    // spaces, clamping zeroes first. Example: ("%5.4d", 10000) -> "10000"
+    zeroes = to_conv.precision - digits_written; // a negative value means 0
+    if (zeroes < 0)
+      zeroes = 0;
+    spaces = to_conv.min_width - zeroes - digits_written - sign_char_len;
+  }
+  if (spaces < 0)
+    spaces = 0;
+
+  const bool left_justified =
+      (flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED;
+
+  // Left justified goes sign zeroes digits spaces, otherwise it goes spaces
+  // sign zeroes digits, so only the position of the spaces differs.
+  if (!left_justified && spaces > 0)
+    writer->write_chars(' ', spaces);
+  if (sign_char != 0)
+    writer->write(&sign_char, 1);
+  if (zeroes > 0)
+    writer->write_chars('0', zeroes);
+  if (digits_written > 0)
+    writer->write(buffer + buff_cur, digits_written);
+  if (left_justified && spaces > 0)
+    writer->write_chars(' ', spaces);
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
+
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
 
@@ -14,7 +17,7 @@
 namespace __llvm_libc {
 namespace printf_core {
 
-void convert_string(Writer *writer, const FormatSection &to_conv) {
+void inline convert_string(Writer *writer, const FormatSection &to_conv) {
   int string_len = 0;
 
   for (char *cur_str = reinterpret_cast<char *>(to_conv.conv_val_ptr);
@@ -44,3 +47,5 @@ void convert_string(Writer *writer, const FormatSection &to_conv) {
 
 } // namespace printf_core
 } // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -181,3 +181,17 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionLeftJustified) {
   ASSERT_STREQ(str, "ghi ");
   ASSERT_EQ(writer.get_chars_written(), 4ull);
 }
+
+TEST_F(LlvmLibcPrintfConverterTest, IntConversionSimple) { // plain "%d"
+  __llvm_libc::printf_core::FormatSection section;
+  section.has_conv = true;
+  section.raw_string = "%d";
+  section.conv_name = 'd'; // 'd' is dispatched to convert_int by convert()
+  section.conv_val_raw = 12345; // no flags, width or precision are set here
+  __llvm_libc::printf_core::convert(&writer, section);
+
+  str_writer.terminate(); // presumably null-terminates str for ASSERT_STREQ
+
+  ASSERT_STREQ(str, "12345");
+  ASSERT_EQ(writer.get_chars_written(), 5ull); // one character per digit
+}