Skip to content

Conversation

@shubhe25p
Copy link
Contributor

@shubhe25p shubhe25p commented Nov 29, 2025

Add %lc support to libc printf by using the internal wcrtomb function, and add relevant unit tests. This work has surfaced several new issues, described in the comment below. Resolves #166598

@llvmbot llvmbot added the libc label Nov 29, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 29, 2025

@llvm/pr-subscribers-libc

Author: Shubh Pachchigar (shubhe25p)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/169983.diff

4 Files Affected:

  • (modified) libc/src/stdio/printf_core/CMakeLists.txt (+3)
  • (modified) libc/src/stdio/printf_core/char_converter.h (+24-4)
  • (modified) libc/test/src/stdio/printf_core/CMakeLists.txt (+1)
  • (modified) libc/test/src/stdio/printf_core/converter_test.cpp (+54-1)
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 624129b2b36e7..f35a65de1f110 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -111,6 +111,7 @@ add_header_library(
     .printf_config
     .writer
     libc.include.inttypes
+    libc.hdr.types.wchar_t
     libc.src.__support.big_int
     libc.src.__support.common
     libc.src.__support.CPP.limits
@@ -123,6 +124,8 @@ add_header_library(
     libc.src.__support.integer_to_string
     libc.src.__support.libc_assert
     libc.src.__support.uint128
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.wcrtomb
     libc.src.__support.StringUtil.error_to_string
     libc.src.string.memory_utils.inline_memcpy
 )
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index fd2eb2553887a..31dd34fe7a797 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -1,4 +1,4 @@
-//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//===-- Character Converter for printf -----------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,10 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 
+#include "hdr/types/wchar_t.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
@@ -20,8 +23,11 @@ namespace printf_core {
 template <WriteMode write_mode>
 LIBC_INLINE int convert_char(Writer<write_mode> *writer,
                              const FormatSection &to_conv) {
-  char c = static_cast<char>(to_conv.conv_val_raw);
-
+  char c;
+  wchar_t wc;
+  char mb_str[MB_LEN_MAX];
+  internal::mbstate internal_mbstate = {0};
+  int ret = 0;
   constexpr int STRING_LEN = 1;
 
   size_t padding_spaces =
@@ -33,7 +39,21 @@ LIBC_INLINE int convert_char(Writer<write_mode> *writer,
     RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
   }
 
-  RET_IF_RESULT_NEGATIVE(writer->write(c));
+  if (to_conv.length_modifier == LengthModifier::l) {
+    wc = static_cast<wchar_t>(to_conv.conv_val_raw);
+    ret = internal::wcrtomb(mb_str, wc, &internal_mbstate);
+    if (ret <= 0) {
+      return -1;
+    }
+
+    for (int i = 0; i < ret; i++) {
+      RET_IF_RESULT_NEGATIVE(writer->write(mb_str[i]));
+    }
+
+  } else {
+    c = static_cast<char>(to_conv.conv_val_raw);
+    RET_IF_RESULT_NEGATIVE(writer->write(c));
+  }
 
   // If the padding is on the right side, write the spaces last.
   if (padding_spaces > 0 &&
diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt
index ff7ebbc4f5fd0..a4c919420777d 100644
--- a/libc/test/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/printf_core/CMakeLists.txt
@@ -35,4 +35,5 @@ add_libc_unittest(
     libc.src.stdio.printf_core.converter
     libc.src.stdio.printf_core.writer
     libc.src.stdio.printf_core.core_structs
+    libc.hdr.types.wchar_t
 )
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 2dae2a22c864c..026e36747d4df 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -9,7 +9,7 @@
 #include "src/stdio/printf_core/converter.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
-
+#include "hdr/types/wchar_t.h"
 #include "test/UnitTest/Test.h"
 
 class LlvmLibcPrintfConverterTest : public LIBC_NAMESPACE::testing::Test {
@@ -255,3 +255,56 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) {
   ASSERT_STREQ(str, "1234");
   ASSERT_EQ(writer.get_chars_written(), size_t{4});
 }
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) {
+
+  LIBC_NAMESPACE::printf_core::FormatSection section;
+  section.has_conv = true;
+  section.raw_string = "%c";
+  section.conv_name = 'c';
+  section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+  section.conv_val_raw = static_cast<wchar_t>(L'S');
+
+  LIBC_NAMESPACE::printf_core::convert(&writer, section);
+
+  wb.buff[wb.buff_cur] = '\0';
+
+  ASSERT_STREQ(str, "S");
+  ASSERT_EQ(writer.get_chars_written(), size_t{1});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) {
+  LIBC_NAMESPACE::printf_core::FormatSection left_justified_conv;
+  left_justified_conv.has_conv = true;
+  left_justified_conv.raw_string = "%-4c";
+  left_justified_conv.conv_name = 'c';
+  left_justified_conv.length_modifier =
+      LIBC_NAMESPACE::printf_core::LengthModifier::l;
+  left_justified_conv.flags =
+      LIBC_NAMESPACE::printf_core::FormatFlags::LEFT_JUSTIFIED;
+  left_justified_conv.min_width = 4;
+  left_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+  LIBC_NAMESPACE::printf_core::convert(&writer, left_justified_conv);
+  wb.buff[wb.buff_cur] = '\0';
+
+  ASSERT_STREQ(str, "S   ");
+  ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) {
+  LIBC_NAMESPACE::printf_core::FormatSection right_justified_conv;
+  right_justified_conv.has_conv = true;
+  right_justified_conv.raw_string = "%4c";
+  right_justified_conv.conv_name = 'c';
+  right_justified_conv.length_modifier =
+      LIBC_NAMESPACE::printf_core::LengthModifier::l;
+  right_justified_conv.min_width = 4;
+  right_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+  LIBC_NAMESPACE::printf_core::convert(&writer, right_justified_conv);
+  wb.buff[wb.buff_cur] = '\0';
+
+  ASSERT_STREQ(str, "   S");
+  ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}

@github-actions
Copy link

github-actions bot commented Nov 29, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@shubhe25p
Copy link
Contributor Author

shubhe25p commented Nov 29, 2025

This PR has opened up three interesting follow-up directions, each of which seems feasible with some guidance. I am not sure which of them, if any, should be pulled into this PR, or whether each one is actually a problem. Let me know your thoughts @michaelrj-google:

  1. wchar_t is a distinct integral type in the C++ standard, but the BigInt guard clause raises an error because wchar_t is not recognized. As a workaround, I cast to unsigned int here. The follow-up work would involve adding wchar_t support to BigInt.
  2. Adding wchar_t support on Windows: currently a static assert in wcrtomb causes a build failure on Windows platforms, so the work would involve adding UTF-16 support to wcrtomb.
  3. %ls support in printf, which is a natural extension of this work and would involve looping over a sequence of wchar_t values.

Add %lc support to libc printf by using the internal
wcrtomb function, and add relevant unit tests.

Issue: llvm#166598
@shubhe25p shubhe25p force-pushed the br_printf_lc_support branch from 38f014a to c2a7797 Compare November 29, 2025 11:03
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[libc] Support %lc in printf

2 participants