Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc] Refactor scanf reader to match printf #66023

Merged
merged 3 commits into from
Sep 22, 2023

Conversation

michaelrj-google
Copy link
Contributor

In a previous patch, the printf writer was rewritten to use a single writer class with a buffer and a callback hook. This patch refactors scanf's reader to match conceptually.

@michaelrj-google michaelrj-google requested a review from a team as a code owner September 11, 2023 22:43
@llvmbot llvmbot added the libc label Sep 11, 2023
@llvmbot
Copy link
Collaborator

llvmbot commented Sep 11, 2023

@llvm/pr-subscribers-libc

Changes

In a previous patch, the printf writer was rewritten to use a single writer class with a buffer and a callback hook. This patch refactors scanf's reader to match conceptually.

Patch is 26.54 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66023.diff

20 Files Affected:

  • (modified) libc/config/linux/aarch64/entrypoints.txt (+4)
  • (modified) libc/config/linux/riscv64/entrypoints.txt (+3-4)
  • (modified) libc/config/linux/x86_64/entrypoints.txt (+3-3)
  • (modified) libc/src/stdio/CMakeLists.txt (+21-5)
  • (modified) libc/src/stdio/scanf.cpp (+7-1)
  • (modified) libc/src/stdio/scanf_core/CMakeLists.txt (+10-30)
  • (removed) libc/src/stdio/scanf_core/file_reader.cpp (-27)
  • (removed) libc/src/stdio/scanf_core/file_reader.h (-39)
  • (modified) libc/src/stdio/scanf_core/reader.cpp (+8-24)
  • (modified) libc/src/stdio/scanf_core/reader.h (+33-16)
  • (modified) libc/src/stdio/scanf_core/scanf_main.cpp (-5)
  • (removed) libc/src/stdio/scanf_core/string_reader.cpp (-24)
  • (removed) libc/src/stdio/scanf_core/string_reader.h (-33)
  • (removed) libc/src/stdio/scanf_core/vfscanf_internal.cpp (-29)
  • (modified) libc/src/stdio/scanf_core/vfscanf_internal.h (+44-2)
  • (modified) libc/src/stdio/sscanf.cpp (+4-3)
  • (modified) libc/test/src/stdio/CMakeLists.txt (+17-4)
  • (modified) libc/test/src/stdio/fscanf_test.cpp (+28-10)
  • (modified) libc/test/src/stdio/scanf_core/CMakeLists.txt (+8-9)
  • (renamed) libc/test/src/stdio/scanf_core/reader_test.cpp (+8-7)
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index e0bf9800ec881c2..fc865dc0cbb6a57 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -126,6 +126,10 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.snprintf
     libc.src.stdio.vsprintf
     libc.src.stdio.vsnprintf
+    #TODO: Check if scanf can be enabled on aarch64
+    #libc.src.stdio.sscanf
+    #libc.src.stdio.scanf
+    #libc.src.stdio.fscanf
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.madvise
diff --git a/libc/config/linux/riscv64/entrypoints.txt b/libc/config/linux/riscv64/entrypoints.txt
index 2b2f2629f78ce67..9574595ed459383 100644
--- a/libc/config/linux/riscv64/entrypoints.txt
+++ b/libc/config/linux/riscv64/entrypoints.txt
@@ -132,6 +132,9 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.vsnprintf
     libc.src.stdio.vfprintf
     libc.src.stdio.vprintf
+    libc.src.stdio.sscanf
+    libc.src.stdio.scanf
+    libc.src.stdio.fscanf
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.madvise
@@ -439,10 +442,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.stdio.getc_unlocked
     libc.src.stdio.getchar
     libc.src.stdio.getchar_unlocked
-    libc.src.stdio.printf
-    libc.src.stdio.sscanf
-    libc.src.stdio.scanf
-    libc.src.stdio.fscanf
     libc.src.stdio.putc
     libc.src.stdio.putchar
     libc.src.stdio.puts
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index dcb8c6231432d35..1c3fcb0c1e7c9e9 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -132,6 +132,9 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdio.vsnprintf
     libc.src.stdio.vfprintf
     libc.src.stdio.vprintf
+    libc.src.stdio.sscanf
+    libc.src.stdio.scanf
+    libc.src.stdio.fscanf
 
     # sys/mman.h entrypoints
     libc.src.sys.mman.madvise
@@ -445,9 +448,6 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.stdio.getc_unlocked
     libc.src.stdio.getchar
     libc.src.stdio.getchar_unlocked
-    libc.src.stdio.sscanf
-    libc.src.stdio.scanf
-    libc.src.stdio.fscanf
     libc.src.stdio.putc
     libc.src.stdio.putchar
     libc.src.stdio.puts
diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt
index 740ec106da2e4d7..6c393855b28cada 100644
--- a/libc/src/stdio/CMakeLists.txt
+++ b/libc/src/stdio/CMakeLists.txt
@@ -292,6 +292,21 @@ add_entrypoint_object(
     libc.src.__support.File.platform_file
 )
 
+list(APPEND scanf_deps
+      libc.src.__support.arg_list
+      libc.src.stdio.scanf_core.vfscanf_internal
+)
+
+if(LLVM_LIBC_FULL_BUILD)
+  list(APPEND scanf_deps
+      libc.src.__support.File.file
+      libc.src.__support.File.platform_file
+      libc.src.__support.File.platform_stdin
+  )
+else()
+  list(APPEND scanf_copts "-DLIBC_COPT_SCANF_USE_SYSTEM_FILE")
+endif()
+
 add_entrypoint_object(
   sscanf
   SRCS
@@ -300,7 +315,6 @@ add_entrypoint_object(
     sscanf.h
   DEPENDS
     libc.src.__support.arg_list
-    libc.src.stdio.scanf_core.string_reader
     libc.src.stdio.scanf_core.reader
     libc.src.stdio.scanf_core.scanf_main
 )
@@ -312,8 +326,9 @@ add_entrypoint_object(
   HDRS
     fscanf.h
   DEPENDS
-    libc.src.__support.arg_list
-    libc.src.stdio.scanf_core.vfscanf_internal
+    ${scanf_deps}
+  COMPILE_OPTIONS
+    ${scanf_copts}
 )
 
 add_entrypoint_object(
@@ -323,8 +338,9 @@ add_entrypoint_object(
   HDRS
     scanf.h
   DEPENDS
-    libc.src.__support.arg_list
-    libc.src.stdio.scanf_core.vfscanf_internal
+    ${scanf_deps}
+  COMPILE_OPTIONS
+    ${scanf_copts}
 )
 
 add_entrypoint_object(
diff --git a/libc/src/stdio/scanf.cpp b/libc/src/stdio/scanf.cpp
index 60e77895edcc3c6..d98cc3d58e23251 100644
--- a/libc/src/stdio/scanf.cpp
+++ b/libc/src/stdio/scanf.cpp
@@ -15,6 +15,12 @@
 #include 
 #include 
 
+#ifndef LIBC_COPT_SCANF_USE_SYSTEM_FILE
+#define SCANF_STDIN __llvm_libc::stdin
+#else // LIBC_COPT_SCANF_USE_SYSTEM_FILE
+#define SCANF_STDIN ::stdin
+#endif // LIBC_COPT_SCANF_USE_SYSTEM_FILE
+
 namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(int, scanf, (const char *__restrict format, ...)) {
@@ -25,7 +31,7 @@ LLVM_LIBC_FUNCTION(int, scanf, (const char *__restrict format, ...)) {
                                  // destruction automatically.
   va_end(vlist);
   int ret_val = scanf_core::vfscanf_internal(
-      reinterpret_cast<::FILE *>(__llvm_libc::stdin), format, args);
+      reinterpret_cast<::FILE *>(SCANF_STDIN), format, args);
   // This is done to avoid including stdio.h in the internals. On most systems
   // EOF is -1, so this will be transformed into just "return ret_val".
   return (ret_val == -1) ? EOF : ret_val;
diff --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index 9f6cb9c386eb226..dd775b437a9d840 100644
--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -24,12 +24,6 @@ add_object_library(
     libc.src.__support.CPP.string_view
 )
 
-if(NOT (TARGET libc.src.__support.File.file))
-  # Not all platforms have a file implementation. If file is unvailable,
-  # then we must skip all the parts that need file.
-  return()
-endif()
-
 add_object_library(
   scanf_main
   SRCS
@@ -44,24 +38,6 @@ add_object_library(
     libc.src.__support.arg_list
 )
 
-add_object_library(
-  string_reader
-  SRCS
-    string_reader.cpp
-  HDRS
-    string_reader.h
-)
-
-add_object_library(
-  file_reader
-  SRCS
-    file_reader.cpp
-  HDRS
-    file_reader.h
-  DEPENDS
-    libc.src.__support.File.file
-)
-
 add_object_library(
   reader
   SRCS
@@ -69,8 +45,7 @@ add_object_library(
   HDRS
     reader.h
   DEPENDS
-    .string_reader
-    .file_reader
+    libc.src.__support.macros.attributes
 )
 
 add_object_library(
@@ -101,15 +76,20 @@ add_object_library(
     libc.src.__support.str_to_float
 )
 
-add_object_library(
+if(NOT (TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD)
+  # Not all platforms have a file implementation. If file is unvailable, and a 
+  # full build is requested, then we must skip all file based printf sections.
+  return()
+endif()
+
+add_header_library(
   vfscanf_internal
-  SRCS
-    vfscanf_internal.cpp
   HDRS
     vfscanf_internal.h
   DEPENDS
     .reader
-    .file_reader
     .scanf_main
+    libc.include.stdio
+    libc.src.__support.File.file
     libc.src.__support.arg_list
 )
diff --git a/libc/src/stdio/scanf_core/file_reader.cpp b/libc/src/stdio/scanf_core/file_reader.cpp
deleted file mode 100644
index 51e22299e3dd2c5..000000000000000
--- a/libc/src/stdio/scanf_core/file_reader.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===-- FILE Reader implementation for scanf --------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/file_reader.h"
-#include "src/__support/File/file.h"
-#include 
-
-namespace __llvm_libc {
-namespace scanf_core {
-
-char FileReader::get_char() {
-  char tiny_buff = 0;
-  auto result = file->read_unlocked(&tiny_buff, 1);
-  if (result.value != 1 || result.has_error())
-    return 0;
-  return tiny_buff;
-}
-
-void FileReader::unget_char(char c) { file->ungetc_unlocked(c); }
-
-} // namespace scanf_core
-} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/file_reader.h b/libc/src/stdio/scanf_core/file_reader.h
deleted file mode 100644
index a70e00bc2413963..000000000000000
--- a/libc/src/stdio/scanf_core/file_reader.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===-- FILE Reader definition for scanf ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H
-#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H
-
-#include "src/__support/File/file.h"
-
-#include 
-#include 
-
-namespace __llvm_libc {
-namespace scanf_core {
-
-class FileReader {
-  __llvm_libc::File *file;
-
-public:
-  FileReader(::FILE *init_file) {
-    file = reinterpret_cast<__llvm_libc::File *>(init_file);
-    file->lock();
-  }
-
-  ~FileReader() { file->unlock(); }
-
-  char get_char();
-  void unget_char(char c);
-  bool has_error() { return file->error_unlocked(); }
-};
-
-} // namespace scanf_core
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_FILE_READER_H
diff --git a/libc/src/stdio/scanf_core/reader.cpp b/libc/src/stdio/scanf_core/reader.cpp
index 4834f4d6a6e7955..45241d8d86b01fe 100644
--- a/libc/src/stdio/scanf_core/reader.cpp
+++ b/libc/src/stdio/scanf_core/reader.cpp
@@ -12,33 +12,17 @@
 namespace __llvm_libc {
 namespace scanf_core {
 
-char Reader::getc() {
-  ++cur_chars_read;
-  if (reader_type == ReaderType::String) {
-    return string_reader->get_char();
-  } else {
-    return file_reader->get_char();
-  }
-}
-
 void Reader::ungetc(char c) {
   --cur_chars_read;
-  if (reader_type == ReaderType::String) {
-    // The string reader ignores the char c passed to unget since it doesn't
-    // need to place anything back into a buffer, and modifying the source
-    // string would be dangerous.
-    return string_reader->unget_char();
-  } else {
-    return file_reader->unget_char(c);
+  if (rb != nullptr && rb->buff_cur > 0) {
+    // While technically c should be written back to the buffer, in scanf we
+    // always write the character that was already there. Additionally, the
+    // buffer is most likely to contain a string that isn't part of a file,
+    // which may not be writable.
+    --(rb->buff_cur);
+    return;
   }
+  stream_ungetc(static_cast(c), input_stream);
 }
-
-bool Reader::has_error() {
-  if (reader_type == ReaderType::File) {
-    return file_reader->has_error();
-  }
-  return false;
-}
-
 } // namespace scanf_core
 } // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/reader.h b/libc/src/stdio/scanf_core/reader.h
index 000ab02ad28f21e..7e9cfc5c8ca2fb1 100644
--- a/libc/src/stdio/scanf_core/reader.h
+++ b/libc/src/stdio/scanf_core/reader.h
@@ -9,44 +9,61 @@
 #ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
 #define LLVM_LIBC_SRC_STDIO_SCANF_CORE_READER_H
 
-#include "src/stdio/scanf_core/file_reader.h"
-#include "src/stdio/scanf_core/string_reader.h"
+#include "src/__support/macros/attributes.h" // For LIBC_INLINE
 #include 
 
 namespace __llvm_libc {
 namespace scanf_core {
 
-enum class ReaderType { String, File };
+using StreamGetc = int (*)(void *);
+using StreamUngetc = void (*)(int, void *);
 
-class Reader final {
-  union {
-    StringReader *string_reader;
-    FileReader *file_reader;
-  };
+// This is intended to be either a raw string or a buffer syncronized with the
+// file's internal buffer.
+struct ReadBuffer {
+  char *buffer;
+  size_t buff_len;
+  size_t buff_cur = 0;
+};
+
+class Reader {
+  ReadBuffer *rb;
 
-  const ReaderType reader_type;
+  void *input_stream = nullptr;
+
+  StreamGetc stream_getc = nullptr;
+  StreamUngetc stream_ungetc = nullptr;
 
   size_t cur_chars_read = 0;
 
 public:
-  Reader(StringReader *init_string_reader)
-      : string_reader(init_string_reader), reader_type(ReaderType::String) {}
+  // TODO: Set buff_len with a proper constant
+  Reader(ReadBuffer *string_buffer) : rb(string_buffer) {}
 
-  Reader(FileReader *init_file_reader)
-      : file_reader(init_file_reader), reader_type(ReaderType::File) {}
+  Reader(void *stream, StreamGetc stream_getc_in, StreamUngetc stream_ungetc_in,
+         ReadBuffer *stream_buffer = nullptr)
+      : rb(stream_buffer), input_stream(stream), stream_getc(stream_getc_in),
+        stream_ungetc(stream_ungetc_in) {}
 
   // This returns the next character from the input and advances it by one
   // character. When it hits the end of the string or file it returns '\0' to
   // signal to stop parsing.
-  char getc();
+  LIBC_INLINE char getc() {
+    ++cur_chars_read;
+    if (rb != nullptr) {
+      char output = rb->buffer[rb->buff_cur];
+      ++(rb->buff_cur);
+      return output;
+    }
+    // This should reset the buffer if applicable.
+    return static_cast(stream_getc(input_stream));
+  }
 
   // This moves the input back by one character, placing c into the buffer if
   // this is a file reader, else c is ignored.
   void ungetc(char c);
 
   size_t chars_read() { return cur_chars_read; }
-
-  bool has_error();
 };
 
 } // namespace scanf_core
diff --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
index 5a79d2e624ab0aa..270024f31d2b68c 100644
--- a/libc/src/stdio/scanf_core/scanf_main.cpp
+++ b/libc/src/stdio/scanf_core/scanf_main.cpp
@@ -38,11 +38,6 @@ int scanf_main(Reader *reader, const char *__restrict str,
     }
   }
 
-  if (conversions == 0 && reader->has_error()) {
-    // This is intended to be converted to EOF in the client call to avoid
-    // including stdio.h in this internal file.
-    return -1;
-  }
   return conversions;
 }
 
diff --git a/libc/src/stdio/scanf_core/string_reader.cpp b/libc/src/stdio/scanf_core/string_reader.cpp
deleted file mode 100644
index 1d728d2b9eb35e6..000000000000000
--- a/libc/src/stdio/scanf_core/string_reader.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- String Reader implementation for scanf ------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/string_reader.h"
-#include 
-
-namespace __llvm_libc {
-namespace scanf_core {
-
-char StringReader::get_char() {
-  char cur_char = string[cur_index];
-  ++cur_index;
-  return cur_char;
-}
-
-void StringReader::unget_char() { --cur_index; }
-
-} // namespace scanf_core
-} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/string_reader.h b/libc/src/stdio/scanf_core/string_reader.h
deleted file mode 100644
index 35550b16c32140f..000000000000000
--- a/libc/src/stdio/scanf_core/string_reader.h
+++ /dev/null
@@ -1,33 +0,0 @@
-//===-- String Reader definition for scanf ----------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
-#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
-
-#include 
-
-namespace __llvm_libc {
-namespace scanf_core {
-
-class StringReader {
-  const char *string;
-  size_t cur_index = 0;
-
-public:
-  StringReader(const char *init_string) { string = init_string; }
-
-  ~StringReader() {}
-
-  char get_char();
-  void unget_char();
-};
-
-} // namespace scanf_core
-} // namespace __llvm_libc
-
-#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_STRING_READER_H
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.cpp b/libc/src/stdio/scanf_core/vfscanf_internal.cpp
deleted file mode 100644
index af2f6fa01ad475d..000000000000000
--- a/libc/src/stdio/scanf_core/vfscanf_internal.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//===-- Internal implementation of vfscanf ---------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/stdio/scanf_core/vfscanf_internal.h"
-
-#include "src/__support/arg_list.h"
-#include "src/stdio/scanf_core/file_reader.h"
-#include "src/stdio/scanf_core/reader.h"
-#include "src/stdio/scanf_core/scanf_main.h"
-
-#include 
-
-namespace __llvm_libc {
-namespace scanf_core {
-
-int vfscanf_internal(::FILE *__restrict stream, const char *__restrict format,
-                     internal::ArgList &args) {
-  FileReader file_reader(stream);
-  scanf_core::Reader reader(&file_reader);
-  return scanf_core::scanf_main(&reader, format, args);
-}
-
-} // namespace scanf_core
-} // namespace __llvm_libc
diff --git a/libc/src/stdio/scanf_core/vfscanf_internal.h b/libc/src/stdio/scanf_core/vfscanf_internal.h
index 456d1ff92056bed..693230560bc4f30 100644
--- a/libc/src/stdio/scanf_core/vfscanf_internal.h
+++ b/libc/src/stdio/scanf_core/vfscanf_internal.h
@@ -9,15 +9,57 @@
 #ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_VFSCANF_INTERNAL_H
 #define LLVM_LIBC_SRC_STDIO_SCANF_CORE_VFSCANF_INTERNAL_H
 
+#include "src/__support/File/file.h"
 #include "src/__support/arg_list.h"
+#include "src/stdio/scanf_core/reader.h"
+#include "src/stdio/scanf_core/scanf_main.h"
 
 #include 
 
 namespace __llvm_libc {
+
+namespace internal {
+#ifndef LIBC_COPT_SCANF_USE_SYSTEM_FILE
+LIBC_INLINE int getc(void *f) {
+  unsigned char c;
+  auto result = reinterpret_cast<__llvm_libc::File *>(f)->read_unlocked(&c, 1);
+  size_t r = result.value;
+  if (result.has_error() || r != 1) {
+    return '\0';
+  }
+  return c;
+}
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  reinterpret_cast<__llvm_libc::File *>(f)->ungetc(c);
+}
+
+LIBC_INLINE int ferror_unlocked(FILE *f) {
+  return reinterpret_cast<__llvm_libc::File *>(f)->error_unlocked();
+}
+#else  // defined(LIBC_COPT_PRINTF_USE_SYSTEM_FILE)
+LIBC_INLINE int getc(void *f) { return ::getc(reinterpret_cast<::FILE *>(f)); }
+
+LIBC_INLINE void ungetc(int c, void *f) {
+  ::ungetc(c, reinterpret_cast<::FILE *>(f));
+}
+
+LIBC_INLINE int ferror_unlocked(::FILE *f) { return ::ferror_unlocked(f); }
+#endif // LIBC_COPT_SCANF_USE_SYSTEM_FILE
+} // namespace internal
+
 namespace scanf_core {
 
-int vfscanf_internal(::FILE *__restrict stream, const char *__restrict format,
-                     internal::ArgList &args);
+LIBC_INLINE int vfscanf_internal(::FILE *__restrict stream,
+                                 const char *__restrict format,
+                                 internal::ArgList &args) {
+  scanf_core::Reader reader(stream, &internal::getc, internal::ungetc);
+  int retval = scanf_core::scanf_main(&reader, format, args);
+  if (retval == 0 && internal::ferror_unlocked(stream)) {
+    return EOF;
+  }
+  return retval;
+}
 } // namespace scanf_core
 } // namespace __llvm_libc
 
diff --git a/libc/src/stdio/sscanf.cpp b/libc/src/stdio/sscanf.cpp
index 205b58fb3390e95..1f3e5ba888cd8fa 100644
--- a/libc/src/stdio/sscanf.cpp
+++ b/libc/src/stdio/sscanf.cpp
@@ -8,10 +8,10 @@
 
 #include "src/stdio/sscanf.h"
 
+#include "src/__support/CPP/limits.h"
 #include "src/__supp...

#include <stddef.h>

namespace __llvm_libc {
namespace scanf_core {

enum class ReaderType { String, File };
using StreamGetc = int (*)(void *);
using StreamUngetc = void (*)(int, void *);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we do this with templates instead? Then we'd presumably just specialize it for whether or not the user it sscanf or fscanf in the implementation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reader isn't templated because it's passed to every converter. Templating the reader would mean having two copies of the conversion code in the resulting binary. This is why we have function pointers here (and in the printf writer), to give that runtime polymorphism. The string versions of the scanf (and printf) functions never call the function pointers though.

Copy link
Contributor

@jhuber6 jhuber6 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes make sense to me overall with some nits.

Copy link
Collaborator

@sivachandra sivachandra left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Few nits, but otherwise OK.

libc/config/linux/riscv64/entrypoints.txt Show resolved Hide resolved
libc/src/stdio/scanf_core/vfscanf_internal.h Outdated Show resolved Hide resolved
libc/src/stdio/scanf_core/vfscanf_internal.h Outdated Show resolved Hide resolved
libc/test/src/stdio/CMakeLists.txt Outdated Show resolved Hide resolved
In a previous patch, the printf writer was rewritten to use a single
writer class with a buffer and a callback hook. This patch refactors
scanf's reader to match conceptually.
Copy link
Contributor

@jhuber6 jhuber6 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG, very minor nits

Comment on lines 64 to 66
if (retval == 0 && internal::ferror_unlocked(stream)) {
return EOF;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (retval == 0 && internal::ferror_unlocked(stream)) {
return EOF;
}
if (retval == 0 && internal::ferror_unlocked(stream))
return EOF;

LLVM style omits braces like these

@@ -30,7 +30,7 @@ General Flags:
These compile-time flags will change the behavior of LLVM-libc's printf when it
is compiled. Combinations of flags that are incompatible will be marked.

LIBC_COPT_PRINTF_USE_SYSTEM_FILE
LIBC_COPT_STDIO_USE_SYSTEM_FILE
--------------------------------
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
--------------------------------
-------------------------------

One dash too many.

@michaelrj-google michaelrj-google merged commit a5a008f into llvm:main Sep 22, 2023
2 of 3 checks passed
Guzhu-AMD pushed a commit to GPUOpen-Drivers/llvm-project that referenced this pull request Sep 28, 2023
Local branch amd-gfx 40bdeb2 Merged main:ab1db262724e into amd-gfx:957cf8f1918f
Remote branch main a5a008f [libc] Refactor scanf reader to match printf (llvm#66023)
@michaelrj-google michaelrj-google deleted the testbranch branch November 1, 2023 22:19
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants