Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SystemZ][z/OS] Complete EBCDIC I/O support #75212

Merged
merged 2 commits into from
Dec 13, 2023

Conversation

abhina-sree
Copy link
Contributor

This patch completes EBCDIC I/O support on z/OS using the autoconversion functions.

@abhina-sree abhina-sree self-assigned this Dec 12, 2023
@llvmbot llvmbot added clang Clang issues not falling into any other category llvm:support testing-tools labels Dec 12, 2023
@llvmbot
Copy link
Collaborator

llvmbot commented Dec 12, 2023

@llvm/pr-subscribers-testing-tools
@llvm/pr-subscribers-llvm-support

@llvm/pr-subscribers-clang

Author: Abhina Sree (abhina-sree)

Changes

This patch completes EBCDIC I/O support on z/OS using the autoconversion functions.


Full diff: https://github.com/llvm/llvm-project/pull/75212.diff

9 Files Affected:

  • (modified) clang/tools/c-arcmt-test/c-arcmt-test.c (+10-1)
  • (modified) clang/tools/c-index-test/c-index-test.c (+9)
  • (modified) llvm/include/llvm/Support/AutoConvert.h (+23-2)
  • (modified) llvm/lib/Support/AutoConvert.cpp (+62-11)
  • (modified) llvm/lib/Support/InitLLVM.cpp (+39-2)
  • (modified) llvm/lib/Support/Unix/Program.inc (+5)
  • (modified) llvm/lib/Support/raw_ostream.cpp (+9)
  • (modified) llvm/utils/count/CMakeLists.txt (+4)
  • (modified) llvm/utils/count/count.c (+9-1)
diff --git a/clang/tools/c-arcmt-test/c-arcmt-test.c b/clang/tools/c-arcmt-test/c-arcmt-test.c
index 3bbb2d5d6a856..00999f188c7dc 100644
--- a/clang/tools/c-arcmt-test/c-arcmt-test.c
+++ b/clang/tools/c-arcmt-test/c-arcmt-test.c
@@ -1,8 +1,9 @@
 /* c-arcmt-test.c */
 
 #include "clang-c/Index.h"
-#include <stdlib.h>
+#include "llvm/Support/AutoConvert.h"
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 #if defined(_WIN32)
 #include <io.h>
@@ -107,6 +108,14 @@ static void flush_atexit(void) {
 }
 
 int main(int argc, const char **argv) {
+#ifdef __MVS__
+  if (enableAutoConversion(fileno(stdout)) == -1)
+    fprintf(stderr, "Setting conversion on stdout failed\n");
+
+  if (enableAutoConversion(fileno(stderr)) == -1)
+    fprintf(stderr, "Setting conversion on stderr failed\n");
+#endif
+
   thread_info client_data;
 
   atexit(flush_atexit);
diff --git a/clang/tools/c-index-test/c-index-test.c b/clang/tools/c-index-test/c-index-test.c
index 2c0c9cb8eb5e4..6fa400a0675b7 100644
--- a/clang/tools/c-index-test/c-index-test.c
+++ b/clang/tools/c-index-test/c-index-test.c
@@ -8,6 +8,7 @@
 #include "clang-c/Documentation.h"
 #include "clang-c/Index.h"
 #include "clang/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include <assert.h>
 #include <ctype.h>
 #include <stdio.h>
@@ -5150,6 +5151,14 @@ static void flush_atexit(void) {
 int main(int argc, const char **argv) {
   thread_info client_data;
 
+#ifdef __MVS__
+  if (enableAutoConversion(fileno(stdout)) == -1)
+    fprintf(stderr, "Setting conversion on stdout failed\n");
+
+  if (enableAutoConversion(fileno(stderr)) == -1)
+    fprintf(stderr, "Setting conversion on stderr failed\n");
+#endif
+
   atexit(flush_atexit);
 
 #ifdef CLANG_HAVE_LIBXML
diff --git a/llvm/include/llvm/Support/AutoConvert.h b/llvm/include/llvm/Support/AutoConvert.h
index bcf7473feac8f..6608dd461d726 100644
--- a/llvm/include/llvm/Support/AutoConvert.h
+++ b/llvm/include/llvm/Support/AutoConvert.h
@@ -15,10 +15,27 @@
 #define LLVM_SUPPORT_AUTOCONVERT_H
 
 #ifdef __MVS__
-#define CCSID_IBM_1047 1047
-#define CCSID_UTF_8 1208
+#include <_Ccsid.h>
+#ifdef __cplusplus
 #include <system_error>
+#endif // __cplusplus
 
+#define CCSID_IBM_1047 1047
+#define CCSID_UTF_8 1208
+#define CCSID_ISO8859_1 819
+
+#ifdef __cplusplus
+extern "C" {
+#endif // __cplusplus
+int enableAutoConversion(int FD);
+int disableAutoConversion(int FD);
+int restoreStdHandleAutoConversion(int FD);
+int overrideAutoConversion(int FD, char *Filetag);
+#ifdef __cplusplus
+}
+#endif // __cplusplus
+
+#ifdef __cplusplus
 namespace llvm {
 
 /// \brief Disable the z/OS enhanced ASCII auto-conversion for the file
@@ -30,10 +47,14 @@ std::error_code disableAutoConversion(int FD);
 /// codepage.
 std::error_code enableAutoConversion(int FD);
 
+/// Restore the z/OS enhanced ASCII auto-conversion for the std handle.
+std::error_code restoreStdHandleAutoConversion(int FD);
+
 /// \brief Set the tag information for a file descriptor.
 std::error_code setFileTag(int FD, int CCSID, bool Text);
 
 } // namespace llvm
+#endif // __cplusplus
 
 #endif // __MVS__
 
diff --git a/llvm/lib/Support/AutoConvert.cpp b/llvm/lib/Support/AutoConvert.cpp
index 4fb7e242c3480..8170e553ac6e1 100644
--- a/llvm/lib/Support/AutoConvert.cpp
+++ b/llvm/lib/Support/AutoConvert.cpp
@@ -14,21 +14,36 @@
 #ifdef __MVS__
 
 #include "llvm/Support/AutoConvert.h"
+#include <cassert>
 #include <fcntl.h>
 #include <sys/stat.h>
+#include <unistd.h>
 
-std::error_code llvm::disableAutoConversion(int FD) {
+static int savedStdHandleAutoConversionMode[3] = {-1, -1, -1};
+
+int disableAutoConversion(int FD) {
   static const struct f_cnvrt Convert = {
-      SETCVTOFF,        // cvtcmd
-      0,                // pccsid
-      (short)FT_BINARY, // fccsid
+      SETCVTOFF, // cvtcmd
+      0,         // pccsid
+      0,         // fccsid
   };
-  if (fcntl(FD, F_CONTROL_CVT, &Convert) == -1)
-    return std::error_code(errno, std::generic_category());
-  return std::error_code();
+
+  return fcntl(FD, F_CONTROL_CVT, &Convert);
 }
 
-std::error_code llvm::enableAutoConversion(int FD) {
+int restoreStdHandleAutoConversion(int FD) {
+  assert(FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO);
+  if (savedStdHandleAutoConversionMode[FD] == -1)
+    return 0;
+  struct f_cnvrt Cvt = {
+      savedStdHandleAutoConversionMode[FD], // cvtcmd
+      0,                                    // pccsid
+      0,                                    // fccsid
+  };
+  return (fcntl(FD, F_CONTROL_CVT, &Cvt));
+}
+
+int enableAutoConversion(int FD) {
   struct f_cnvrt Query = {
       QUERYCVT, // cvtcmd
       0,        // pccsid
@@ -36,17 +51,53 @@ std::error_code llvm::enableAutoConversion(int FD) {
   };
 
   if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
-    return std::error_code(errno, std::generic_category());
+    return -1;
+
+  // We don't need conversion for UTF-8 tagged files.
+  // TODO: Remove the assumption of ISO8859-1 = UTF-8 here when we fully resolve
+  // problems related to UTF-8 tagged source files.
+  // When the pccsid is not ISO8859-1, autoconversion is still needed.
+  if (Query.pccsid == CCSID_ISO8859_1 &&
+      (Query.fccsid == CCSID_UTF_8 || Query.fccsid == CCSID_ISO8859_1))
+    return 0;
+
+  // Save the state of std handles before we make changes to it.
+  if ((FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO) &&
+      savedStdHandleAutoConversionMode[FD] == -1)
+    savedStdHandleAutoConversionMode[FD] = Query.cvtcmd;
+
+  if (FD == STDOUT_FILENO || FD == STDERR_FILENO)
+    Query.cvtcmd = SETCVTON;
+  else
+    Query.cvtcmd = SETCVTALL;
 
-  Query.cvtcmd = SETCVTALL;
   Query.pccsid =
       (FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO)
           ? 0
           : CCSID_UTF_8;
   // Assume untagged files to be IBM-1047 encoded.
   Query.fccsid = (Query.fccsid == FT_UNTAGGED) ? CCSID_IBM_1047 : Query.fccsid;
-  if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
+  return fcntl(FD, F_CONTROL_CVT, &Query);
+}
+
+std::error_code llvm::disableAutoConversion(int FD) {
+  if (::disableAutoConversion(FD) == -1)
+    return std::error_code(errno, std::generic_category());
+
+  return std::error_code();
+}
+
+std::error_code llvm::enableAutoConversion(int FD) {
+  if (::enableAutoConversion(FD) == -1)
     return std::error_code(errno, std::generic_category());
+
+  return std::error_code();
+}
+
+std::error_code llvm::restoreStdHandleAutoConversion(int FD) {
+  if (::restoreStdHandleAutoConversion(FD) == -1)
+    return std::error_code(errno, std::generic_category());
+
   return std::error_code();
 }
 
diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp
index 2b7173b289403..7f475f42f3cb8 100644
--- a/llvm/lib/Support/InitLLVM.cpp
+++ b/llvm/lib/Support/InitLLVM.cpp
@@ -8,6 +8,8 @@
 
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/AutoConvert.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
@@ -15,15 +17,31 @@
 #include "llvm/Support/SwapByteOrder.h"
 
 #ifdef _WIN32
-#include "llvm/Support/Error.h"
 #include "llvm/Support/Windows/WindowsSupport.h"
 #endif
 
+#ifdef __MVS__
+#include <unistd.h>
+
+void CleanupStdHandles(void *Cookie) {
+  llvm::raw_ostream *Outs = &llvm::outs(), *Errs = &llvm::errs();
+  Outs->flush();
+  Errs->flush();
+  llvm::restoreStdHandleAutoConversion(STDIN_FILENO);
+  llvm::restoreStdHandleAutoConversion(STDOUT_FILENO);
+  llvm::restoreStdHandleAutoConversion(STDERR_FILENO);
+}
+#endif
+
 using namespace llvm;
 using namespace llvm::sys;
 
 InitLLVM::InitLLVM(int &Argc, const char **&Argv,
                    bool InstallPipeSignalExitHandler) {
+#ifdef __MVS__
+  // Bring stdin/stdout/stderr into a known state.
+  sys::AddSignalHandler(CleanupStdHandles, nullptr);
+#endif
   if (InstallPipeSignalExitHandler)
     // The pipe signal handler must be installed before any other handlers are
     // registered. This is because the Unix \ref RegisterHandlers function does
@@ -37,6 +55,20 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
   sys::PrintStackTraceOnErrorSignal(Argv[0]);
   install_out_of_memory_new_handler();
 
+#ifdef __MVS__
+
+  // We use UTF-8 as the internal character encoding. On z/OS, all external
+  // output is encoded in EBCDIC. In order to be able to read all
+  // error messages, we turn conversion to EBCDIC on for stderr fd.
+  std::string Banner = std::string(Argv[0]) + ": ";
+  ExitOnError ExitOnErr(Banner);
+
+  // If turning on conversion for stderr fails then the error message
+  // may be garbled. There is no solution to this problem.
+  ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDERR_FILENO)));
+  ExitOnErr(errorCodeToError(llvm::enableAutoConversion(STDOUT_FILENO)));
+#endif
+
 #ifdef _WIN32
   // We use UTF-8 as the internal character encoding. On Windows,
   // arguments passed to main() may not be encoded in UTF-8. In order
@@ -61,4 +93,9 @@ InitLLVM::InitLLVM(int &Argc, const char **&Argv,
 #endif
 }
 
-InitLLVM::~InitLLVM() { llvm_shutdown(); }
+InitLLVM::~InitLLVM() {
+#ifdef __MVS__
+  CleanupStdHandles(nullptr);
+#endif
+  llvm_shutdown();
+}
diff --git a/llvm/lib/Support/Unix/Program.inc b/llvm/lib/Support/Unix/Program.inc
index 9466d0f0ba859..2e17d8c4ea3da 100644
--- a/llvm/lib/Support/Unix/Program.inc
+++ b/llvm/lib/Support/Unix/Program.inc
@@ -20,6 +20,7 @@
 #include "Unix.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
@@ -520,8 +521,12 @@ std::error_code llvm::sys::ChangeStdoutMode(fs::OpenFlags Flags) {
 }
 
 std::error_code llvm::sys::ChangeStdinToBinary() {
+#ifdef __MVS__
+  return disableAutoConversion(STDIN_FILENO);
+#else
   // Do nothing, as Unix doesn't differentiate between text and binary.
   return std::error_code();
+#endif
 }
 
 std::error_code llvm::sys::ChangeStdoutToBinary() {
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 8908e7b6a150c..d654ae450b340 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Duration.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -895,6 +896,10 @@ void raw_fd_ostream::anchor() {}
 raw_fd_ostream &llvm::outs() {
   // Set buffer settings to model stdout behavior.
   std::error_code EC;
+#ifdef __MVS__
+  EC = enableAutoConversion(STDOUT_FILENO);
+  assert(!EC);
+#endif
   static raw_fd_ostream S("-", EC, sys::fs::OF_None);
   assert(!EC);
   return S;
@@ -902,6 +907,10 @@ raw_fd_ostream &llvm::outs() {
 
 raw_fd_ostream &llvm::errs() {
   // Set standard error to be unbuffered and tied to outs() by default.
+#ifdef __MVS__
+  std::error_code EC = enableAutoConversion(STDERR_FILENO);
+  assert(!EC);
+#endif
   static raw_fd_ostream S(STDERR_FILENO, false, true);
   return S;
 }
diff --git a/llvm/utils/count/CMakeLists.txt b/llvm/utils/count/CMakeLists.txt
index 4e0d371334e47..cfd1f4a85d8a1 100644
--- a/llvm/utils/count/CMakeLists.txt
+++ b/llvm/utils/count/CMakeLists.txt
@@ -1,3 +1,7 @@
+set(LLVM_LINK_COMPONENTS
+  support
+)
+
 add_llvm_utility(count
   count.c
   )
diff --git a/llvm/utils/count/count.c b/llvm/utils/count/count.c
index 7149c14a63abb..300be2aa8a18e 100644
--- a/llvm/utils/count/count.c
+++ b/llvm/utils/count/count.c
@@ -6,10 +6,18 @@
  *
 \*===----------------------------------------------------------------------===*/
 
-#include <stdlib.h>
+#include "llvm/Support/AutoConvert.h"
 #include <stdio.h>
+#include <stdlib.h>
 
 int main(int argc, char **argv) {
+#ifdef __MVS__
+  if (enableAutoConversion(fileno(stdin)) == -1)
+    fprintf(stderr, "Setting conversion on stdin failed\n");
+
+  if (enableAutoConversion(fileno(stderr)) == -1)
+    fprintf(stdout, "Setting conversion on stderr failed\n");
+#endif
   size_t Count, NumLines, NumRead;
   char Buffer[4096], *End;
 

Copy link
Contributor

@fanbo-meng fanbo-meng left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@abhina-sree abhina-sree merged commit ab380c2 into llvm:main Dec 13, 2023
4 checks passed
@abhina-sree abhina-sree deleted the abhina/autocvt branch December 13, 2023 12:46
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
clang Clang issues not falling into any other category llvm:support testing-tools
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants