Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[flang][runtime] Support for offload build of FortranDecimal. #87653

Merged
vzakhari merged 3 commits into llvm:main
on Apr 5, 2024

Conversation

vzakhari
Copy link
Contributor

@vzakhari vzakhari commented Apr 4, 2024

No description provided.

@vzakhari vzakhari requested a review from klausler April 4, 2024 16:28
@llvmbot llvmbot added the flang:runtime and flang (Flang issues not falling into any other category) labels Apr 4, 2024
@llvmbot
Copy link
Collaborator

llvmbot commented Apr 4, 2024

@llvm/pr-subscribers-flang-runtime

Author: Slava Zakharin (vzakhari)

Changes

Patch is 25.24 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/87653.diff

6 Files Affected:

  • (added) flang/cmake/modules/AddFlangOffloadRuntime.cmake (+132)
  • (modified) flang/lib/Decimal/CMakeLists.txt (+8-2)
  • (modified) flang/lib/Decimal/big-radix-floating-point.h (+38-31)
  • (modified) flang/lib/Decimal/binary-to-decimal.cpp (+5-1)
  • (modified) flang/lib/Decimal/decimal-to-binary.cpp (+13-9)
  • (modified) flang/runtime/CMakeLists.txt (+3-126)
diff --git a/flang/cmake/modules/AddFlangOffloadRuntime.cmake b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
new file mode 100644
index 00000000000000..6fb6213e90fc49
--- /dev/null
+++ b/flang/cmake/modules/AddFlangOffloadRuntime.cmake
@@ -0,0 +1,132 @@
+option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
+  "Compile Fortran runtime as CUDA sources (experimental)" OFF
+  )
+# Consulted by enable_cuda_compilation(): used only when <path>/include exists.
+set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")
+# 'nohost' is listed but still a TODO: enable_omp_offload_compilation() accepts only 'host_device'.
+set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
+  "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")
+# 'all' is replaced by a built-in AMDGPU + NVPTX list in enable_omp_offload_compilation().
+set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
+  "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")
+
+macro(enable_cuda_compilation files) # files: list of sources to compile as CUDA.
+  if (FLANG_EXPERIMENTAL_CUDA_RUNTIME) # No-op unless the CUDA build was requested.
+    if (BUILD_SHARED_LIBS)
+      message(FATAL_ERROR
+        "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
+        )
+    endif()
+
+    enable_language(CUDA)
+
+    # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
+    # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
+    set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # A macro has no scope: this lands in the caller.
+
+    # Treat all supported sources as CUDA files.
+    set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
+    set(CUDA_COMPILE_OPTIONS) # Unset first so a stale caller-scope value cannot leak in.
+    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
+      # Allow variadic functions (varargs).
+      set(CUDA_COMPILE_OPTIONS
+        -Xclang -fcuda-allow-variadic-functions
+        )
+    endif()
+    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
+      set(CUDA_COMPILE_OPTIONS
+        --expt-relaxed-constexpr
+        # Suppress diagnostic 20208:
+        #   'long double' is treated as 'double' in device code
+        -Xcudafe --diag_suppress=20208
+        -Xcudafe --display_error_number
+        )
+    endif()
+    set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
+      "${CUDA_COMPILE_OPTIONS}"
+      ) # Sets (does not append to) the per-file COMPILE_OPTIONS property.
+
+    if (EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
+      # When using libcudacxx header files, we have to use them
+      # for all files of F18 runtime (hence the directory-scoped commands).
+      include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include)
+      add_compile_definitions(RT_USE_LIBCUDACXX=1)
+    endif()
+  endif()
+endmacro()
+
+macro(enable_omp_offload_compilation files) # files: list of sources to build for OpenMP offload.
+  if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off") # No-op when set to "off".
+    # 'host_device' build only works with Clang compiler currently.
+    # The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use
+    # the in-tree built Clang. We may have a mode that would use the in-tree
+    # built Clang.
+    #
+    # 'nohost' is supposed to produce an LLVM Bitcode library,
+    # and it has to be done with a C/C++ compiler producing LLVM Bitcode
+    # compatible with the LLVM toolchain version distributed with the Flang
+    # compiler.
+    # In general, the in-tree built Clang should be used for 'nohost' build.
+    # Note that 'nohost' build does not produce the host version of Flang
+    # runtime library, so there will be two separate distributable objects.
+    # 'nohost' build is a TODO.
+
+    if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device") # Rejects 'nohost' and any other value.
+      message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime")
+    endif()
+    if (BUILD_SHARED_LIBS)
+      message(FATAL_ERROR
+        "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
+        )
+    endif()
+
+    if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
+        "${CMAKE_C_COMPILER_ID}" MATCHES "Clang") # MATCHES is a regex search: any ID containing "Clang" passes.
+
+      set(all_amdgpu_architectures
+        "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
+        "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
+        "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
+        "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
+        ) # The quoted chunks are joined with ';' into one flat list.
+      set(all_nvptx_architectures
+        "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
+        "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90"
+        )
+      set(all_gpu_architectures
+        "${all_amdgpu_architectures};${all_nvptx_architectures}"
+        )
+      # TODO: support auto detection on the build system.
+      if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
+        set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures}) # Normal variable; the cache entry keeps "all".
+      endif()
+      list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)
+
+      string(REPLACE ";" "," compile_for_architectures
+        "${FLANG_OMP_DEVICE_ARCHITECTURES}"
+        ) # Comma-join the list so it fits the single --offload-arch option below.
+
+      set(OMP_COMPILE_OPTIONS
+        -fopenmp
+        -fvisibility=hidden
+        -fopenmp-cuda-mode
+        --offload-arch=${compile_for_architectures}
+        # Force LTO for the device part.
+        -foffload-lto
+        )
+      set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
+        "${OMP_COMPILE_OPTIONS}"
+        ) # Sets (does not append to) the per-file COMPILE_OPTIONS property.
+
+      # Enable "declare target" in the source code.
+      set_source_files_properties(${files}
+        PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
+        )
+    else()
+      message(FATAL_ERROR
+        "Flang runtime build is not supported for these compilers:\n"
+        "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
+        "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
+    endif()
+  endif()
+endmacro()
diff --git a/flang/lib/Decimal/CMakeLists.txt b/flang/lib/Decimal/CMakeLists.txt
index 2f6caa22e1562b..3d562b8e3ce1e5 100644
--- a/flang/lib/Decimal/CMakeLists.txt
+++ b/flang/lib/Decimal/CMakeLists.txt
@@ -49,11 +49,17 @@ endif()
 # avoid an unwanted dependency on libstdc++.so.
 add_definitions(-U_GLIBCXX_ASSERTIONS)
 
-add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN
+set(sources
   binary-to-decimal.cpp
   decimal-to-binary.cpp
 )
 
+include(AddFlangOffloadRuntime)
+enable_cuda_compilation("${sources}")
+enable_omp_offload_compilation("${sources}")
+
+add_flang_library(FortranDecimal INSTALL_WITH_TOOLCHAIN ${sources})
+
 if (DEFINED MSVC)
   set(CMAKE_MSVC_RUNTIME_LIBRARY MultiThreaded)
   add_flang_library(FortranDecimal.static INSTALL_WITH_TOOLCHAIN
@@ -77,4 +83,4 @@ if (DEFINED MSVC)
   )
   add_dependencies(FortranDecimal FortranDecimal.static FortranDecimal.dynamic
     FortranDecimal.static_dbg FortranDecimal.dynamic_dbg)
-endif()
\ No newline at end of file
+endif()
diff --git a/flang/lib/Decimal/big-radix-floating-point.h b/flang/lib/Decimal/big-radix-floating-point.h
index 2143d1d9b3f776..6ce8ae7925c150 100644
--- a/flang/lib/Decimal/big-radix-floating-point.h
+++ b/flang/lib/Decimal/big-radix-floating-point.h
@@ -30,6 +30,10 @@
 #include <limits>
 #include <type_traits>
 
+// Some environments, viz. glibc 2.17, allow the macro HUGE
+// to leak out of <math.h>.
+#undef HUGE
+
 namespace Fortran::decimal {
 
 static constexpr std::uint64_t TenToThe(int power) {
@@ -64,15 +68,15 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
   static constexpr int maxDigits{3 - minLog2AnyBit / log10Radix};
 
 public:
-  explicit BigRadixFloatingPointNumber(
+  explicit RT_API_ATTRS BigRadixFloatingPointNumber(
       enum FortranRounding rounding = RoundNearest)
       : rounding_{rounding} {}
 
   // Converts a binary floating point value.
-  explicit BigRadixFloatingPointNumber(
+  explicit RT_API_ATTRS BigRadixFloatingPointNumber(
       Real, enum FortranRounding = RoundNearest);
 
-  BigRadixFloatingPointNumber &SetToZero() {
+  RT_API_ATTRS BigRadixFloatingPointNumber &SetToZero() {
     isNegative_ = false;
     digits_ = 0;
     exponent_ = 0;
@@ -80,14 +84,14 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
   }
 
   // Converts decimal floating-point to binary.
-  ConversionToBinaryResult<PREC> ConvertToBinary();
+  RT_API_ATTRS ConversionToBinaryResult<PREC> ConvertToBinary();
 
   // Parses and converts to binary.  Handles leading spaces,
   // "NaN", & optionally-signed "Inf".  Does not skip internal
   // spaces.
   // The argument is a reference to a pointer that is left
   // pointing to the first character that wasn't parsed.
-  ConversionToBinaryResult<PREC> ConvertToBinary(
+  RT_API_ATTRS ConversionToBinaryResult<PREC> ConvertToBinary(
       const char *&, const char *end = nullptr);
 
   // Formats a decimal floating-point number to a user buffer.
@@ -96,7 +100,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
   // after the last digit; the effective decimal exponent is
   // returned as part of the result structure so that it can be
   // formatted by the client.
-  ConversionToDecimalResult ConvertToDecimal(
+  RT_API_ATTRS ConversionToDecimalResult ConvertToDecimal(
       char *, std::size_t, enum DecimalConversionFlags, int digits) const;
 
   // Discard decimal digits not needed to distinguish this value
@@ -108,13 +112,14 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
   // This minimization necessarily assumes that the value will be
   // emitted and read back into the same (or less precise) format
   // with default rounding to the nearest value.
-  void Minimize(
+  RT_API_ATTRS void Minimize(
       BigRadixFloatingPointNumber &&less, BigRadixFloatingPointNumber &&more);
 
   template <typename STREAM> STREAM &Dump(STREAM &) const;
 
 private:
-  BigRadixFloatingPointNumber(const BigRadixFloatingPointNumber &that)
+  RT_API_ATTRS BigRadixFloatingPointNumber(
+      const BigRadixFloatingPointNumber &that)
       : digits_{that.digits_}, exponent_{that.exponent_},
         isNegative_{that.isNegative_}, rounding_{that.rounding_} {
     for (int j{0}; j < digits_; ++j) {
@@ -122,7 +127,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
   }
 
-  bool IsZero() const {
+  RT_API_ATTRS bool IsZero() const {
     // Don't assume normalization.
     for (int j{0}; j < digits_; ++j) {
       if (digit_[j] != 0) {
@@ -136,13 +141,13 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
   // (When this happens during decimal-to-binary conversion,
   // there are more digits in the input string than can be
   // represented precisely.)
-  bool IsFull() const {
+  RT_API_ATTRS bool IsFull() const {
     return digits_ == digitLimit_ && digit_[digits_ - 1] >= radix / 10;
   }
 
   // Sets *this to an unsigned integer value.
   // Returns any remainder.
-  template <typename UINT> UINT SetTo(UINT n) {
+  template <typename UINT> RT_API_ATTRS UINT SetTo(UINT n) {
     static_assert(
         std::is_same_v<UINT, common::uint128_t> || std::is_unsigned_v<UINT>);
     SetToZero();
@@ -169,7 +174,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
   }
 
-  int RemoveLeastOrderZeroDigits() {
+  RT_API_ATTRS int RemoveLeastOrderZeroDigits() {
     int remove{0};
     if (digits_ > 0 && digit_[0] == 0) {
       while (remove < digits_ && digit_[remove] == 0) {
@@ -193,25 +198,25 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     return remove;
   }
 
-  void RemoveLeadingZeroDigits() {
+  RT_API_ATTRS void RemoveLeadingZeroDigits() {
     while (digits_ > 0 && digit_[digits_ - 1] == 0) {
       --digits_;
     }
   }
 
-  void Normalize() {
+  RT_API_ATTRS void Normalize() {
     RemoveLeadingZeroDigits();
     exponent_ += RemoveLeastOrderZeroDigits() * log10Radix;
   }
 
   // This limited divisibility test only works for even divisors of the radix,
   // which is fine since it's only ever used with 2 and 5.
-  template <int N> bool IsDivisibleBy() const {
+  template <int N> RT_API_ATTRS bool IsDivisibleBy() const {
     static_assert(N > 1 && radix % N == 0, "bad modulus");
     return digits_ == 0 || (digit_[0] % N) == 0;
   }
 
-  template <unsigned DIVISOR> int DivideBy() {
+  template <unsigned DIVISOR> RT_API_ATTRS int DivideBy() {
     Digit remainder{0};
     for (int j{digits_ - 1}; j >= 0; --j) {
       Digit q{digit_[j] / DIVISOR};
@@ -222,7 +227,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     return remainder;
   }
 
-  void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix
+  RT_API_ATTRS void DivideByPowerOfTwo(int twoPow) { // twoPow <= log10Radix
     Digit remainder{0};
     auto mask{(Digit{1} << twoPow) - 1};
     auto coeff{radix >> twoPow};
@@ -234,7 +239,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
   }
 
   // Returns true on overflow
-  bool DivideByPowerOfTwoInPlace(int twoPow) {
+  RT_API_ATTRS bool DivideByPowerOfTwoInPlace(int twoPow) {
     if (digits_ > 0) {
       while (twoPow > 0) {
         int chunk{twoPow > log10Radix ? log10Radix : twoPow};
@@ -264,7 +269,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     return false; // no overflow
   }
 
-  int AddCarry(int position = 0, int carry = 1) {
+  RT_API_ATTRS int AddCarry(int position = 0, int carry = 1) {
     for (; position < digits_; ++position) {
       Digit v{digit_[position] + carry};
       if (v < radix) {
@@ -286,13 +291,13 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     return carry;
   }
 
-  void Decrement() {
+  RT_API_ATTRS void Decrement() {
     for (int j{0}; digit_[j]-- == 0; ++j) {
       digit_[j] = radix - 1;
     }
   }
 
-  template <int N> int MultiplyByHelper(int carry = 0) {
+  template <int N> RT_API_ATTRS int MultiplyByHelper(int carry = 0) {
     for (int j{0}; j < digits_; ++j) {
       auto v{N * digit_[j] + carry};
       carry = v / radix;
@@ -301,7 +306,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     return carry;
   }
 
-  template <int N> int MultiplyBy(int carry = 0) {
+  template <int N> RT_API_ATTRS int MultiplyBy(int carry = 0) {
     if (int newCarry{MultiplyByHelper<N>(carry)}) {
       return AddCarry(digits_, newCarry);
     } else {
@@ -309,7 +314,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
   }
 
-  template <int N> int MultiplyWithoutNormalization() {
+  template <int N> RT_API_ATTRS int MultiplyWithoutNormalization() {
     if (int carry{MultiplyByHelper<N>(0)}) {
       if (digits_ < digitLimit_) {
         digit_[digits_++] = carry;
@@ -322,9 +327,9 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
   }
 
-  void LoseLeastSignificantDigit(); // with rounding
+  RT_API_ATTRS void LoseLeastSignificantDigit(); // with rounding
 
-  void PushCarry(int carry) {
+  RT_API_ATTRS void PushCarry(int carry) {
     if (digits_ == maxDigits && RemoveLeastOrderZeroDigits() == 0) {
       LoseLeastSignificantDigit();
       digit_[digits_ - 1] += carry;
@@ -336,18 +341,20 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
   // Adds another number and then divides by two.
   // Assumes same exponent and sign.
   // Returns true when the result has effectively been rounded down.
-  bool Mean(const BigRadixFloatingPointNumber &);
+  RT_API_ATTRS bool Mean(const BigRadixFloatingPointNumber &);
 
   // Parses a floating-point number; leaves the pointer reference
   // argument pointing at the next character after what was recognized.
   // The "end" argument can be left null if the caller is sure that the
   // string is properly terminated with an addressable character that
   // can't be in a valid floating-point character.
-  bool ParseNumber(const char *&, bool &inexact, const char *end);
+  RT_API_ATTRS bool ParseNumber(const char *&, bool &inexact, const char *end);
 
   using Raw = typename Real::RawType;
-  constexpr Raw SignBit() const { return Raw{isNegative_} << (Real::bits - 1); }
-  constexpr Raw Infinity() const {
+  constexpr RT_API_ATTRS Raw SignBit() const {
+    return Raw{isNegative_} << (Real::bits - 1);
+  }
+  constexpr RT_API_ATTRS Raw Infinity() const {
     Raw result{static_cast<Raw>(Real::maxExponent)};
     result <<= Real::significandBits;
     result |= SignBit();
@@ -356,7 +363,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
     return result;
   }
-  constexpr Raw NaN(bool isQuiet = true) {
+  constexpr RT_API_ATTRS Raw NaN(bool isQuiet = true) {
     Raw result{Real::maxExponent};
     result <<= Real::significandBits;
     result |= SignBit();
@@ -369,7 +376,7 @@ template <int PREC, int LOG10RADIX = 16> class BigRadixFloatingPointNumber {
     }
     return result;
   }
-  constexpr Raw HUGE() const {
+  constexpr RT_API_ATTRS Raw HUGE() const {
     Raw result{static_cast<Raw>(Real::maxExponent)};
     result <<= Real::significandBits;
     result |= SignBit();
diff --git a/flang/lib/Decimal/binary-to-decimal.cpp b/flang/lib/Decimal/binary-to-decimal.cpp
index 55fc548a6979bd..b64865e95df24d 100644
--- a/flang/lib/Decimal/binary-to-decimal.cpp
+++ b/flang/lib/Decimal/binary-to-decimal.cpp
@@ -336,6 +336,8 @@ template ConversionToDecimalResult ConvertToDecimal<113>(char *, std::size_t,
     BinaryFloatingPointNumber<113>);
 
 extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
 ConversionToDecimalResult ConvertFloatToDecimal(char *buffer, std::size_t size,
     enum DecimalConversionFlags flags, int digits,
     enum FortranRounding rounding, float x) {
@@ -365,7 +367,9 @@ ConversionToDecimalResult ConvertLongDoubleToDecimal(char *buffer,
       rounding, Fortran::decimal::BinaryFloatingPointNumber<113>(x));
 }
 #endif
-}
+
+RT_EXT_API_GROUP_END
+} // extern "C"
 
 template <int PREC, int LOG10RADIX>
 template <typename STREAM>
diff --git a/flang/lib/Decimal/decimal-to-binary.cpp b/flang/lib/Decimal/decimal-to-binary.cpp
index c5cdb72e355f62..dc4aa82ac6fe49 100644
--- a/flang/lib/Decimal/decimal-to-binary.cpp
+++ b/flang/lib/Decimal/decimal-to-binary.cpp
@@ -191,12 +191,12 @@ template <int PREC> class IntermediateFloat {
   static constexpr IntType topBit{IntType{1} << (precision - 1)};
   static constexpr IntType mask{topBit + (topBit - 1)};
 
-  IntermediateFloat() {}
+  RT_API_ATTRS IntermediateFloat() {}
   IntermediateFloat(const IntermediateFloat &) = default;
 
   // Assumes that exponent_ is valid on entry, and may increment it.
   // Returns the number of guard_ bits that have been determined.
-  template <typename UINT> bool SetTo(UINT n) {
+  template <typename UINT> RT_API_ATTRS bool SetTo(UINT n) {
     static constexpr int nBits{CHAR_BIT * sizeof n};
     if constexpr (precision >= nBits) {
       value_ = n;
@@ -218,14 +218,14 @@ template <int PREC> class IntermediateFloat {
     }
   }
 
-  void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
-  bool IsFull() const { return value_ >= topBit; }
-  void AdjustExponent(int by) { exponent_ += by; }
-  void SetGuard(int g) {
+  RT_API_ATTRS void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
+  RT_API_ATTRS bool IsFull() const { return value_ >= topBit; }
+  RT_API_ATTRS void AdjustExponent(int by) { exponent_ += by; }
+  RT_API_ATTRS void SetGuard(int g) {
     guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
   }
 
-  ConversionToBinaryResult<PREC> ToBinary(
+  RT_API_ATTRS ConversionToBinaryResult<PREC> ToBinary(
       bool isNegative, FortranRounding) const;
 
 private:
@@ -241,7 +241,7 @@ template <int PREC> class IntermediateFloat {
 // The standard says that these overflow cases round to "representable"
 // numbers, and some popular compilers interpret that to mean +/-HUGE()
 // rather than +/-Inf.
-static inline constexpr bool RoundOverflowToHuge(
+static inline RT_API_ATTRS constexpr bool RoundOverflowToHuge(
     enum FortranRounding rounding, bool isNegative) {
   return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
       (isNegative && rounding == RoundUp);
@@ -531,6 +531,8 @@ template ConversionToBinaryResult<113> ConvertToBinary<113>(
     const char *&, enum FortranRounding, const char *end);
 
 extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
 enum ConversionResultFlags ConvertDecimalToFloat(
     const char **p, float *f, enum FortranRounding rounding) {
   auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
@@ -552,5 +554,7 @@ enum ConversionResultFlags ConvertDecimalToLongDouble(
       reinterpret_cast<const void *>(&result.binary), sizeof *ld);
   return result.flags;
 }
-}
+
+RT_EXT_API_GROUP_END
+} // extern "C"
 } // namespace Fortran::decimal
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt...
[truncated]

@vzakhari
Copy link
Contributor Author

vzakhari commented Apr 5, 2024

Just a friendly ping.

@klausler
Copy link
Contributor

klausler commented Apr 5, 2024

Thanks for the ping, this one got lost.

@vzakhari vzakhari merged commit b329da8 into llvm:main Apr 5, 2024
4 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
flang:runtime, flang (Flang issues not falling into any other category)
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants