[X86] Add ExpandLargeFpConvert Pass and enable for X86

As stated in https://discourse.llvm.org/t/rfc-llc-add-expandlargeintfpconvert-pass-for-fp-int-conversion-of-large-bitint/65528, this implementation is very similar to ExpandLargeDivRem, which expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’, ‘sitofp .. to’ instructions with a bitwidth above a threshold into auto-generated functions. This is useful for targets like x86_64 that cannot lower fp convertions with more than 128 bits. The expanded nodes are referring from the IR generated by `compiler-rt/lib/builtins/floattidf.c`, `compiler-rt/lib/builtins/fixdfti.c`, and etc. Corner cases: 1. For fp16: as there is no related builtins added in compliler-rt. So I mainly utilized the fp32 <-> fp16 lib calls to implement. 2. For fp80: as this pass is soft fp emulation and no fp80 instructions can help in this problem. I recommend users to deprecate this usage. For now, the implementation uses fp128 as the temporary conversion type and inserts fptrunc/ext at top/end of the function. 3. For bf16: as clang FE currently doesn't support bf16 algorithm operations (convert to int, float, +, -, *, ...), this patch doesn't consider bf16 for now. 4. For unsigned FPToI: since both default hardware behaviors and libgcc are ignoring "returns 0 for negative input" spec. This pass follows this old way to ignore unsigned FPToI. See this example: https://gcc.godbolt.org/z/bnv3jqW1M The end-to-end tests are uploaded at https://reviews.llvm.org/D138261 Reviewed By: LuoYuanke, mgehre-amd Differential Revision: https://reviews.llvm.org/D137241
llvm · Dec 1, 2022 · 89f36dd · 89f36dd
1 parent 416e8c6
commit 89f36dd
Show file tree

Hide file tree

Showing 26 changed files with 2,045 additions and 1 deletion.
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -44,6 +44,7 @@ FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass, ())
 FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass, (false))
 FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass, (true))
 FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass, ())
+FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass, ())
 FUNCTION_PASS("expand-reductions", ExpandReductionsPass, ())
 FUNCTION_PASS("expandvp", ExpandVectorPredicationPass, ())
 FUNCTION_PASS("lowerinvoke", LowerInvokePass, ())

diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
@@ -494,6 +494,9 @@ namespace llvm {
   // Expands large div/rem instructions.
   FunctionPass *createExpandLargeDivRemPass();
 
+  // Expands large div/rem instructions.
+  FunctionPass *createExpandLargeFpConvertPass();
+
   // This pass expands memcmp() to load/stores.
   FunctionPass *createExpandMemCmpPass();
 

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1961,6 +1961,12 @@ class TargetLoweringBase {
     return MaxDivRemBitWidthSupported;
   }
 
+  /// Returns the size in bits of the maximum larget fp convert the backend
+  /// supports. Larger operations will be expanded by ExpandLargeFPConvert.
+  unsigned getMaxLargeFPConvertBitWidthSupported() const {
+    return MaxLargeFPConvertBitWidthSupported;
+  }
+
   /// Returns the size of the smallest cmpxchg or ll/sc instruction
   /// the backend supports.  Any smaller operations are widened in
   /// AtomicExpandPass.
@@ -2540,6 +2546,12 @@ class TargetLoweringBase {
     MaxDivRemBitWidthSupported = SizeInBits;
   }
 
+  /// Set the size in bits of the maximum fp convert the backend supports.
+  /// Larger operations will be expanded by ExpandLargeFPConvert.
+  void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) {
+    MaxLargeFPConvertBitWidthSupported = SizeInBits;
+  }
+
   /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
   void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
     MinCmpXchgSizeInBits = SizeInBits;
@@ -3259,6 +3271,10 @@ class TargetLoweringBase {
   /// Larger operations will be expanded by ExpandLargeDivRem.
   unsigned MaxDivRemBitWidthSupported;
 
+  /// Size in bits of the maximum larget fp convert size the backend
+  /// supports. Larger operations will be expanded by ExpandLargeFPConvert.
+  unsigned MaxLargeFPConvertBitWidthSupported;
+
   /// Size in bits of the minimum cmpxchg or ll/sc operation the
   /// backend supports.
   unsigned MinCmpXchgSizeInBits;

diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
@@ -133,6 +133,7 @@ void initializeEarlyTailDuplicatePass(PassRegistry&);
 void initializeEdgeBundlesPass(PassRegistry&);
 void initializeEHContGuardCatchretPass(PassRegistry &);
 void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
+void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
 void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
 void initializeExpandMemCmpPassPass(PassRegistry&);
 void initializeExpandPostRAPass(PassRegistry&);

diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_component_library(LLVMCodeGen
   EHContGuardCatchret.cpp
   ExecutionDomainFix.cpp
   ExpandLargeDivRem.cpp
+  ExpandLargeFpConvert.cpp
   ExpandMemCmp.cpp
   ExpandPostRAPseudos.cpp
   ExpandReductions.cpp

diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
@@ -37,6 +37,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeEarlyMachineLICMPass(Registry);
   initializeEarlyTailDuplicatePass(Registry);
   initializeExpandLargeDivRemLegacyPassPass(Registry);
+  initializeExpandLargeFpConvertLegacyPassPass(Registry);
   initializeExpandMemCmpPassPass(Registry);
   initializeExpandPostRAPass(Registry);
   initializeFEntryInserterPass(Registry);