SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine #92219

arsenm · 2024-05-15T06:12:53Z

No description provided.

llvmbot · 2024-05-15T06:13:28Z

@llvm/pr-subscribers-llvm-transforms

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/92219.diff

4 Files Affected:

(modified) llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp (+13-8)
(modified) llvm/test/Transforms/InstCombine/exp2-1.ll (+4-9)
(modified) llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll (+22-10)
(modified) llvm/test/Transforms/InstCombine/pow_fp_int.ll (+2-2)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 9cb8e20b4806f..cfcf517cf555a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2000,11 +2000,16 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
     Value *Op = cast<Instruction>(I2F)->getOperand(0);
     // Make sure that the exponent fits inside an "int" of size DstWidth,
     // thus avoiding any range issues that FP has not.
-    unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
-    if (BitWidth < DstWidth ||
-        (BitWidth == DstWidth && isa<SIToFPInst>(I2F)))
-      return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getIntNTy(DstWidth))
-                                  : B.CreateZExt(Op, B.getIntNTy(DstWidth));
+    unsigned BitWidth =
+        Op->getType()->getScalarType()->getPrimitiveSizeInBits();
+    if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
+      Type *IntTy = B.getIntNTy(DstWidth);
+      if (VectorType *VT = dyn_cast<VectorType>(I2F->getType()))
+        IntTy = VectorType::get(IntTy, VT->getElementCount());
+
+      return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)
+                                  : B.CreateZExt(Op, IntTy);
+    }
   }
 
   return nullptr;
@@ -2377,10 +2382,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
       hasFloatVersion(M, Name))
     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
+  const bool UseIntrinsic = CI->doesNotAccessMemory();
   // Bail out for vectors because the code below only expects scalars.
-  // TODO: This could be allowed if we had a ldexp intrinsic (D14327).
   Type *Ty = CI->getType();
-  if (Ty->isVectorTy())
+  if (!UseIntrinsic && Ty->isVectorTy())
     return Ret;
 
   // exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= IntSize
@@ -2393,7 +2398,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
 
       // TODO: Emitting the intrinsic should not depend on whether the libcall
       // is available.
-      if (CI->doesNotAccessMemory()) {
+      if (UseIntrinsic) {
         return copyFlags(*CI, B.CreateIntrinsic(Intrinsic::ldexp,
                                                 {Ty, Exp->getType()},
                                                 {One, Exp}, CI));
diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll
index 5bf70320d9ec4..2dff0b08ecf97 100644
--- a/llvm/test/Transforms/InstCombine/exp2-1.ll
+++ b/llvm/test/Transforms/InstCombine/exp2-1.ll
@@ -308,20 +308,15 @@ define float @sitofp_scalar_intrinsic_with_FMF(i8 %x) {
 
 define <2 x float> @sitofp_vector_intrinsic_with_FMF(<2 x i8> %x) {
 ; LDEXP32-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; LDEXP32-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; LDEXP32-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
+; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
 ; LDEXP32-NEXT:    ret <2 x float> [[R]]
 ;
 ; LDEXP16-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; LDEXP16-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; LDEXP16-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16>
+; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i16> [[TMP1]])
 ; LDEXP16-NEXT:    ret <2 x float> [[R]]
 ;
-; NOLDEXPF-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; NOLDEXPF-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; NOLDEXPF-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
-; NOLDEXPF-NEXT:    ret <2 x float> [[R]]
-;
 ; NOLDEXP-LABEL: @sitofp_vector_intrinsic_with_FMF(
 ; NOLDEXP-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
 ; NOLDEXP-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
index 3069ee65e238a..6e5be5a19d6da 100644
--- a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
+++ b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
@@ -39,11 +39,17 @@ define float @exp2_f32_sitofp_i8_flags(i8 %x) {
 }
 
 define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) {
-; CHECK-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
-; CHECK-SAME: <2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]])
-; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+; LDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
+; LDEXP-SAME: <2 x i8> [[X:%.*]]) {
+; LDEXP-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; LDEXP-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; LDEXP-NEXT:    ret <2 x float> [[EXP2]]
+;
+; NOLDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
+; NOLDEXP-SAME: <2 x i8> [[X:%.*]]) {
+; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
+; NOLDEXP-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]])
+; NOLDEXP-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %itofp = sitofp <2 x i8> %x to <2 x float>
   %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %itofp)
@@ -117,11 +123,17 @@ define fp128 @exp2_fp128_sitofp_i8(i8 %x) {
 }
 
 define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(<vscale x 4 x i8> %x) {
-; CHECK-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
-; CHECK-SAME: <vscale x 4 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[ITOFP:%.*]] = sitofp <vscale x 4 x i8> [[X]] to <vscale x 4 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[ITOFP]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[EXP2]]
+; LDEXP-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
+; LDEXP-SAME: <vscale x 4 x i8> [[X:%.*]]) {
+; LDEXP-NEXT:    [[TMP1:%.*]] = sext <vscale x 4 x i8> [[X]] to <vscale x 4 x i32>
+; LDEXP-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP1]])
+; LDEXP-NEXT:    ret <vscale x 4 x float> [[EXP2]]
+;
+; NOLDEXP-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
+; NOLDEXP-SAME: <vscale x 4 x i8> [[X:%.*]]) {
+; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <vscale x 4 x i8> [[X]] to <vscale x 4 x float>
+; NOLDEXP-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[ITOFP]])
+; NOLDEXP-NEXT:    ret <vscale x 4 x float> [[EXP2]]
 ;
   %itofp = sitofp <vscale x 4 x i8> %x to <vscale x 4 x float>
   %exp2 = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %itofp)
diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
index 9c1fa88f3183e..7b194b3f8925f 100644
--- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll
+++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
@@ -530,8 +530,8 @@ define double @powf_exp_const2_int_no_fast(double %base) {
 define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %s = sitofp <2 x i8> %x to <2 x float>

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

nikic

LGTM

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

Co-authored-by: Nikita Popov <github@npopov.com>

github-actions · 2024-05-16T07:01:27Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp

SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine

f71d24c

arsenm added the llvm:instcombine and floating-point (Floating-point math) labels May 15, 2024

arsenm requested review from nikic, dtcxzyw, goldsteinn, andykaylor, jcranmer-intel and efriedma-quic May 15, 2024 06:12

llvmbot added the llvm:transforms label May 15, 2024

nikic reviewed May 15, 2024

View reviewed changes

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp Outdated Show resolved Hide resolved

Use getWithNewBitWidth

1dda52d

nikic approved these changes May 16, 2024

View reviewed changes

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp Outdated Show resolved Hide resolved

Use getScalarSizeInBits

accf636

Co-authored-by: Nikita Popov <github@npopov.com>

arsenm commented May 16, 2024

View reviewed changes

llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp Outdated Show resolved Hide resolved

Fix clang-format error

fddbef6

arsenm merged commit 0ea178b into llvm:main May 16, 2024
3 of 4 checks passed

arsenm deleted the instcombine-exp2-to-ldexp-vector branch May 16, 2024 08:25

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine #92219

SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine #92219

arsenm commented May 15, 2024

llvmbot commented May 15, 2024

nikic left a comment

github-actions bot commented May 16, 2024 •

edited

Loading

SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine #92219

SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine #92219

Conversation

arsenm commented May 15, 2024

llvmbot commented May 15, 2024

nikic left a comment

Choose a reason for hiding this comment

github-actions bot commented May 16, 2024 • edited Loading

github-actions bot commented May 16, 2024 •

edited

Loading