SimplifyLibCalls: Emit vector ldexp intrinsics in exp2->ldexp combine (…

…#92219) Co-authored-by: Nikita Popov <github@npopov.com>
llvm · May 16, 2024 · 0ea178b · 0ea178b
1 parent cd5ee27
commit 0ea178b
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 29 deletions.
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1992,11 +1992,12 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B, unsigned DstWidth) {
     Value *Op = cast<Instruction>(I2F)->getOperand(0);
     // Make sure that the exponent fits inside an "int" of size DstWidth,
     // thus avoiding any range issues that FP has not.
-    unsigned BitWidth = Op->getType()->getPrimitiveSizeInBits();
-    if (BitWidth < DstWidth ||
-        (BitWidth == DstWidth && isa<SIToFPInst>(I2F)))
-      return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, B.getIntNTy(DstWidth))
-                                  : B.CreateZExt(Op, B.getIntNTy(DstWidth));
+    unsigned BitWidth = Op->getType()->getScalarSizeInBits();
+    if (BitWidth < DstWidth || (BitWidth == DstWidth && isa<SIToFPInst>(I2F))) {
+      Type *IntTy = Op->getType()->getWithNewBitWidth(DstWidth);
+      return isa<SIToFPInst>(I2F) ? B.CreateSExt(Op, IntTy)
+                                  : B.CreateZExt(Op, IntTy);
+    }
   }
 
   return nullptr;
@@ -2366,10 +2367,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
       hasFloatVersion(M, Name))
     Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
 
+  const bool UseIntrinsic = CI->doesNotAccessMemory();
   // Bail out for vectors because the code below only expects scalars.
-  // TODO: This could be allowed if we had a ldexp intrinsic (D14327).
   Type *Ty = CI->getType();
-  if (Ty->isVectorTy())
+  if (!UseIntrinsic && Ty->isVectorTy())
     return Ret;
 
   // exp2(sitofp(x)) -> ldexp(1.0, sext(x))  if sizeof(x) <= IntSize
@@ -2382,7 +2383,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
 
       // TODO: Emitting the intrinsic should not depend on whether the libcall
       // is available.
-      if (CI->doesNotAccessMemory()) {
+      if (UseIntrinsic) {
         return copyFlags(*CI, B.CreateIntrinsic(Intrinsic::ldexp,
                                                 {Ty, Exp->getType()},
                                                 {One, Exp}, CI));

diff --git a/llvm/test/Transforms/InstCombine/exp2-1.ll b/llvm/test/Transforms/InstCombine/exp2-1.ll
@@ -308,20 +308,15 @@ define float @sitofp_scalar_intrinsic_with_FMF(i8 %x) {
 
 define <2 x float> @sitofp_vector_intrinsic_with_FMF(<2 x i8> %x) {
 ; LDEXP32-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; LDEXP32-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; LDEXP32-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i32>
+; LDEXP32-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
 ; LDEXP32-NEXT:    ret <2 x float> [[R]]
 ;
 ; LDEXP16-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; LDEXP16-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; LDEXP16-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X:%.*]] to <2 x i16>
+; LDEXP16-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i16> [[TMP1]])
 ; LDEXP16-NEXT:    ret <2 x float> [[R]]
 ;
-; NOLDEXPF-LABEL: @sitofp_vector_intrinsic_with_FMF(
-; NOLDEXPF-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
-; NOLDEXPF-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
-; NOLDEXPF-NEXT:    ret <2 x float> [[R]]
-;
 ; NOLDEXP-LABEL: @sitofp_vector_intrinsic_with_FMF(
 ; NOLDEXP-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X:%.*]] to <2 x float>
 ; NOLDEXP-NEXT:    [[R:%.*]] = call nnan <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])

diff --git a/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll b/llvm/test/Transforms/InstCombine/exp2-to-ldexp.ll
@@ -39,11 +39,17 @@ define float @exp2_f32_sitofp_i8_flags(i8 %x) {
 }
 
 define <2 x float> @exp2_v2f32_sitofp_v2i8(<2 x i8> %x) {
-; CHECK-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
-; CHECK-SAME: <2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]])
-; CHECK-NEXT:    ret <2 x float> [[EXP2]]
+; LDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
+; LDEXP-SAME: <2 x i8> [[X:%.*]]) {
+; LDEXP-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; LDEXP-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
+; LDEXP-NEXT:    ret <2 x float> [[EXP2]]
+;
+; NOLDEXP-LABEL: define <2 x float> @exp2_v2f32_sitofp_v2i8(
+; NOLDEXP-SAME: <2 x i8> [[X:%.*]]) {
+; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
+; NOLDEXP-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[ITOFP]])
+; NOLDEXP-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %itofp = sitofp <2 x i8> %x to <2 x float>
   %exp2 = call <2 x float> @llvm.exp2.v2f32(<2 x float> %itofp)
@@ -117,11 +123,17 @@ define fp128 @exp2_fp128_sitofp_i8(i8 %x) {
 }
 
 define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(<vscale x 4 x i8> %x) {
-; CHECK-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
-; CHECK-SAME: <vscale x 4 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[ITOFP:%.*]] = sitofp <vscale x 4 x i8> [[X]] to <vscale x 4 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[ITOFP]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[EXP2]]
+; LDEXP-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
+; LDEXP-SAME: <vscale x 4 x i8> [[X:%.*]]) {
+; LDEXP-NEXT:    [[TMP1:%.*]] = sext <vscale x 4 x i8> [[X]] to <vscale x 4 x i32>
+; LDEXP-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[TMP1]])
+; LDEXP-NEXT:    ret <vscale x 4 x float> [[EXP2]]
+;
+; NOLDEXP-LABEL: define <vscale x 4 x float> @exp2_nxv4f32_sitofp_i8(
+; NOLDEXP-SAME: <vscale x 4 x i8> [[X:%.*]]) {
+; NOLDEXP-NEXT:    [[ITOFP:%.*]] = sitofp <vscale x 4 x i8> [[X]] to <vscale x 4 x float>
+; NOLDEXP-NEXT:    [[EXP2:%.*]] = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> [[ITOFP]])
+; NOLDEXP-NEXT:    ret <vscale x 4 x float> [[EXP2]]
 ;
   %itofp = sitofp <vscale x 4 x i8> %x to <vscale x 4 x float>
   %exp2 = call <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float> %itofp)

diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll
@@ -530,8 +530,8 @@ define double @powf_exp_const2_int_no_fast(double %base) {
 define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(<2 x i8> %x) {
 ; CHECK-LABEL: define <2 x float> @pow_sitofp_const_base_2_no_fast_vector(
 ; CHECK-SAME: <2 x i8> [[X:%.*]]) {
-; CHECK-NEXT:    [[S:%.*]] = sitofp <2 x i8> [[X]] to <2 x float>
-; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[S]])
+; CHECK-NEXT:    [[TMP1:%.*]] = sext <2 x i8> [[X]] to <2 x i32>
+; CHECK-NEXT:    [[EXP2:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[TMP1]])
 ; CHECK-NEXT:    ret <2 x float> [[EXP2]]
 ;
   %s = sitofp <2 x i8> %x to <2 x float>