Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SimplifyLibCalls: Permit pow(2, x) -> ldexp(1, x) fold for vectors #92532

Merged
merged 1 commit into from
May 19, 2024

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented May 17, 2024

The TODO has been addressed since the ldexp intrinsic exists now.

The TODO has been addressed since the ldexp intrinsic exists now.
@llvmbot
Copy link
Collaborator

llvmbot commented May 17, 2024

@llvm/pr-subscribers-llvm-transforms

Author: Matt Arsenault (arsenm)

Changes

The TODO has been addressed since the ldexp intrinsic exists now.


Full diff: https://github.com/llvm/llvm-project/pull/92532.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp (+4-3)
  • (modified) llvm/test/Transforms/InstCombine/pow-to-ldexp.ll (+20-49)
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c9567b740026b..eb1224abf00e2 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2087,15 +2087,16 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
 
   AttributeList NoAttrs; // Attributes are only meaningful on the original call
 
+  const bool UseIntrinsic = Pow->doesNotAccessMemory();
+
   // pow(2.0, itofp(x)) -> ldexp(1.0, x)
-  // TODO: This does not work for vectors because there is no ldexp intrinsic.
-  if (!Ty->isVectorTy() && match(Base, m_SpecificFP(2.0)) &&
+  if ((UseIntrinsic || !Ty->isVectorTy()) && match(Base, m_SpecificFP(2.0)) &&
       (isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
       hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
     if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize())) {
       Constant *One = ConstantFP::get(Ty, 1.0);
 
-      if (Pow->doesNotAccessMemory()) {
+      if (UseIntrinsic) {
         return copyFlags(*Pow, B.CreateIntrinsic(Intrinsic::ldexp,
                                                  {Ty, ExpoI->getType()},
                                                  {One, ExpoI}, Pow, "exp2"));
diff --git a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll
index 27249dd5d72ae..b61f8809bd259 100644
--- a/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll
+++ b/llvm/test/Transforms/InstCombine/pow-to-ldexp.ll
@@ -144,16 +144,10 @@ define half @pow_sitofp_f16_const_base_2(i32 %x) {
 }
 
 define <2 x float> @pow_sitofp_v2f32_const_base_2(<2 x i32> %x) {
-; LDEXP-EXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2(
-; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[X]])
-; LDEXP-EXP2-NEXT:    ret <2 x float> [[EXP2]]
-;
-; LDEXP-NOEXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2(
-; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-NOEXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x float>
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x float> @llvm.pow.v2f32(<2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x float> [[ITOFP]])
-; LDEXP-NOEXP2-NEXT:    ret <2 x float> [[POW]]
+; LDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2(
+; LDEXP-SAME: <2 x i32> [[X:%.*]]) {
+; LDEXP-NEXT:    [[EXP2:%.*]] = tail call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[X]])
+; LDEXP-NEXT:    ret <2 x float> [[EXP2]]
 ;
 ; NOLDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2(
 ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) {
@@ -205,15 +199,10 @@ define <2 x float> @pow_sitofp_v2f32_const_base_mixed_2(<2 x i32> %x) {
 }
 
 define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) {
-; LDEXP-EXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(
-; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[X]])
-; LDEXP-EXP2-NEXT:    ret <2 x float> [[EXP2]]
-;
-; LDEXP-NOEXP2-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(
-; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call nsz afn <2 x float> @llvm.powi.v2f32.v2i32(<2 x float> <float 2.000000e+00, float 2.000000e+00>, <2 x i32> [[X]])
-; LDEXP-NOEXP2-NEXT:    ret <2 x float> [[POW]]
+; LDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(
+; LDEXP-SAME: <2 x i32> [[X:%.*]]) {
+; LDEXP-NEXT:    [[EXP2:%.*]] = tail call nsz afn <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> <float 1.000000e+00, float 1.000000e+00>, <2 x i32> [[X]])
+; LDEXP-NEXT:    ret <2 x float> [[EXP2]]
 ;
 ; NOLDEXP-LABEL: define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(
 ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) {
@@ -227,16 +216,10 @@ define <2 x float> @pow_sitofp_v2f32_const_base_2__flags(<2 x i32> %x) {
 }
 
 define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(<vscale x 4 x i32> %x) {
-; LDEXP-EXP2-LABEL: define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(
-; LDEXP-EXP2-SAME: <vscale x 4 x i32> [[X:%.*]]) {
-; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[X]])
-; LDEXP-EXP2-NEXT:    ret <vscale x 4 x float> [[EXP2]]
-;
-; LDEXP-NOEXP2-LABEL: define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(
-; LDEXP-NOEXP2-SAME: <vscale x 4 x i32> [[X:%.*]]) {
-; LDEXP-NOEXP2-NEXT:    [[ITOFP:%.*]] = sitofp <vscale x 4 x i32> [[X]] to <vscale x 4 x float>
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float> [[ITOFP]])
-; LDEXP-NOEXP2-NEXT:    ret <vscale x 4 x float> [[POW]]
+; LDEXP-LABEL: define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(
+; LDEXP-SAME: <vscale x 4 x i32> [[X:%.*]]) {
+; LDEXP-NEXT:    [[EXP2:%.*]] = tail call <vscale x 4 x float> @llvm.ldexp.nxv4f32.nxv4i32(<vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[X]])
+; LDEXP-NEXT:    ret <vscale x 4 x float> [[EXP2]]
 ;
 ; NOLDEXP-LABEL: define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(
 ; NOLDEXP-SAME: <vscale x 4 x i32> [[X:%.*]]) {
@@ -250,16 +233,10 @@ define <vscale x 4 x float> @pow_sitofp_nxv4f32_const_base_2(<vscale x 4 x i32>
 }
 
 define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) {
-; LDEXP-EXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2(
-; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> <half 0xH3C00, half 0xH3C00>, <2 x i32> [[X]])
-; LDEXP-EXP2-NEXT:    ret <2 x half> [[EXP2]]
-;
-; LDEXP-NOEXP2-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2(
-; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-NOEXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x half>
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x half> @llvm.pow.v2f16(<2 x half> <half 0xH4000, half 0xH4000>, <2 x half> [[ITOFP]])
-; LDEXP-NOEXP2-NEXT:    ret <2 x half> [[POW]]
+; LDEXP-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2(
+; LDEXP-SAME: <2 x i32> [[X:%.*]]) {
+; LDEXP-NEXT:    [[EXP2:%.*]] = tail call <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half> <half 0xH3C00, half 0xH3C00>, <2 x i32> [[X]])
+; LDEXP-NEXT:    ret <2 x half> [[EXP2]]
 ;
 ; NOLDEXP-LABEL: define <2 x half> @pow_sitofp_v2f16_const_base_2(
 ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) {
@@ -273,16 +250,10 @@ define <2 x half> @pow_sitofp_v2f16_const_base_2(<2 x i32> %x) {
 }
 
 define <2 x double> @pow_sitofp_v2f64_const_base_2(<2 x i32> %x) {
-; LDEXP-EXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2(
-; LDEXP-EXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-EXP2-NEXT:    [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x i32> [[X]])
-; LDEXP-EXP2-NEXT:    ret <2 x double> [[EXP2]]
-;
-; LDEXP-NOEXP2-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2(
-; LDEXP-NOEXP2-SAME: <2 x i32> [[X:%.*]]) {
-; LDEXP-NOEXP2-NEXT:    [[ITOFP:%.*]] = sitofp <2 x i32> [[X]] to <2 x double>
-; LDEXP-NOEXP2-NEXT:    [[POW:%.*]] = tail call <2 x double> @llvm.pow.v2f64(<2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double> [[ITOFP]])
-; LDEXP-NOEXP2-NEXT:    ret <2 x double> [[POW]]
+; LDEXP-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2(
+; LDEXP-SAME: <2 x i32> [[X:%.*]]) {
+; LDEXP-NEXT:    [[EXP2:%.*]] = tail call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x i32> [[X]])
+; LDEXP-NEXT:    ret <2 x double> [[EXP2]]
 ;
 ; NOLDEXP-LABEL: define <2 x double> @pow_sitofp_v2f64_const_base_2(
 ; NOLDEXP-SAME: <2 x i32> [[X:%.*]]) {

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@arsenm arsenm merged commit c1c1567 into llvm:main May 19, 2024
7 checks passed
@arsenm arsenm deleted the simplifylibcalls-allow-pow-to-exp2-vectors branch May 19, 2024 18:48
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants