Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LoopIdiomRecognize: add negative tests for powi idiom #72648

Closed
wants to merge 1 commit into from

Conversation

artagnon
Copy link
Contributor

@artagnon artagnon commented Nov 17, 2023

The following code, when compiled under -ffast-math, produces bad codegen due to LoopVectorize:

  float powi(float base, int exp) {
    float result = 1.0;
      for (int i = 0; i < exp; ++i)
        result *= base;
    return result;
  }

The loop can easily be replaced with the llvm.powi intrinsic when the exponent is no wider than a C int. Recognizing this idiom is the job of LoopIdiomRecognize, and it has been marked as a TODO item for years. In preparation for fulfilling this wish, add negative tests corresponding to variations of this program.

@llvmbot
Copy link
Collaborator

llvmbot commented Nov 17, 2023

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

The following code, when compiled under -ffast-math, produces bad codegen due to LoopVectorize:

  float powi(float base, int exp) {
    float result = 1.0;
      for (int i = 0; i < exp; ++i)
        result *= base;
    return result;
  }

The loop can easily be replaced with the llvm.powi intrinsic when the exponent is no wider than a C int. Recognizing this idiom is the job of LoopIdiomRecognize, and it has been marked as a TODO item for years. In preparation for fulfilling this wish, add negative tests corresponding to variations of this program.


Full diff: https://github.com/llvm/llvm-project/pull/72648.diff

1 Files Affected:

  • (added) llvm/test/Transforms/LoopIdiom/powi.ll (+386)
diff --git a/llvm/test/Transforms/LoopIdiom/powi.ll b/llvm/test/Transforms/LoopIdiom/powi.ll
new file mode 100644
index 000000000000000..63e9fbce5a931eb
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/powi.ll
@@ -0,0 +1,386 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes='loop(loop-idiom,loop-deletion,indvars),function(gvn,simplifycfg)' < %s -S | FileCheck %s
+
+define float @powi_f32(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_f32(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT:    br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    ret float [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.not = icmp eq i32 %exp, 0
+  br i1 %cmp.not, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %result = phi float [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+  %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+  %mul = fmul fast float %result, %base
+  %dec = add nsw i32 %merge.dec, -1
+  %cmp.eq = icmp eq i32 %dec, 0
+  br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+  ret float %result.lcssa
+}
+
+define double @powi_f64(double %base, i32 %exp) {
+; CHECK-LABEL: define double @powi_f64(
+; CHECK-SAME: double [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT:    br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    ret double [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.not = icmp eq i32 %exp, 0
+  br i1 %cmp.not, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+  %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+  %mul = fmul fast double %result, %base
+  %dec = add nsw i32 %merge.dec, -1
+  %cmp.eq = icmp eq i32 %dec, 0
+  br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+  ret double %result.lcssa
+}
+
+define double @powi_i16_iv(double %base, i16 %exp) {
+; CHECK-LABEL: define double @powi_i16_iv(
+; CHECK-SAME: double [[BASE:%.*]], i16 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i16 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MERGE_DEC:%.*]] = phi i16 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[DEC]] = add nsw i16 [[MERGE_DEC]], -1
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i16 [[DEC]], 0
+; CHECK-NEXT:    br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    ret double [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.not = icmp eq i16 %exp, 0
+  br i1 %cmp.not, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+  %merge.dec = phi i16 [ %dec, %while.body ], [ %exp, %entry ]
+  %mul = fmul fast double %result, %base
+  %dec = add nsw i16 %merge.dec, -1
+  %cmp.eq = icmp eq i16 %dec, 0
+  br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+  ret double %result.lcssa
+}
+
+define float @powi_canonical_iv_signed(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_canonical_iv_signed(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_SGT:%.*]] = icmp sgt i32 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_SGT]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
+; CHECK-NEXT:    ret float [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.sgt = icmp sgt i32 %exp, 0
+  br i1 %cmp.sgt, label %for.body, label %exit
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+  %mul = fmul fast float %result, %base
+  %inc = add nuw nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %inc, %exp
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body, %entry
+  %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+  ret float %result.lcssa
+}
+
+define float @powi_canonical_iv_unsigned(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_canonical_iv_unsigned(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_EQ]], label [[EXIT:%.*]], label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
+; CHECK-NEXT:    ret float [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.eq = icmp eq i32 %exp, 0
+  br i1 %cmp.eq, label %exit, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+  %mul = fmul fast float %result, %base
+  %inc = add nuw nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %inc, %exp
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body, %entry
+  %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+  ret float %result.lcssa
+}
+
+define float @powi_const_i32_exp(float %base) {
+; CHECK-LABEL: define float @powi_const_i32_exp(
+; CHECK-SAME: float [[BASE:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[RESULT:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 2147483647
+; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret float [[MUL]]
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %result = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+  %mul = fmul fast float %result, %base
+  %inc = add nuw nsw i32 %iv, 1
+  %cmp = icmp eq i32 %inc, 2147483647
+  br i1 %cmp, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body
+  ret float %mul
+}
+
+define float @powi_unrelated_computation(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_unrelated_computation(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[UNRELATED:%.*]] = phi i32 [ [[UNRELATED_XOR:%.*]], [[FOR_BODY]] ], [ 5, [[ENTRY]] ]
+; CHECK-NEXT:    [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[UNRELATED_XOR]] = xor i32 [[IV]], [[UNRELATED]]
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK:       for.cleanup:
+; CHECK-NEXT:    [[TMP0:%.*]] = sitofp i32 [[UNRELATED_XOR]] to float
+; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[MUL]], [[TMP0]]
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[ADD:%.*]] = phi float [ 6.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[FOR_CLEANUP]] ]
+; CHECK-NEXT:    ret float [[ADD]]
+;
+entry:
+  %cmp = icmp sgt i32 %exp, 0
+  br i1 %cmp, label %for.body, label %exit
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+  %unrelated = phi i32 [ %unrelated.xor, %for.body ], [ 5, %entry ]
+  %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+  %mul = fmul fast float %result, %base
+  %unrelated.xor = xor i32 %iv, %unrelated
+  %inc = add nuw nsw i32 %iv, 1
+  %exitcond = icmp eq i32 %inc, %exp
+  br i1 %exitcond, label %for.cleanup, label %for.body
+
+for.cleanup:                                     ; preds = %for.body
+  %0 = sitofp i32 %unrelated.xor to float
+  %1 = fadd fast float %mul, %0
+  br label %exit
+
+exit:                                           ; preds = %for.cleanup, %entry
+  %add = phi float [ 6.000000e+00, %entry ], [ %1, %for.cleanup ]
+  ret float %add
+}
+
+; Negative tests
+
+; The powi idiom is only legal for a floating-point base; this loop multiplies an i32 base.
+define i32 @powi_i32_base(i32 %base, i32 %exp) {
+; CHECK-LABEL: define i32 @powi_i32_base(
+; CHECK-SAME: i32 [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[RESULT:%.*]] = phi i32 [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = mul nsw i32 [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT:    br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    ret i32 [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.not = icmp eq i32 %exp, 0
+  br i1 %cmp.not, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %result = phi i32 [ %mul, %while.body ], [ 1, %entry ]
+  %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+  %mul = mul nsw i32 %result, %base
+  %dec = add nsw i32 %merge.dec, -1
+  %cmp.eq = icmp eq i32 %dec, 0
+  br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %result.lcssa = phi i32 [ 1, %entry ], [ %mul, %while.body ]
+  ret i32 %result.lcssa
+}
+
+; The powi idiom is only legal in -ffast-math mode; the fmul here carries no fast-math flags.
+define float @powi_nofast(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_nofast(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul float [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT:    br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    ret float [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.not = icmp eq i32 %exp, 0
+  br i1 %cmp.not, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %result = phi float [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+  %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+  %mul = fmul float %result, %base
+  %dec = add nsw i32 %merge.dec, -1
+  %cmp.eq = icmp eq i32 %dec, 0
+  br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+  ret float %result.lcssa
+}
+
+; llvm.powi can only be generated when the exponent fits in a C int; an arbitrary i64 exponent does not.
+define double @powi_i64_iv(double %base, i64 %exp) {
+; CHECK-LABEL: define double @powi_i64_iv(
+; CHECK-SAME: double [[BASE:%.*]], i64 [[EXP:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i64 [[EXP]], 0
+; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[MERGE_DEC:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[DEC]] = add nsw i64 [[MERGE_DEC]], -1
+; CHECK-NEXT:    [[CMP_EQ:%.*]] = icmp eq i64 [[DEC]], 0
+; CHECK-NEXT:    br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    ret double [[RESULT_LCSSA]]
+;
+entry:
+  %cmp.not = icmp eq i64 %exp, 0
+  br i1 %cmp.not, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+  %merge.dec = phi i64 [ %dec, %while.body ], [ %exp, %entry ]
+  %mul = fmul fast double %result, %base
+  %dec = add nsw i64 %merge.dec, -1
+  %cmp.eq = icmp eq i64 %dec, 0
+  br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+  ret double %result.lcssa
+}
+
+define float @powi_const_i64_iv(float %base) {
+; CHECK-LABEL: define float @powi_const_i64_iv(
+; CHECK-SAME: float [[BASE:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[EXP:%.*]] = phi i64 [ 2147483648, [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    [[RESULT:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT:    [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT:    [[DEC]] = add nsw i64 [[EXP]], -1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[DEC]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK:       while.end:
+; CHECK-NEXT:    ret float [[MUL]]
+;
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %exp = phi i64 [ 2147483648, %entry ], [ %dec, %while.body ]
+  %result = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+  %mul = fmul fast float %result, %base
+  %dec = add nsw i64 %exp, -1
+  %cmp = icmp eq i64 %dec, 0
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body
+  ret float %mul
+}

The following code, when compiled under -ffast-math, produces bad
codegen due to LoopVectorize:

  float powi(float base, int exp) {
    float result = 1.0;
      for (int i = 0; i < exp; ++i)
        result *= base;
    return result;
  }

The loop can easily be replaced with the llvm.powi intrinsic when the
exponent is no wider than a C int. Recognizing this idiom is the job of
LoopIdiomRecognize, and it has been marked as a TODO item for years. In
preparation for fulfilling this wish, add negative tests corresponding
to variations of this program.
@eopXD
Copy link
Member

eopXD commented Dec 7, 2023

Would you mind sharing a Godbolt link showing the options that produce the bad vectorized code you mentioned? Specifically, which backend is affected?

@artagnon
Copy link
Contributor Author

Hi @eopXD, have a look at this codegen on x86-64 for instance: https://godbolt.org/z/4v7sYodo9.

@artagnon
Copy link
Contributor Author

artagnon commented Jan 8, 2024

Gentle ping.

@artagnon artagnon closed this Apr 6, 2024
@artagnon artagnon deleted the lir-powi-test branch April 6, 2024 13:03
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants