-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
LoopIdiomRecognize: add negative tests for powi idiom #72648
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) Changes: The following code, when compiled under -ffast-math, produces bad codegen due to LoopVectorize: float powi(float base, int exp) {
float result = 1.0;
for (int i = 0; i < exp; ++i)
result *= base;
return result;
} It can easily be replaced with the llvm.powi intrinsic, when the exponent is, at most, a C int type. This is the job of LoopIdiomRecognize, and has been marked as a TODO item for years. In preparation to fulfill this wish, add negative tests corresponding to variations of this program. Full diff: https://github.com/llvm/llvm-project/pull/72648.diff 1 Files Affected:
diff --git a/llvm/test/Transforms/LoopIdiom/powi.ll b/llvm/test/Transforms/LoopIdiom/powi.ll
new file mode 100644
index 000000000000000..63e9fbce5a931eb
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/powi.ll
@@ -0,0 +1,386 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt -passes='loop(loop-idiom,loop-deletion,indvars),function(gvn,simplifycfg)' < %s -S | FileCheck %s
+
+; Basic form of the idiom: a float product seeded with 1.0 is multiplied by
+; %base (fmul fast) once per iteration while an i32 counter runs from %exp
+; down to zero. The entry branch skips the loop when %exp is 0. The assertions
+; below expect the loop to come out of the pass pipeline unchanged.
+define float @powi_f32(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_f32(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi float [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast float %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret float %result.lcssa
+}
+
+; Countdown form of the idiom with a double base: a product seeded with 1.0 is
+; multiplied by %base (fmul fast) while an i32 counter runs from %exp down to
+; zero. The entry branch skips the loop when %exp is 0. The assertions below
+; expect the loop to come out of the pass pipeline unchanged.
+define double @powi_f64(double %base, i32 %exp) {
+; CHECK-LABEL: define double @powi_f64(
+; CHECK-SAME: double [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast double %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret double %result.lcssa
+}
+
+; Countdown form of the idiom with a counter narrower than i32: the exponent
+; and the induction variable are i16, the base and running product are double.
+; The entry branch skips the loop when %exp is 0. The assertions below expect
+; the loop to come out of the pass pipeline unchanged.
+define double @powi_i16_iv(double %base, i16 %exp) {
+; CHECK-LABEL: define double @powi_i16_iv(
+; CHECK-SAME: double [[BASE:%.*]], i16 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i16 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i16 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i16 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i16 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i16 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i16 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast double %result, %base
+ %dec = add nsw i16 %merge.dec, -1
+ %cmp.eq = icmp eq i16 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret double %result.lcssa
+}
+
+; Count-up form of the idiom: the induction variable starts at 0, is
+; incremented by 1, and the loop exits when it equals %exp. The entry guard is
+; a signed compare (%exp > 0), so zero and negative exponents bypass the loop
+; and return 1.0. The assertions below expect the loop to be left unchanged.
+define float @powi_canonical_iv_signed(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_canonical_iv_signed(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_SGT:%.*]] = icmp sgt i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_SGT]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.sgt = icmp sgt i32 %exp, 0
+ br i1 %cmp.sgt, label %for.body, label %exit
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+ %mul = fmul fast float %result, %base
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, %exp
+ br i1 %exitcond, label %exit, label %for.body
+
+exit: ; preds = %for.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ ret float %result.lcssa
+}
+
+; Count-up form of the idiom whose entry guard only tests %exp == 0; every
+; other value of %exp enters the loop, which exits when the incremented
+; induction variable equals %exp.
+; NOTE(review): the name suggests %exp is meant as an unsigned count, which
+; would explain the equality-only guard - confirm against the C source.
+; The assertions below expect the loop to be left unchanged.
+define float @powi_canonical_iv_unsigned(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_canonical_iv_unsigned(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[EXIT:%.*]], label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[FOR_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.eq = icmp eq i32 %exp, 0
+ br i1 %cmp.eq, label %exit, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+ %mul = fmul fast float %result, %base
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, %exp
+ br i1 %exitcond, label %exit, label %for.body
+
+exit: ; preds = %for.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ ret float %result.lcssa
+}
+
+; Count-up form of the idiom with a constant bound and no entry guard: the
+; loop exits once the incremented i32 induction variable equals 2147483647
+; (2^31 - 1, the largest signed 32-bit value). The exit block returns %mul
+; directly. The assertions below expect the loop to be left unchanged.
+define float @powi_const_i32_exp(float %base) {
+; CHECK-LABEL: define float @powi_const_i32_exp(
+; CHECK-SAME: float [[BASE:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 2147483647
+; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[FOR_BODY]]
+; CHECK: exit:
+; CHECK-NEXT: ret float [[MUL]]
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %result = phi float [ 1.000000e+00, %entry ], [ %mul, %for.body ]
+ %mul = fmul fast float %result, %base
+ %inc = add nuw nsw i32 %iv, 1
+ %cmp = icmp eq i32 %inc, 2147483647
+ br i1 %cmp, label %exit, label %for.body
+
+exit: ; preds = %for.body
+ ret float %mul
+}
+
+; The power loop shares its body with a second, independent recurrence:
+; %unrelated starts at 5 and is xor'ed with the induction variable on every
+; iteration. After the loop, for.cleanup converts that xor result to float and
+; adds it to the product. When the signed guard (%exp > 0) fails, the function
+; returns the constant 6.0 instead. The assertions below expect the loop and
+; the cleanup block to be left unchanged.
+define float @powi_unrelated_computation(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_unrelated_computation(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[EXIT:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[UNRELATED:%.*]] = phi i32 [ [[UNRELATED_XOR:%.*]], [[FOR_BODY]] ], [ 5, [[ENTRY]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[FOR_BODY]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[UNRELATED_XOR]] = xor i32 [[IV]], [[UNRELATED]]
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[EXP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK: for.cleanup:
+; CHECK-NEXT: [[TMP0:%.*]] = sitofp i32 [[UNRELATED_XOR]] to float
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[MUL]], [[TMP0]]
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: [[ADD:%.*]] = phi float [ 6.000000e+00, [[ENTRY]] ], [ [[TMP1]], [[FOR_CLEANUP]] ]
+; CHECK-NEXT: ret float [[ADD]]
+;
+entry:
+ %cmp = icmp sgt i32 %exp, 0
+ br i1 %cmp, label %for.body, label %exit
+
+for.body: ; preds = %entry, %for.body
+ %iv = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %unrelated = phi i32 [ %unrelated.xor, %for.body ], [ 5, %entry ]
+ %result = phi float [ %mul, %for.body ], [ 1.000000e+00, %entry ]
+ %mul = fmul fast float %result, %base
+ %unrelated.xor = xor i32 %iv, %unrelated
+ %inc = add nuw nsw i32 %iv, 1
+ %exitcond = icmp eq i32 %inc, %exp
+ br i1 %exitcond, label %for.cleanup, label %for.body
+
+for.cleanup: ; preds = %for.body
+ %0 = sitofp i32 %unrelated.xor to float
+ %1 = fadd fast float %mul, %0
+ br label %exit
+
+exit: ; preds = %for.cleanup, %entry
+ %add = phi float [ 6.000000e+00, %entry ], [ %1, %for.cleanup ]
+ ret float %add
+}
+
+; Negative tests
+
+; The powi idiom is only legal for a base of floating-point type.
+; Here the base and the running product are i32 and the update is an integer
+; `mul nsw`, so the loop contains no floating-point multiply. The loop shape
+; (countdown from %exp to zero, guarded by %exp == 0) otherwise matches
+; @powi_f32. The assertions below expect the loop to be left unchanged.
+define i32 @powi_i32_base(i32 %base, i32 %exp) {
+; CHECK-LABEL: define i32 @powi_i32_base(
+; CHECK-SAME: i32 [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi i32 [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = mul nsw i32 [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret i32 [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi i32 [ %mul, %while.body ], [ 1, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = mul nsw i32 %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi i32 [ 1, %entry ], [ %mul, %while.body ]
+ ret i32 %result.lcssa
+}
+
+; The powi idiom is only legal in -ffast-math mode.
+; This loop is identical in shape to @powi_f32 except that the fmul carries no
+; fast-math flags. The assertions below expect the loop to be left unchanged.
+define float @powi_nofast(float %base, i32 %exp) {
+; CHECK-LABEL: define float @powi_nofast(
+; CHECK-SAME: float [[BASE:%.*]], i32 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i32 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i32 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i32 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret float [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i32 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi float [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i32 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul float %result, %base
+ %dec = add nsw i32 %merge.dec, -1
+ %cmp.eq = icmp eq i32 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret float %result.lcssa
+}
+
+; llvm.powi can only be generated for an exponent that is, at most, a C int type.
+; Here the exponent argument and the countdown induction variable are i64; the
+; base and running product are double and the fmul is marked fast. The
+; assertions below expect the loop to be left unchanged.
+define double @powi_i64_iv(double %base, i64 %exp) {
+; CHECK-LABEL: define double @powi_i64_iv(
+; CHECK-SAME: double [[BASE:%.*]], i64 [[EXP:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[EXP]], 0
+; CHECK-NEXT: br i1 [[CMP_NOT]], label [[WHILE_END:%.*]], label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[RESULT:%.*]] = phi double [ [[MUL:%.*]], [[WHILE_BODY]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[MERGE_DEC:%.*]] = phi i64 [ [[DEC:%.*]], [[WHILE_BODY]] ], [ [[EXP]], [[ENTRY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast double [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i64 [[MERGE_DEC]], -1
+; CHECK-NEXT: [[CMP_EQ:%.*]] = icmp eq i64 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP_EQ]], label [[WHILE_END]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: [[RESULT_LCSSA:%.*]] = phi double [ 1.000000e+00, [[ENTRY]] ], [ [[MUL]], [[WHILE_BODY]] ]
+; CHECK-NEXT: ret double [[RESULT_LCSSA]]
+;
+entry:
+ %cmp.not = icmp eq i64 %exp, 0
+ br i1 %cmp.not, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %result = phi double [ %mul, %while.body ], [ 1.000000e+00, %entry ]
+ %merge.dec = phi i64 [ %dec, %while.body ], [ %exp, %entry ]
+ %mul = fmul fast double %result, %base
+ %dec = add nsw i64 %merge.dec, -1
+ %cmp.eq = icmp eq i64 %dec, 0
+ br i1 %cmp.eq, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %result.lcssa = phi double [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ ret double %result.lcssa
+}
+
+; Countdown form of the idiom with a constant trip count and no entry guard:
+; the i64 counter starts at 2147483648 (2^31), one more than the largest
+; signed 32-bit value, and the loop exits when the decremented counter
+; reaches 0. The iteration count therefore does not fit in a signed i32. The
+; assertions below expect the loop to be left unchanged.
+define float @powi_const_i64_iv(float %base) {
+; CHECK-LABEL: define float @powi_const_i64_iv(
+; CHECK-SAME: float [[BASE:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
+; CHECK: while.body:
+; CHECK-NEXT: [[EXP:%.*]] = phi i64 [ 2147483648, [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[RESULT:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[MUL:%.*]], [[WHILE_BODY]] ]
+; CHECK-NEXT: [[MUL]] = fmul fast float [[RESULT]], [[BASE]]
+; CHECK-NEXT: [[DEC]] = add nsw i64 [[EXP]], -1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[DEC]], 0
+; CHECK-NEXT: br i1 [[CMP]], label [[WHILE_END:%.*]], label [[WHILE_BODY]]
+; CHECK: while.end:
+; CHECK-NEXT: ret float [[MUL]]
+;
+entry:
+ br label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %exp = phi i64 [ 2147483648, %entry ], [ %dec, %while.body ]
+ %result = phi float [ 1.000000e+00, %entry ], [ %mul, %while.body ]
+ %mul = fmul fast float %result, %base
+ %dec = add nsw i64 %exp, -1
+ %cmp = icmp eq i64 %dec, 0
+ br i1 %cmp, label %while.end, label %while.body
+
+while.end: ; preds = %while.body
+ ret float %mul
+}
|
ee30fb7
to
a9e6cb2
Compare
The following code, when compiled under -ffast-math, produces bad codegen due to LoopVectorize: float powi(float base, int exp) { float result = 1.0; for (int i = 0; i < exp; ++i) result *= base; return result; } It can easily be replaced with the llvm.powi intrinsic, when the exponent is a C int type. This is the job of LoopIdiomRecognize, and has been marked as a TODO item for years. In preparation to fulfill this wish, add negative tests corresponding to variations of this program.
a9e6cb2
to
392e99c
Compare
Do you mind providing a snippet through godbolt on the options that produce the bad vectorized code you have mentioned? Specifically what backend? |
Hi @eopXD, have a look at this codegen on x86-64 for instance: https://godbolt.org/z/4v7sYodo9. |
Gentle ping. |
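The following code, when compiled under -ffast-math, produces bad codegen due to LoopVectorize: float powi(float base, int exp) { float result = 1.0; for (int i = 0; i < exp; ++i) result *= base; return result; }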
It can easily be replaced with the llvm.powi intrinsic, when the exponent is a C int type. This is the job of LoopIdiomRecognize, and has been marked as a TODO item for years. In preparation to fulfill this wish, add negative tests corresponding to variations of this program.