23 changes: 17 additions & 6 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4003,18 +4003,29 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {

   // Try to fold intrinsic into select/phi operands. This is legal if:
   // * The intrinsic is speculatable.
-  // * The select condition is not a vector, or the intrinsic does not
-  //   perform cross-lane operations.
-  if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI) &&
-      isNotCrossLaneOperation(II))
+  // * The operand is one of the following:
+  //   - a phi.
+  //   - a select with a scalar condition.
+  //   - a select with a vector condition and II is not a cross lane operation.
+  if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) {
     for (Value *Op : II->args()) {
-      if (auto *Sel = dyn_cast<SelectInst>(Op))
-        if (Instruction *R = FoldOpIntoSelect(*II, Sel))
+      if (auto *Sel = dyn_cast<SelectInst>(Op)) {
+        bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
+        if (IsVectorCond && !isNotCrossLaneOperation(II))
+          continue;
+        // Don't replace a scalar select with a more expensive vector select if
+        // we can't simplify both arms of the select.
+        bool SimplifyBothArms =
+            !Op->getType()->isVectorTy() && II->getType()->isVectorTy();

Contributor:
Can you please point out the test coverage for this special case?

Member Author:
It was due to regressions in LoopVectorize/AArch64, as a get.active.lane.mask(%base, select %cond, %n, 0) was replaced with a select between the mask and zeroinitializer, which is a more expensive operation.

I'll add a negative test in intrinsic-select.ll.
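
A minimal IR sketch of that regression (a hypothetical reduction of the LoopVectorize/AArch64 case; the value names are assumed). The input:

  %s = select i1 %cond, i64 %n, i64 0
  %mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %s)

Without the SimplifyBothArms guard, only the false arm simplifies (get.active.lane.mask(%base, 0) is zeroinitializer), so folding through the scalar select would trade it for a full-width vector select:

  %mask.n = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %n)
  %mask = select i1 %cond, <16 x i1> %mask.n, <16 x i1> zeroinitializer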

+        if (Instruction *R = FoldOpIntoSelect(
+                *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
           return R;
+      }

Member:
Some overflow intrinsic calls (e.g., llvm.uadd.with.overflow.i64) now get folded into the PHI node. It looks unexpected. Can you add some tests to cover these cases (overflow intrinsic + select/PHI)?

See https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2962/files.

Member Author:
Added some test cases. I don't think it's entirely unexpected, as isNotCrossLaneOperation is roughly the same as isTriviallyVectorizable(), and the overflow intrinsics currently are not trivially vectorizable.

Since isNotCrossLaneOperation is now only checked for vector select conditions, these intrinsics can also get folded.
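
For example (a sketch mirroring the test_select_of_overflow_intrinsic_operand test added below; the condition is scalar, so the cross-lane check no longer applies):

  %s = select i1 %cond, i64 %n, i64 0
  %r = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s, i64 42)

now folds to:

  %r.n = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %n, i64 42)
  %r = select i1 %cond, { i64, i1 } %r.n, { i64, i1 } { i64 42, i1 false }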

       if (auto *Phi = dyn_cast<PHINode>(Op))
         if (Instruction *R = foldOpIntoPhi(*II, Phi))
           return R;
     }
+  }

   if (Instruction *Shuf = foldShuffledIntrinsicOperands(II))
     return Shuf;
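
For readability, here is the loop as it reads after this change (reassembled from the hunk above; surrounding context assumed unchanged):

  if (isSafeToSpeculativelyExecuteWithVariableReplaced(&CI)) {
    for (Value *Op : II->args()) {
      if (auto *Sel = dyn_cast<SelectInst>(Op)) {
        // Vector select conditions are only safe for lane-wise intrinsics.
        bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
        if (IsVectorCond && !isNotCrossLaneOperation(II))
          continue;
        // Don't replace a scalar select with a more expensive vector select
        // if we can't simplify both arms of the select.
        bool SimplifyBothArms =
            !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
        if (Instruction *R = FoldOpIntoSelect(
                *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
          return R;
      }
      if (auto *Phi = dyn_cast<PHINode>(Op))
        if (Instruction *R = foldOpIntoPhi(*II, Phi))
          return R;
    }
  }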
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -664,7 +664,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   /// This also works for Cast instructions, which obviously do not have a
   /// second operand.
   Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
-                                bool FoldWithMultiUse = false);
+                                bool FoldWithMultiUse = false,
+                                bool SimplifyBothArms = false);

   /// This is a convenience wrapper function for the above two functions.
   Instruction *foldBinOpIntoSelectOrPhi(BinaryOperator &I);
6 changes: 5 additions & 1 deletion llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1777,7 +1777,8 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
 }
 
 Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
-                                                bool FoldWithMultiUse) {
+                                                bool FoldWithMultiUse,
+                                                bool SimplifyBothArms) {
   // Don't modify shared select instructions unless set FoldWithMultiUse
   if (!SI->hasOneUse() && !FoldWithMultiUse)
     return nullptr;
@@ -1821,6 +1822,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
   if (!NewTV && !NewFV)
     return nullptr;
 
+  if (SimplifyBothArms && !(NewTV && NewFV))
+    return nullptr;
+
   // Create an instruction for the arm that did not fold.
   if (!NewTV)
     NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
36 changes: 34 additions & 2 deletions llvm/test/Transforms/InstCombine/intrinsic-select.ll
@@ -222,8 +222,7 @@ declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)

 define i32 @vec_to_scalar_select_scalar(i1 %b) {
 ; CHECK-LABEL: @vec_to_scalar_select_scalar(
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[B:%.*]], <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
-; CHECK-NEXT:    [[C:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[S]])
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[B:%.*]], i32 3, i32 7
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %s = select i1 %b, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>

@@ -371,3 +370,36 @@ define float @test_fabs_select_multiuse_both_constant(i1 %cond, float %x) {
   %fabs = call float @llvm.fabs.f32(float %select)
   ret float %fabs
 }
+
+; Negative test: Don't replace with select between vector mask and zeroinitializer.
+define <16 x i1> @test_select_of_active_lane_mask_bound(i64 %base, i64 %n, i1 %cond) {
+; CHECK-LABEL: @test_select_of_active_lane_mask_bound(
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[COND:%.*]], i64 [[N:%.*]], i64 0
+; CHECK-NEXT:    [[MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 [[BASE:%.*]], i64 [[S]])
+; CHECK-NEXT:    ret <16 x i1> [[MASK]]
+;
+  %s = select i1 %cond, i64 %n, i64 0
+  %mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %base, i64 %s)
+  ret <16 x i1> %mask
+}
+
+define <16 x i1> @test_select_of_active_lane_mask_bound_both_constant(i64 %base, i64 %n, i1 %cond) {
+; CHECK-LABEL: @test_select_of_active_lane_mask_bound_both_constant(
+; CHECK-NEXT:    [[MASK:%.*]] = select i1 [[COND:%.*]], <16 x i1> splat (i1 true), <16 x i1> zeroinitializer
+; CHECK-NEXT:    ret <16 x i1> [[MASK]]
+;
+  %s = select i1 %cond, i64 16, i64 0
+  %mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 0, i64 %s)
+  ret <16 x i1> %mask
+}
+
+define { i64, i1 } @test_select_of_overflow_intrinsic_operand(i64 %n, i1 %cond) {
+; CHECK-LABEL: @test_select_of_overflow_intrinsic_operand(
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[N:%.*]], i64 42)
+; CHECK-NEXT:    [[ADD_OVERFLOW:%.*]] = select i1 [[COND:%.*]], { i64, i1 } [[TMP1]], { i64, i1 } { i64 42, i1 false }
+; CHECK-NEXT:    ret { i64, i1 } [[ADD_OVERFLOW]]
+;
+  %s = select i1 %cond, i64 %n, i64 0
+  %add_overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s, i64 42)
+  ret { i64, i1 } %add_overflow
+}
50 changes: 50 additions & 0 deletions llvm/test/Transforms/InstCombine/phi.ll
@@ -3026,6 +3026,56 @@ join:
   ret i32 %umax
 }
 
+define i32 @cross_lane_intrinsic_over_phi(i1 %c, i1 %c2, <4 x i32> %a) {
+; CHECK-LABEL: @cross_lane_intrinsic_over_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[A:%.*]])
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    call void @may_exit()
+; CHECK-NEXT:    ret i32 [[PHI]]
+;
+entry:
+  br i1 %c, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi <4 x i32> [ %a, %if ], [ zeroinitializer, %entry ]
+  call void @may_exit()
+  %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %phi)
+  ret i32 %sum
+}
+
+define { i64, i1 } @overflow_intrinsic_over_phi(i1 %c, i64 %a) {
+; CHECK-LABEL: @overflow_intrinsic_over_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 1)
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi { i64, i1 } [ [[TMP0]], [[IF]] ], [ { i64 1, i1 false }, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    call void @may_exit()
+; CHECK-NEXT:    ret { i64, i1 } [[PHI]]
+;
+entry:
+  br i1 %c, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi i64 [ %a, %if ], [ 0, %entry ]
+  call void @may_exit()
+  %add_overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %phi, i64 1)
+  ret { i64, i1 } %add_overflow
+}
+
 define i32 @multiple_intrinsics_with_multiple_phi_uses(i1 %c, i32 %arg) {
 ; CHECK-LABEL: @multiple_intrinsics_with_multiple_phi_uses(
 ; CHECK-NEXT:  entry:
8 changes: 4 additions & 4 deletions llvm/test/Transforms/InstCombine/select_frexp.ll
@@ -115,10 +115,10 @@ define float @test_select_frexp_no_const(float %x, float %y, i1 %cond) {
 define i32 @test_select_frexp_extract_exp(float %x, i1 %cond) {
 ; CHECK-LABEL: define i32 @test_select_frexp_extract_exp(
 ; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) {
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], float 1.000000e+00, float [[X]]
-; CHECK-NEXT:    [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SEL]])
+; CHECK-NEXT:    [[FREXP:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
 ; CHECK-NEXT:    [[FREXP_1:%.*]] = extractvalue { float, i32 } [[FREXP]], 1
-; CHECK-NEXT:    ret i32 [[FREXP_1]]
+; CHECK-NEXT:    [[FREXP_2:%.*]] = select i1 [[COND]], i32 1, i32 [[FREXP_1]]
+; CHECK-NEXT:    ret i32 [[FREXP_2]]
 ;
   %sel = select i1 %cond, float 1.000000e+00, float %x
   %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %sel)

@@ -132,7 +132,7 @@ define float @test_select_frexp_fast_math_select(float %x, i1 %cond) {
 ; CHECK-SAME: float [[X:%.*]], i1 [[COND:%.*]]) {
 ; CHECK-NEXT:    [[FREXP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]])
 ; CHECK-NEXT:    [[MANTISSA:%.*]] = extractvalue { float, i32 } [[FREXP1]], 0
-; CHECK-NEXT:    [[SELECT_FREXP:%.*]] = select nnan ninf nsz i1 [[COND]], float 5.000000e-01, float [[MANTISSA]]
+; CHECK-NEXT:    [[SELECT_FREXP:%.*]] = select i1 [[COND]], float 5.000000e-01, float [[MANTISSA]]

Member Author:
The FMFs end up getting dropped here, as this was previously handled by foldFrexpOfSelect, but now the more general FoldOpIntoSelect handles it first.
 ; CHECK-NEXT:    ret float [[SELECT_FREXP]]
 ;
   %sel = select nnan ninf nsz i1 %cond, float 1.000000e+00, float %x
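
In IR terms (a sketch of the before/after for the test above, using the names from the CHECK lines). The input:

  %sel = select nnan ninf nsz i1 %cond, float 1.000000e+00, float %x
  %frexp = call { float, i32 } @llvm.frexp.f32.i32(float %sel)
  %mantissa = extractvalue { float, i32 } %frexp, 0

now becomes a select on the extracted mantissa (frexp(1.0) yields a mantissa of 0.5), without the nnan/ninf/nsz flags:

  %frexp1 = call { float, i32 } @llvm.frexp.f32.i32(float %x)
  %mantissa = extractvalue { float, i32 } %frexp1, 0
  %select.frexp = select i1 %cond, float 5.000000e-01, float %mantissa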
@@ -35,9 +35,8 @@ define void @ham() #1 {
 ; CHECK-NEXT:  [[SNORK_EXIT:.*:]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 48 to ptr), align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i64 [[TMP0]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer, i64 0)
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[TMP1]], <vscale x 16 x float> [[TMP2]], <vscale x 16 x float> undef
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[SPEC_SELECT]], i64 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> undef, i64 0)
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP1]], <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP2]]
 ; CHECK-NEXT:    tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> zeroinitializer, <vscale x 4 x float> [[TMP3]])
 ; CHECK-NEXT:    ret void
 ;