-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[InstCombine] Canonicalise packed-integer-selecting shifts #162147
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: None (zGoldthorpe) ChangesThis patch resolves recent regressions related to issue #92891. define i16 @<!-- -->src(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
%upper.shl = shl nuw i32 %upper, 16
%pack = or disjoint i32 %upper.shl, %lower
%mask.bit = and i32 %mask, 16
%sel = lshr i32 %pack, %mask.bit
%trunc = trunc i32 %sel to i16
ret i16 %trunc
}
; =>
define i16 @<!-- -->tgt(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
%mask.bit = and i32 %mask, 16
%mask.bit.z = icmp eq i32 %mask.bit, 0
%sel = select i1 %mask.bit.z, i32 %lower, i32 %upper
%trunc = trunc i32 %sel to i16
ret i16 %trunc
} Alive2 proofs: gJ9MpP Full diff: https://github.com/llvm/llvm-project/pull/162147.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index aa030294ff1e5..c018161fbfa44 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -800,6 +800,48 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
} else {
llvm::computeKnownBits(I, Known, Q, Depth);
+
+ // Let N = 2 * M.
+ // Given an N-bit integer representing a pack of two M-bit integers,
+ // we can select one of the packed integers by right-shifting by either
+ // zero or M (which is the most straightforward to check if M is a power
+ // of 2), and then isolating the lower M bits. In this case, we can
+ // represent the shift as a select on whether the shr amount is nonzero.
+ uint64_t ShlAmt;
+ Value *Upper, *Lower;
+ if (!match(I->getOperand(0),
+ m_OneUse(m_DisjointOr(
+ m_OneUse(m_Shl(m_Value(Upper), m_ConstantInt(ShlAmt))),
+ m_Value(Lower)))))
+ break;
+ if (!isPowerOf2_64(ShlAmt))
+ break;
+
+ const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
+ if (DemandedBitWidth > ShlAmt)
+ break;
+
+ // Check that upper demanded bits are not lost from lshift.
+ if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
+ break;
+
+ KnownBits KnownLowerBits = computeKnownBits(Lower, I, Depth);
+ if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt))
+ break;
+
+ Value *ShrAmt = I->getOperand(1);
+ KnownBits KnownShrBits = computeKnownBits(ShrAmt, I, Depth);
+ // Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
+ // zero.
+ if ((~KnownShrBits.Zero).getZExtValue() != ShlAmt)
+ break;
+
+ Value *ShrAmtZ = Builder.CreateICmpEQ(
+ ShrAmt, Constant::getNullValue(ShrAmt->getType()),
+ ShrAmt->getName() + ".z");
+ Value *Select = Builder.CreateSelect(ShrAmtZ, Lower, Upper);
+ Select->takeName(I);
+ return Select;
}
break;
}
diff --git a/llvm/test/Transforms/InstCombine/fold-selective-shift.ll b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
new file mode 100644
index 0000000000000..18214c5edfeb9
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/fold-selective-shift.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=instcombine %s -S | FileCheck %s
+
+declare void @clobber.i32(i32)
+
+define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %lower.zext, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.range(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or disjoint i32 %upper.shl, %lower
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_16.masked(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
+; CHECK-NEXT: ret i32 [[SEL]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %lower.zext, %upper.shl
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %sel.masked = and i32 %sel, 65535
+ ret i32 %sel.masked
+}
+
+define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
+; CHECK-LABEL: define <2 x i16> @selective_shift.v16(
+; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16)
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer
+; CHECK-NEXT: [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]]
+; CHECK-NEXT: ret <2 x i16> [[SEL_V]]
+;
+ %upper.zext = zext <2 x i16> %upper to <2 x i32>
+ %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16)
+ %lower.zext = zext <2 x i16> %lower to <2 x i32>
+ %pack = or disjoint <2 x i32> %upper.shl, %lower.zext
+ %mask.bit = and <2 x i32> %mask, splat(i32 16)
+ %sel = lshr <2 x i32> %pack, %mask.bit
+ %trunc = trunc <2 x i32> %sel to <2 x i16>
+ ret <2 x i16> %trunc
+}
+
+define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.wide(
+; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[SEL_V]]
+;
+ %upper.zext = zext i16 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i64
+ %pack = or disjoint i64 %upper.shl, %lower.zext
+ %mask.bit = and i64 %mask, 16
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i16
+ ret i16 %trunc
+}
+
+; narrow zext type blocks fold
+define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.narrow(
+; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i24 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i24
+ %upper.shl = shl i24 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i24
+ %pack = or disjoint i24 %upper.shl, %lower.zext
+ %mask.bit = and i24 %mask, 16
+ %sel = lshr i24 %pack, %mask.bit
+ %trunc = trunc i24 %sel to i16
+ ret i16 %trunc
+}
+
+; %lower's upper bits block fold
+define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16_norange(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16
+; CHECK-NEXT: [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.shl = shl nuw i32 %upper, 16
+ %pack = or i32 %upper.shl, %lower
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.0(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: call void @clobber.i32(i32 [[UPPER_ZEXT]])
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: call void @clobber.i32(i32 [[LOWER_ZEXT]])
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ call void @clobber.i32(i32 %upper.zext)
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ call void @clobber.i32(i32 %lower.zext)
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; multi-use of %pack blocks fold
+define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.1(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: call void @clobber.i32(i32 [[PACK]])
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ call void @clobber.i32(i32 %pack)
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; non-truncated use of %sel blocks fold
+define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.2(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: call void @clobber.i32(i32 [[SEL]])
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT: ret i16 [[TRUNC]]
+;
+ %upper.zext = zext i16 %upper to i32
+ %upper.shl = shl nuw i32 %upper.zext, 16
+ %lower.zext = zext i16 %lower to i32
+ %pack = or disjoint i32 %upper.shl, %lower.zext
+ %mask.bit = and i32 %mask, 16
+ %sel = lshr i32 %pack, %mask.bit
+ call void @clobber.i32(i32 %sel)
+ %trunc = trunc i32 %sel to i16
+ ret i16 %trunc
+}
+
+; bitwidth must be a power of 2 to fold
+define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) {
+; CHECK-LABEL: define i24 @selective_shift_24(
+; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48
+; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24
+; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48
+; CHECK-NEXT: [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i48 [[MASK]], 24
+; CHECK-NEXT: [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24
+; CHECK-NEXT: ret i24 [[TRUNC]]
+;
+ %upper.zext = zext i24 %upper to i48
+ %upper.shl = shl nuw i48 %upper.zext, 24
+ %lower.zext = zext i24 %lower to i48
+ %pack = or disjoint i48 %upper.shl, %lower.zext
+ %mask.bit = and i48 %mask, 24
+ %sel = lshr i48 %pack, %mask.bit
+ %trunc = trunc i48 %sel to i24
+ ret i24 %trunc
+}
+
+define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT: ret i32 [[SEL_V]]
+;
+ %upper.zext = zext i32 %upper to i64
+ %upper.shl = shl nuw i64 %upper.zext, 32
+ %lower.zext = zext i32 %lower to i64
+ %pack = or disjoint i64 %upper.shl, %lower.zext
+ %mask.bit = and i64 %mask, 32
+ %sel = lshr i64 %pack, %mask.bit
+ %trunc = trunc i64 %sel to i32
+ ret i32 %trunc
+}
|
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Outdated
Show resolved
Hide resolved
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Outdated
Show resolved
Hide resolved
llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
Outdated
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
This patch resolves recent regressions related to issue #92891.
It specifically enables the following types of reductions.
Alive2 proofs: gJ9MpP