From 20853a7807790a6b5ca13aab1edb0b4e96199915 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 18 Dec 2018 19:59:50 +0000 Subject: [PATCH] [InstCombine] Simplify cttz/ctlz + icmp eq/ne into mask check Checking whether a number has a certain number of trailing / leading zeros means checking whether it is of the form XXXX1000 / 0001XXXX, which can be done with an and+icmp. Related to https://bugs.llvm.org/show_bug.cgi?id=28668. As a next step, this can be extended to non-equality predicates. Differential Revision: https://reviews.llvm.org/D55745 llvm-svn: 349530 --- .../InstCombine/InstCombineCompares.cpp | 25 ++++++- .../Transforms/InstCombine/cmp-intrinsic.ll | 68 ++++++++++++------- .../test/Transforms/InstCombine/intrinsics.ll | 12 +--- 3 files changed, 68 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 38539936a24d8..b5bbb09935e27 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2765,6 +2765,7 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, // Handle icmp {eq|ne} <intrinsic>, Constant. Type *Ty = II->getType(); + unsigned BitWidth = C.getBitWidth(); switch (II->getIntrinsicID()) { case Intrinsic::bswap: Worklist.Add(II); @@ -2773,21 +2774,39 @@ Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp, return &Cmp; case Intrinsic::ctlz: - case Intrinsic::cttz: + case Intrinsic::cttz: { // ctz(A) == bitwidth(A) -> A == 0 and likewise for != - if (C == C.getBitWidth()) { + if (C == BitWidth) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); Cmp.setOperand(1, ConstantInt::getNullValue(Ty)); return &Cmp; } + + // ctz(A) == C -> A & Mask1 == Mask2, where Mask2 only has bit C set + // and Mask1 has bits 0..C set. Similar for ctlz, but for high bits. + // Limit to one use to ensure we don't increase instruction count. 
+ unsigned Num = C.getLimitedValue(BitWidth); + if (Num != BitWidth && II->hasOneUse()) { + bool IsTrailing = II->getIntrinsicID() == Intrinsic::cttz; + APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1) + : APInt::getHighBitsSet(BitWidth, Num + 1); + APInt Mask2 = IsTrailing + ? APInt::getOneBitSet(BitWidth, Num) + : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); + Cmp.setOperand(0, Builder.CreateAnd(II->getArgOperand(0), Mask1)); + Cmp.setOperand(1, ConstantInt::get(Ty, Mask2)); + Worklist.Add(II); + return &Cmp; + } break; + } case Intrinsic::ctpop: { // popcount(A) == 0 -> A == 0 and likewise for != // popcount(A) == bitwidth(A) -> A == -1 and likewise for != bool IsZero = C.isNullValue(); - if (IsZero || C == C.getBitWidth()) { + if (IsZero || C == BitWidth) { Worklist.Add(II); Cmp.setOperand(0, II->getArgOperand(0)); auto *NewOp = diff --git a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll index d0ecb70c579b4..1ea608d5945b7 100644 --- a/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll +++ b/llvm/test/Transforms/InstCombine/cmp-intrinsic.ll @@ -54,8 +54,7 @@ define i1 @ctlz_eq_bitwidth_i32(i32 %x) { define i1 @ctlz_eq_zero_i32(i32 %x) { ; CHECK-LABEL: @ctlz_eq_zero_i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 0 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -65,8 +64,7 @@ define i1 @ctlz_eq_zero_i32(i32 %x) { define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_zero_v2i32( -; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[A:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], zeroinitializer +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> 
@llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -76,8 +74,7 @@ define <2 x i1> @ctlz_ne_zero_v2i32(<2 x i32> %a) { define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) { ; CHECK-LABEL: @ctlz_eq_bw_minus_1_i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 31 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -87,8 +84,7 @@ define i1 @ctlz_eq_bw_minus_1_i32(i32 %x) { define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_bw_minus_1_v2i32( -; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[A:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -98,8 +94,8 @@ define <2 x i1> @ctlz_ne_bw_minus_1_v2i32(<2 x i32> %a) { define i1 @ctlz_eq_other_i32(i32 %x) { ; CHECK-LABEL: @ctlz_eq_other_i32( -; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 24 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -128 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 128 ; CHECK-NEXT: ret i1 [[CMP]] ; %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -109,8 +105,8 @@ define i1 @ctlz_eq_other_i32(i32 %x) { define <2 x i1> @ctlz_ne_other_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_other_v2i32( -; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[A:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false) @@ -118,6 +114,19 @@ define <2 x i1> 
@ctlz_ne_other_v2i32(<2 x i32> %a) { ret <2 x i1> %cmp } +define i1 @ctlz_eq_other_i32_multiuse(i32 %x, i32* %p) { +; CHECK-LABEL: @ctlz_eq_other_i32_multiuse( +; CHECK-NEXT: [[LZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !0 +; CHECK-NEXT: store i32 [[LZ]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LZ]], 24 +; CHECK-NEXT: ret i1 [[CMP]] +; + %lz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) + store i32 %lz, i32* %p + %cmp = icmp eq i32 %lz, 24 + ret i1 %cmp +} + define <2 x i1> @ctlz_ne_bitwidth_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @ctlz_ne_bitwidth_v2i32( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], zeroinitializer @@ -150,8 +159,8 @@ define <2 x i1> @cttz_eq_bitwidth_v2i32(<2 x i32> %a) { define i1 @cttz_eq_zero_i33(i33 %x) { ; CHECK-LABEL: @cttz_eq_zero_i33( -; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TZ]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i33 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) @@ -161,8 +170,8 @@ define i1 @cttz_eq_zero_i33(i33 %x) { define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_zero_v2i32( -; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[A:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) @@ -172,8 +181,7 @@ define <2 x i1> @cttz_ne_zero_v2i32(<2 x i32> %a) { define i1 @cttz_eq_bw_minus_1_i33(i33 %x) { ; CHECK-LABEL: @cttz_eq_bw_minus_1_i33( -; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq 
i33 [[TZ]], 32 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[X:%.*]], -4294967296 ; CHECK-NEXT: ret i1 [[CMP]] ; %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) @@ -183,8 +191,7 @@ define i1 @cttz_eq_bw_minus_1_i33(i33 %x) { define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_bw_minus_1_v2i32( -; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[A:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[A:%.*]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) @@ -194,8 +201,8 @@ define <2 x i1> @cttz_ne_bw_minus_1_v2i32(<2 x i32> %a) { define i1 @cttz_eq_other_i33(i33 %x) { ; CHECK-LABEL: @cttz_eq_other_i33( -; CHECK-NEXT: [[TZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TZ]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = and i33 [[X:%.*]], 31 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i33 [[TMP1]], 16 ; CHECK-NEXT: ret i1 [[CMP]] ; %tz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) @@ -205,8 +212,8 @@ define i1 @cttz_eq_other_i33(i33 %x) { define <2 x i1> @cttz_ne_other_v2i32(<2 x i32> %a) { ; CHECK-LABEL: @cttz_ne_other_v2i32( -; CHECK-NEXT: [[X:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[A:%.*]], i1 false) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[X]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %x = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 false) @@ -214,6 +221,19 @@ define <2 x i1> @cttz_ne_other_v2i32(<2 x i32> %a) { ret <2 x i1> %cmp } +define i1 @cttz_eq_other_i33_multiuse(i33 %x, i33* %p) { +; CHECK-LABEL: @cttz_eq_other_i33_multiuse( +; CHECK-NEXT: [[LZ:%.*]] = tail call i33 @llvm.cttz.i33(i33 [[X:%.*]], i1 false), !range !1 +; CHECK-NEXT: store i33 [[LZ]], i33* [[P:%.*]], align 4 +; CHECK-NEXT: 
[[CMP:%.*]] = icmp eq i33 [[LZ]], 4 +; CHECK-NEXT: ret i1 [[CMP]] +; + %lz = tail call i33 @llvm.cttz.i33(i33 %x, i1 false) + store i33 %lz, i33* %p + %cmp = icmp eq i33 %lz, 4 + ret i1 %cmp +} + define i1 @ctpop_eq_zero_i11(i11 %x) { ; CHECK-LABEL: @ctpop_eq_zero_i11( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i11 [[X:%.*]], 0 diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll index e2ea1d5b535fe..8de892fd81b82 100644 --- a/llvm/test/Transforms/InstCombine/intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/intrinsics.ll @@ -354,13 +354,9 @@ define i1 @cttz_knownbits3(i32 %arg) { ret i1 %res } -; TODO: The icmp is unnecessary given the known bits of the input. define <2 x i1> @cttz_knownbits3_vec(<2 x i32> %arg) { ; CHECK-LABEL: @cttz_knownbits3_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[ARG:%.*]], -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true) -; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i32> [[CNT]], -; CHECK-NEXT: ret <2 x i1> [[RES]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %or = or <2 x i32> %arg, %cnt = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %or, i1 true) nounwind readnone @@ -450,13 +446,9 @@ define i1 @ctlz_knownbits3(i8 %arg) { ret i1 %res } -; TODO: The icmp is unnecessary given the known bits of the input. define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) { ; CHECK-LABEL: @ctlz_knownbits3_vec( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ARG:%.*]], -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[OR]], i1 true) -; CHECK-NEXT: [[RES:%.*]] = icmp eq <2 x i8> [[CNT]], -; CHECK-NEXT: ret <2 x i1> [[RES]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %or = or <2 x i8> %arg, %cnt = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %or, i1 true) nounwind readnone