diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 6a76ad7f5db74..01c41a8356d9d 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2458,13 +2458,20 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
       NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
     }
 
+    unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
+
+    if (MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+      // Shift the source left so that its MSB lands on the wide type's MSB;
+      // the CTLZ of the shifted value is then already the desired result.
+      MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
+                                   MIRBuilder.buildConstant(WideTy, SizeDiff));
+    }
+
     // Perform the operation at the larger size.
     auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
     // This is already the correct result for CTPOP and CTTZs
-    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
-        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
+    if (MI.getOpcode() == TargetOpcode::G_CTLZ) {
       // The correct result is NewOp - (Difference in widety and current ty).
-      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
       MIBNewOp = MIRBuilder.buildSub(
           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
     }
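(An illustrative aside, not part of the patch: a standalone C++ sketch of the identity the new lowering relies on. `__builtin_clz` is a GCC/Clang builtin standing in for the wide CTLZ_ZERO_UNDEF, and all names below are made up for the example. For a non-zero narrow value, counting leading zeros after shifting it to the top of the wide type gives the narrow-type count directly, so the old count-then-subtract sequence, and the masking needed to re-narrow its result, become unnecessary.)

```cpp
#include <cassert>
#include <cstdint>

// Old-style promotion of an i8 ctlz_zero_undef to i32: zero-extend, count,
// then subtract the 24 extra leading zeros contributed by the extension.
static unsigned Ctlz8ViaSub(uint8_t X) {
  assert(X != 0 && "result is undefined for a zero argument");
  return __builtin_clz(static_cast<uint32_t>(X)) - 24;
}

// New-style promotion: shift bit 7 of X up to bit 31 and count in the wide
// type; the result is already in [0, 7], so no subtract or re-mask follows.
static unsigned Ctlz8ViaShl(uint8_t X) {
  assert(X != 0 && "result is undefined for a zero argument");
  return __builtin_clz(static_cast<uint32_t>(X) << 24);
}

int main() {
  for (unsigned V = 1; V < 256; ++V)
    assert(Ctlz8ViaSub(static_cast<uint8_t>(V)) ==
           Ctlz8ViaShl(static_cast<uint8_t>(V)));
  return 0;
}
```

The same reasoning justifies the SelectionDAG changes below; the regenerated tests replace an and/zext plus a subtract (and sometimes a re-narrowing mask) with a single left shift before the count.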
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bfc3e08c1632d..32ef742a42fd2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -5026,7 +5026,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::CTLZ:
-  case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTPOP:
     // Zero extend the argument unless its cttz, then use any_extend.
     if (Node->getOpcode() == ISD::CTTZ ||
@@ -5047,8 +5046,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is
     // already the correct result.
     Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
-    if (Node->getOpcode() == ISD::CTLZ ||
-        Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+    if (Node->getOpcode() == ISD::CTLZ) {
       // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT))
       Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1,
                          DAG.getConstant(NVT.getSizeInBits() -
@@ -5056,6 +5054,22 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     }
     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
     break;
+  case ISD::CTLZ_ZERO_UNDEF:
+    // The result is undefined for a zero argument, so we can assume it is
+    // non-zero and take a different approach than for ISD::CTLZ.
+
+    // Any-extend the argument.
+    Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
+
+    // Tmp1 = Tmp1 << (sizeinbits(NVT) - sizeinbits(Old VT))
+    Tmp2 = DAG.getShiftAmountConstant(NVT.getSizeInBits() - OVT.getSizeInBits(),
+                                      Tmp1.getValueType(), dl);
+    Tmp1 = DAG.getNode(ISD::SHL, dl, NVT, Tmp1, Tmp2);
+
+    // Perform the larger operation.
+    Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
+    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+    break;
   case ISD::BITREVERSE:
   case ISD::BSWAP: {
     unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 0aa36deda79dc..7701267dae819 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -646,21 +646,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
     }
   }
 
-  // Zero extend to the promoted type and do the count there.
-  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  unsigned CtlzOpcode = N->getOpcode();
+  if (CtlzOpcode == ISD::CTLZ || CtlzOpcode == ISD::VP_CTLZ) {
+    // Zero extend to the promoted type and do the count there.
+    SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+
+    // Subtract off the extra leading bits in the bigger type.
+    SDValue ExtractLeadingBits = DAG.getConstant(
+        NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
+    if (!N->isVPOpcode())
+      return DAG.getNode(ISD::SUB, dl, NVT,
+                         DAG.getNode(N->getOpcode(), dl, NVT, Op),
+                         ExtractLeadingBits);
+    SDValue Mask = N->getOperand(1);
+    SDValue EVL = N->getOperand(2);
+    return DAG.getNode(ISD::VP_SUB, dl, NVT,
+                       DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
+                       ExtractLeadingBits, Mask, EVL);
+  }
+  if (CtlzOpcode == ISD::CTLZ_ZERO_UNDEF ||
+      CtlzOpcode == ISD::VP_CTLZ_ZERO_UNDEF) {
+    // Any-extend the argument.
+    SDValue Op = GetPromotedInteger(N->getOperand(0));
+
+    // Op = Op << (sizeinbits(NVT) - sizeinbits(Old VT))
+    unsigned SHLAmount = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
+    auto ShiftConst =
+        DAG.getShiftAmountConstant(SHLAmount, Op.getValueType(), dl);
+    if (!N->isVPOpcode()) {
+      Op = DAG.getNode(ISD::SHL, dl, NVT, Op, ShiftConst);
+      return DAG.getNode(CtlzOpcode, dl, NVT, Op);
+    }
 
-  // Subtract off the extra leading bits in the bigger type.
-  SDValue ExtractLeadingBits = DAG.getConstant(
-      NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
-  if (!N->isVPOpcode())
-    return DAG.getNode(ISD::SUB, dl, NVT,
-                       DAG.getNode(N->getOpcode(), dl, NVT, Op),
-                       ExtractLeadingBits);
-  SDValue Mask = N->getOperand(1);
-  SDValue EVL = N->getOperand(2);
-  return DAG.getNode(ISD::VP_SUB, dl, NVT,
-                     DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
-                     ExtractLeadingBits, Mask, EVL);
+    SDValue Mask = N->getOperand(1);
+    SDValue EVL = N->getOperand(2);
+    Op = DAG.getNode(ISD::VP_SHL, dl, NVT, Op, ShiftConst, Mask, EVL);
+    return DAG.getNode(CtlzOpcode, dl, NVT, Op, Mask, EVL);
+  }
+  llvm_unreachable("Invalid CTLZ Opcode");
 }
 
 SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
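(Another hedged aside, not part of the patch: the VP_CTLZ_ZERO_UNDEF branch above applies the same shift lane-wise under a mask and an explicit vector length. Below is a rough scalar C++ emulation of the promoted i16-to-i32 case, assuming the usual VP semantics in which lanes at or beyond EVL, or with a false mask bit, are left unspecified; the helper name is hypothetical.)

```cpp
#include <cassert>
#include <cstdint>
#include <vector>

// Scalar emulation of VP_SHL followed by CTLZ_ZERO_UNDEF on i16 lanes
// promoted to i32. Only lanes with I < EVL and Mask[I] set are computed;
// the other lanes keep whatever Dst already held.
static void VPCtlzZeroUndefPromoted(const std::vector<uint16_t> &Src,
                                    const std::vector<bool> &Mask,
                                    unsigned EVL,
                                    std::vector<uint32_t> &Dst) {
  for (unsigned I = 0; I < EVL; ++I) {
    if (!Mask[I])
      continue;
    assert(Src[I] != 0 && "an active zero lane has an undefined result");
    // VP_SHL by (32 - 16), then count in the wide element type.
    Dst[I] = __builtin_clz(static_cast<uint32_t>(Src[I]) << 16);
  }
}

int main() {
  std::vector<uint16_t> Src = {1, 0x8000, 0x00ff, 7};
  std::vector<bool> Mask = {true, true, false, true};
  std::vector<uint32_t> Dst(4, 99);
  VPCtlzZeroUndefPromoted(Src, Mask, /*EVL=*/4, Dst);
  assert(Dst[0] == 15 && Dst[1] == 0 && Dst[2] == 99 && Dst[3] == 13);
  return 0;
}
```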
diff --git a/llvm/test/CodeGen/AArch64/ctlz_zero_undef.ll b/llvm/test/CodeGen/AArch64/ctlz_zero_undef.ll
new file mode 100644
index 0000000000000..943ff72633ca6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ctlz_zero_undef.ll
@@ -0,0 +1,44 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s --mtriple=aarch64 | FileCheck %s
+
+declare i8 @llvm.ctlz.i8(i8, i1 immarg)
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1 immarg)
+declare i11 @llvm.ctlz.i11(i11, i1 immarg)
+
+define i32 @clz_nzu8(i8 %self) {
+; CHECK-LABEL: clz_nzu8:
+; CHECK:       // %bb.0: // %start
+; CHECK-NEXT:    lsl w8, w0, #24
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+start:
+  %ctlz_res = call i8 @llvm.ctlz.i8(i8 %self, i1 true)
+  %ret = zext i8 %ctlz_res to i32
+  ret i32 %ret
+}
+
+; non-standard bit size argument to ctlz
+define i32 @clz_nzu11(i11 %self) {
+; CHECK-LABEL: clz_nzu11:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl w8, w0, #21
+; CHECK-NEXT:    clz w0, w8
+; CHECK-NEXT:    ret
+  %ctlz_res = call i11 @llvm.ctlz.i11(i11 %self, i1 true)
+  %ret = zext i11 %ctlz_res to i32
+  ret i32 %ret
+}
+
+; vector type argument to ctlz intrinsic
+define <8 x i32> @clz_vec_nzu8(<8 x i8> %self) {
+; CHECK-LABEL: clz_vec_nzu8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    clz v0.8b, v0.8b
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ret
+  %ctlz_res = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %self, i1 true)
+  %ret = zext <8 x i8> %ctlz_res to <8 x i32>
+  ret <8 x i32> %ret
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
index fed277d7d10d0..1e36da6d09126 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz-zero-undef.mir
@@ -83,12 +83,11 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]]
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s16) = G_TRUNC %0
    %2:_(s16) = G_CTLZ_ZERO_UNDEF %1
@@ -151,16 +150,14 @@ body: |
     ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]]
-    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32)
-    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C]]
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
-    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[LSHR]](s32)
-    ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF1]], [[C]]
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32)
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]]
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
+    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32)
+    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
+    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL1]](s32)
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF1]](s32)
+    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32)
+    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL2]]
     ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
     ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
    %0:_(<2 x s16>) = COPY $vgpr0
@@ -181,12 +178,11 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
-    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s32)
     ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
-    ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ_ZERO_UNDEF]], [[C1]]
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
-    ; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32)
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL]](s32)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[CTLZ_ZERO_UNDEF]](s32)
+    ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s7) = G_TRUNC %0
    %2:_(s7) = G_CTLZ_ZERO_UNDEF %1
@@ -207,11 +203,10 @@ body: |
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
     ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591
     ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
-    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[AND]](s64)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 31
-    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
-    ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[CTLZ_ZERO_UNDEF]], [[UV]]
-    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[USUBO]](s32)
+    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 31
+    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[AND]], [[C1]](s32)
+    ; CHECK-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[SHL]](s64)
+    ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTLZ_ZERO_UNDEF]](s32)
     ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
    %0:_(s64) = COPY $vgpr0_vgpr1
    %1:_(s33) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 21aff62b9226d..e7852fc209001 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -372,8 +372,8 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-GISEL-NEXT:    s_and_b32 s0, s4, 0xff
+; GFX9-GISEL-NEXT:    s_lshl_b32 s0, s0, 24
 ; GFX9-GISEL-NEXT:    s_flbit_i32_b32 s0, s0
-; GFX9-GISEL-NEXT:    s_sub_i32 s0, s0, 24
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-GISEL-NEXT:    global_store_byte v1, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_endpgm
@@ -404,9 +404,8 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
 ; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_and_b32 s2, s2, 0xffff
+; VI-NEXT:    s_lshl_b32 s2, s2, 16
 ; VI-NEXT:    s_flbit_i32_b32 s2, s2
-; VI-NEXT:    s_add_i32 s2, s2, -16
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
 ; VI-NEXT:    v_mov_b32_e32 v1, s1
 ; VI-NEXT:    v_mov_b32_e32 v2, s2
@@ -449,8 +448,8 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-GISEL-NEXT:    s_and_b32 s0, s4, 0xffff
+; GFX9-GISEL-NEXT:    s_lshl_b32 s0, s0, 16
 ; GFX9-GISEL-NEXT:    s_flbit_i32_b32 s0, s0
-; GFX9-GISEL-NEXT:    s_sub_i32 s0, s0, 16
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-GISEL-NEXT:    global_store_short v1, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_endpgm
@@ -651,9 +650,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
 ; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-GISEL-NEXT:    global_load_ubyte v1, v0, s[2:3]
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v2, v1
-; GFX9-GISEL-NEXT:    v_subrev_u32_e32 v2, 24, v2
-; GFX9-GISEL-NEXT:    v_and_b32_e32 v2, 0xff, v2
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v1
+; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v2, v2
 ; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, 32, v2, vcc
 ; GFX9-GISEL-NEXT:    global_store_byte v0, v1, s[0:1]
@@ -708,8 +706,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
 ; VI-NEXT:    v_lshlrev_b16_e32 v1, 8, v2
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    v_or_b32_e32 v0, v1, v0
-; VI-NEXT:    v_ffbh_u32_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0
-; VI-NEXT:    v_add_u32_e32 v1, vcc, -16, v1
+; VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
+; VI-NEXT:    v_ffbh_u32_e32 v1, v1
 ; VI-NEXT:    v_cmp_ne_u16_e32 vcc, 0, v0
 ; VI-NEXT:    v_cndmask_b32_e32 v2, 32, v1, vcc
 ; VI-NEXT:    v_mov_b32_e32 v0, s0
@@ -756,9 +754,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
 ; GFX9-GISEL-NEXT:    global_load_ubyte v2, v0, s[2:3] offset:1
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-GISEL-NEXT:    v_lshl_or_b32 v1, v2, 8, v1
-; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v2, v1
-; GFX9-GISEL-NEXT:    v_subrev_u32_e32 v2, 16, v2
-; GFX9-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
+; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v2, v2
 ; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v1
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v1, 32, v2, vcc
 ; GFX9-GISEL-NEXT:    global_store_short v0, v1, s[0:1]
@@ -1164,8 +1161,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8(ptr addrspace(1) noalias %out, p
 ; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 24, v0
 ; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v0, v0
-; GFX9-GISEL-NEXT:    v_subrev_u32_e32 v0, 24, v0
 ; GFX9-GISEL-NEXT:    global_store_byte v1, v0, s[0:1]
 ; GFX9-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -1702,8 +1699,8 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
 ; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v3, vcc
 ; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v1, v0
-; GFX9-GISEL-NEXT:    v_subrev_u32_e32 v1, 24, v1
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v1, 24, v0
+; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v1, v1
 ; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
diff --git a/llvm/test/CodeGen/Hexagon/bitmanip.ll b/llvm/test/CodeGen/Hexagon/bitmanip.ll
index 9ce7f0576506c..2c21af62d6f39 100644
--- a/llvm/test/CodeGen/Hexagon/bitmanip.ll
+++ b/llvm/test/CodeGen/Hexagon/bitmanip.ll
@@ -50,13 +50,10 @@ define i16 @ctlz_i16(i16 %a0) #0 {
 ; CHECK: .cfi_startproc
 ; CHECK-NEXT: // %bb.0:
 ; CHECK-NEXT: {
-; CHECK-NEXT:   r0 = zxth(r0)
+; CHECK-NEXT:   r0 = aslh(r0)
 ; CHECK-NEXT: }
 ; CHECK-NEXT: {
 ; CHECK-NEXT:   r0 = cl0(r0)
-; CHECK-NEXT: }
-; CHECK-NEXT: {
-; CHECK-NEXT:   r0 = add(r0,#-16)
 ; CHECK-NEXT:   jumpr r31
 ; CHECK-NEXT: }
   %v0 = tail call i16 @llvm.ctlz.i16(i16 %a0, i1 true) #1
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 549d531e829ea..be8ad11e3e2da 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1671,30 +1671,26 @@ define i8 @test_ctlz_i8_zero_undef(i8 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: test_ctlz_i8_zero_undef:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andi a0, a0, 255
+; RV32ZBB-NEXT:    slli a0, a0, 24
 ; RV32ZBB-NEXT:    clz a0, a0
-; RV32ZBB-NEXT:    addi a0, a0, -24
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_ctlz_i8_zero_undef:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andi a0, a0, 255
+; RV64ZBB-NEXT:    slli a0, a0, 56
 ; RV64ZBB-NEXT:    clz a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -56
 ; RV64ZBB-NEXT:    ret
 ;
 ; RV32XTHEADBB-LABEL: test_ctlz_i8_zero_undef:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    andi a0, a0, 255
+; RV32XTHEADBB-NEXT:    slli a0, a0, 24
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
-; RV32XTHEADBB-NEXT:    addi a0, a0, -24
 ; RV32XTHEADBB-NEXT:    ret
 ;
 ; RV64XTHEADBB-LABEL: test_ctlz_i8_zero_undef:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    andi a0, a0, 255
+; RV64XTHEADBB-NEXT:    slli a0, a0, 56
 ; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
-; RV64XTHEADBB-NEXT:    addi a0, a0, -56
 ; RV64XTHEADBB-NEXT:    ret
   %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 true)
   ret i8 %tmp
@@ -1771,30 +1767,26 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: test_ctlz_i16_zero_undef:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    zext.h a0, a0
+; RV32ZBB-NEXT:    slli a0, a0, 16
 ; RV32ZBB-NEXT:    clz a0, a0
-; RV32ZBB-NEXT:    addi a0, a0, -16
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_ctlz_i16_zero_undef:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    zext.h a0, a0
+; RV64ZBB-NEXT:    slli a0, a0, 48
 ; RV64ZBB-NEXT:    clz a0, a0
-; RV64ZBB-NEXT:    addi a0, a0, -48
 ; RV64ZBB-NEXT:    ret
 ;
 ; RV32XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
 ; RV32XTHEADBB:       # %bb.0:
-; RV32XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV32XTHEADBB-NEXT:    slli a0, a0, 16
 ; RV32XTHEADBB-NEXT:    th.ff1 a0, a0
-; RV32XTHEADBB-NEXT:    addi a0, a0, -16
 ; RV32XTHEADBB-NEXT:    ret
 ;
 ; RV64XTHEADBB-LABEL: test_ctlz_i16_zero_undef:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a0, a0, 15, 0
+; RV64XTHEADBB-NEXT:    slli a0, a0, 48
 ; RV64XTHEADBB-NEXT:    th.ff1 a0, a0
-; RV64XTHEADBB-NEXT:    addi a0, a0, -48
 ; RV64XTHEADBB-NEXT:    ret
   %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 true)
   ret i16 %tmp
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
index acd63f24bb8f7..80d3add385969 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll
@@ -236,13 +236,13 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
 ;
 ; RV64XTHEADBB-LABEL: findLastSet_i32:
 ; RV64XTHEADBB:       # %bb.0:
-; RV64XTHEADBB-NEXT:    th.extu a1, a0, 31, 0
+; RV64XTHEADBB-NEXT:    slli a1, a0, 32
 ; RV64XTHEADBB-NEXT:    th.ff1 a1, a1
-; RV64XTHEADBB-NEXT:    addiw a1, a1, -32
 ; RV64XTHEADBB-NEXT:    xori a1, a1, 31
 ; RV64XTHEADBB-NEXT:    snez a0, a0
-; RV64XTHEADBB-NEXT:    addiw a0, a0, -1
+; RV64XTHEADBB-NEXT:    addi a0, a0, -1
 ; RV64XTHEADBB-NEXT:    or a0, a0, a1
+; RV64XTHEADBB-NEXT:    sext.w a0, a0
 ; RV64XTHEADBB-NEXT:    ret
   %1 = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
   %2 = xor i32 31, %1
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index 2a75e5ce7175d..7cdf594f45507 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -2606,10 +2606,8 @@ define <vscale x 1 x i9> @vp_ctlz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1>
 define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i9:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 511
-; CHECK-NEXT:    vsetvli a2, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vand.vx v8, v8, a1
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT:    vsll.vi v8, v8, 7, v0.t
 ; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; CHECK-NEXT:    vsrl.vi v8, v9, 23, v0.t
@@ -2617,19 +2615,13 @@ define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
   %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
   ret <vscale x 1 x i9> %v
diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll
index e932210d3e71f..da687095045ff 100644
--- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll
+++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-01.ll
@@ -43,10 +43,10 @@ define i32 @f2(i32 %arg) {
 define i32 @f3(i32 %arg) {
 ; CHECK-LABEL: f3:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    llgfr %r0, %r2
+; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT:    sllg %r0, %r2, 32
 ; CHECK-NEXT:    flogr %r2, %r0
-; CHECK-NEXT:    aghi %r2, -32
-; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2q
 ; CHECK-NEXT:    br %r14
   %1 = tail call i32 @llvm.ctlz.i32(i32 %arg, i1 true)
   ret i32 %1
@@ -69,10 +69,9 @@ define i16 @f5(i16 %arg) {
 ; CHECK-LABEL: f5:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
-; CHECK-NEXT:    llghr %r0, %r2
-; CHECK-NEXT:    flogr %r0, %r0
-; CHECK-NEXT:    aghi %r0, -32
-; CHECK-NEXT:    ahik %r2, %r0, -16
+; CHECK-NEXT:    sllg %r0, %r2, 48
+; CHECK-NEXT:    flogr %r2, %r0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2q
 ; CHECK-NEXT:    br %r14
   %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true)
   ret i16 %1
@@ -95,10 +94,9 @@ define i8 @f7(i8 %arg) {
 ; CHECK-LABEL: f7:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT:    # kill: def $r2l killed $r2l def $r2d
-; CHECK-NEXT:    llgcr %r0, %r2
-; CHECK-NEXT:    flogr %r0, %r0
-; CHECK-NEXT:    aghi %r0, -32
-; CHECK-NEXT:    ahik %r2, %r0, -24
+; CHECK-NEXT:    sllg %r0, %r2, 56
+; CHECK-NEXT:    flogr %r2, %r0
+; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2q
 ; CHECK-NEXT:    br %r14
   %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true)
   ret i8 %1
diff --git a/llvm/test/CodeGen/VE/Scalar/ctlz.ll b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
index c8c2b11c5eef6..602b9a86bf032 100644
--- a/llvm/test/CodeGen/VE/Scalar/ctlz.ll
+++ b/llvm/test/CodeGen/VE/Scalar/ctlz.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
 
 declare i128 @llvm.ctlz.i128(i128, i1)
@@ -31,9 +32,8 @@ define i64 @func64(i64 %p) {
 define signext i32 @func32s(i32 signext %p) {
 ; CHECK-LABEL: func32s:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    sll %s0, %s0, 32
 ; CHECK-NEXT:    ldz %s0, %s0
-; CHECK-NEXT:    lea %s0, -32(, %s0)
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true)
   ret i32 %r
@@ -42,9 +42,8 @@ define signext i32 @func32s(i32 signext %p) {
 define zeroext i32 @func32z(i32 zeroext %p) {
 ; CHECK-LABEL: func32z:
 ; CHECK: # %bb.0:
+; CHECK-NEXT:    sll %s0, %s0, 32
 ; CHECK-NEXT:    ldz %s0, %s0
-; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i32 @llvm.ctlz.i32(i32 %p, i1 true)
   ret i32 %r
@@ -53,11 +52,8 @@ define zeroext i32 @func32z(i32 zeroext %p) {
 define signext i16 @func16s(i16 signext %p) {
 ; CHECK-LABEL: func16s:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (48)0
+; CHECK-NEXT:    sll %s0, %s0, 48
 ; CHECK-NEXT:    ldz %s0, %s0
-; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    adds.w.sx %s0, -16, %s0
-; CHECK-NEXT:    and %s0, %s0, (48)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true)
   ret i16 %r
@@ -66,10 +62,8 @@ define signext i16 @func16s(i16 signext %p) {
 define zeroext i16 @func16z(i16 zeroext %p) {
 ; CHECK-LABEL: func16z:
 ; CHECK: # %bb.0:
+; CHECK-NEXT:    sll %s0, %s0, 48
 ; CHECK-NEXT:    ldz %s0, %s0
-; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    adds.w.sx %s0, -16, %s0
-; CHECK-NEXT:    and %s0, %s0, (48)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i16 @llvm.ctlz.i16(i16 %p, i1 true)
   ret i16 %r
@@ -78,11 +72,8 @@ define zeroext i16 @func16z(i16 zeroext %p) {
 define signext i8 @func8s(i8 signext %p) {
 ; CHECK-LABEL: func8s:
 ; CHECK: # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (56)0
+; CHECK-NEXT:    sll %s0, %s0, 56
 ; CHECK-NEXT:    ldz %s0, %s0
-; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    adds.w.sx %s0, -24, %s0
-; CHECK-NEXT:    and %s0, %s0, (56)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true)
   ret i8 %r
@@ -91,10 +82,8 @@ define signext i8 @func8s(i8 signext %p) {
 define zeroext i8 @func8z(i8 zeroext %p) {
 ; CHECK-LABEL: func8z:
 ; CHECK: # %bb.0:
+; CHECK-NEXT:    sll %s0, %s0, 56
 ; CHECK-NEXT:    ldz %s0, %s0
-; CHECK-NEXT:    lea %s0, -32(, %s0)
-; CHECK-NEXT:    adds.w.sx %s0, -24, %s0
-; CHECK-NEXT:    and %s0, %s0, (56)0
 ; CHECK-NEXT:    b.l.t (, %s10)
   %r = tail call i8 @llvm.ctlz.i8(i8 %p, i1 true)
   ret i8 %r
diff --git a/llvm/test/CodeGen/X86/clz.ll b/llvm/test/CodeGen/X86/clz.ll
index 92cbc16590247..7688959cb5335 100644
--- a/llvm/test/CodeGen/X86/clz.ll
+++ b/llvm/test/CodeGen/X86/clz.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOCMOV
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=X86,X86-CMOV
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
@@ -225,33 +225,31 @@ define i8 @ctlz_i8(i8 %x) {
 ;
 ; X86-CLZ-LABEL: ctlz_i8:
 ; X86-CLZ:       # %bb.0:
-; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-CLZ-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-CLZ-NEXT:    shll $24, %eax
 ; X86-CLZ-NEXT:    lzcntl %eax, %eax
-; X86-CLZ-NEXT:    addl $-24, %eax
 ; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-CLZ-NEXT:    retl
 ;
 ; X64-CLZ-LABEL: ctlz_i8:
 ; X64-CLZ:       # %bb.0:
-; X64-CLZ-NEXT:    movzbl %dil, %eax
-; X64-CLZ-NEXT:    lzcntl %eax, %eax
-; X64-CLZ-NEXT:    addl $-24, %eax
+; X64-CLZ-NEXT:    shll $24, %edi
+; X64-CLZ-NEXT:    lzcntl %edi, %eax
 ; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-CLZ-NEXT:    retq
 ;
 ; X64-FASTLZCNT-LABEL: ctlz_i8:
 ; X64-FASTLZCNT:       # %bb.0:
-; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
-; X64-FASTLZCNT-NEXT:    lzcntl %eax, %eax
-; X64-FASTLZCNT-NEXT:    addl $-24, %eax
+; X64-FASTLZCNT-NEXT:    shll $24, %edi
+; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
 ; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-FASTLZCNT-NEXT:    retq
 ;
 ; X86-FASTLZCNT-LABEL: ctlz_i8:
 ; X86-FASTLZCNT:       # %bb.0:
-; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-FASTLZCNT-NEXT:    shll $24, %eax
 ; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
-; X86-FASTLZCNT-NEXT:    addl $-24, %eax
 ; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-FASTLZCNT-NEXT:    retl
   %tmp2 = call i8 @llvm.ctlz.i8( i8 %x, i1 true )
@@ -1154,8 +1152,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
 ; X86-CLZ-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-CLZ-NEXT:    orb $64, %al
 ; X86-CLZ-NEXT:    movzbl %al, %eax
+; X86-CLZ-NEXT:    shll $24, %eax
 ; X86-CLZ-NEXT:    lzcntl %eax, %eax
-; X86-CLZ-NEXT:    addl $-24, %eax
 ; X86-CLZ-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-CLZ-NEXT:    retl
 ;
@@ -1163,8 +1161,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
 ; X64-CLZ:       # %bb.0:
 ; X64-CLZ-NEXT:    orb $64, %dil
 ; X64-CLZ-NEXT:    movzbl %dil, %eax
+; X64-CLZ-NEXT:    shll $24, %eax
 ; X64-CLZ-NEXT:    lzcntl %eax, %eax
-; X64-CLZ-NEXT:    addl $-24, %eax
 ; X64-CLZ-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-CLZ-NEXT:    retq
 ;
@@ -1172,8 +1170,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
 ; X64-FASTLZCNT:       # %bb.0:
 ; X64-FASTLZCNT-NEXT:    orb $64, %dil
 ; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
+; X64-FASTLZCNT-NEXT:    shll $24, %eax
 ; X64-FASTLZCNT-NEXT:    lzcntl %eax, %eax
-; X64-FASTLZCNT-NEXT:    addl $-24, %eax
 ; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-FASTLZCNT-NEXT:    retq
 ;
@@ -1182,8 +1180,8 @@ define i8 @ctlz_i8_knownbits(i8 %x) {
 ; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X86-FASTLZCNT-NEXT:    orb $64, %al
 ; X86-FASTLZCNT-NEXT:    movzbl %al, %eax
+; X86-FASTLZCNT-NEXT:    shll $24, %eax
 ; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
-; X86-FASTLZCNT-NEXT:    addl $-24, %eax
 ; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-FASTLZCNT-NEXT:    retl
 
@@ -1581,18 +1579,17 @@ define i8 @ctlz_xor7_i8_true(i8 %x) {
 ;
 ; X64-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
 ; X64-FASTLZCNT:       # %bb.0:
-; X64-FASTLZCNT-NEXT:    movzbl %dil, %eax
-; X64-FASTLZCNT-NEXT:    lzcntl %eax, %eax
-; X64-FASTLZCNT-NEXT:    addl $-24, %eax
+; X64-FASTLZCNT-NEXT:    shll $24, %edi
+; X64-FASTLZCNT-NEXT:    lzcntl %edi, %eax
 ; X64-FASTLZCNT-NEXT:    xorb $7, %al
 ; X64-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-FASTLZCNT-NEXT:    retq
 ;
 ; X86-FASTLZCNT-LABEL: ctlz_xor7_i8_true:
 ; X86-FASTLZCNT:       # %bb.0:
-; X86-FASTLZCNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-FASTLZCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-FASTLZCNT-NEXT:    shll $24, %eax
 ; X86-FASTLZCNT-NEXT:    lzcntl %eax, %eax
-; X86-FASTLZCNT-NEXT:    addl $-24, %eax
 ; X86-FASTLZCNT-NEXT:    xorb $7, %al
 ; X86-FASTLZCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-FASTLZCNT-NEXT:    retl
diff --git a/llvm/test/CodeGen/X86/lzcnt.ll b/llvm/test/CodeGen/X86/lzcnt.ll
index 68cef3f9363f9..b000401973416 100644
--- a/llvm/test/CodeGen/X86/lzcnt.ll
+++ b/llvm/test/CodeGen/X86/lzcnt.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
 ; RUN: llc < %s -mtriple=i686-- -mattr=+lzcnt | FileCheck %s --check-prefix=X86
 ; RUN: llc < %s -mtriple=x86_64-linux-gnux32 -mattr=+lzcnt | FileCheck %s --check-prefix=X32
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+lzcnt | FileCheck %s --check-prefix=X64
@@ -106,25 +106,23 @@ define i64 @t4(i64 %x) nounwind {
 define i8 @t5(i8 %x) nounwind {
 ; X86-LABEL: t5:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $24, %eax
 ; X86-NEXT:    lzcntl %eax, %eax
-; X86-NEXT:    addl $-24, %eax
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X32-LABEL: t5:
 ; X32:       # %bb.0:
-; X32-NEXT:    movzbl %dil, %eax
-; X32-NEXT:    lzcntl %eax, %eax
-; X32-NEXT:    addl $-24, %eax
+; X32-NEXT:    shll $24, %edi
+; X32-NEXT:    lzcntl %edi, %eax
 ; X32-NEXT:    # kill: def $al killed $al killed $eax
 ; X32-NEXT:    retq
 ;
 ; X64-LABEL: t5:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzbl %dil, %eax
-; X64-NEXT:    lzcntl %eax, %eax
-; X64-NEXT:    addl $-24, %eax
+; X64-NEXT:    shll $24, %edi
+; X64-NEXT:    lzcntl %edi, %eax
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %tmp = tail call i8 @llvm.ctlz.i8( i8 %x, i1 true )
diff --git a/llvm/test/CodeGen/X86/pr38539.ll b/llvm/test/CodeGen/X86/pr38539.ll
index ace78b38d53ed..0987e00266380 100644
--- a/llvm/test/CodeGen/X86/pr38539.ll
+++ b/llvm/test/CodeGen/X86/pr38539.ll
@@ -22,79 +22,84 @@ define void @f() nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-16, %esp
-; X86-NEXT:    subl $160, %esp
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    subl $176, %esp
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movzbl (%eax), %eax
 ; X86-NEXT:    movzbl (%eax), %ecx
 ; X86-NEXT:    movzbl %al, %eax
 ; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    divb %cl
-; X86-NEXT:    movl %edx, %eax
+; X86-NEXT:    movl %edi, %eax
 ; X86-NEXT:    shll $30, %eax
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    sarl $30, %ecx
 ; X86-NEXT:    sarl $31, %eax
 ; X86-NEXT:    shrdl $1, %eax, %ecx
-; X86-NEXT:    xorl %eax, %edx
 ; X86-NEXT:    xorl %eax, %edi
+; X86-NEXT:    xorl %eax, %edx
 ; X86-NEXT:    xorl %ecx, %esi
 ; X86-NEXT:    subl %ecx, %esi
-; X86-NEXT:    sbbl %eax, %edi
 ; X86-NEXT:    sbbl %eax, %edx
-; X86-NEXT:    andl $3, %edx
-; X86-NEXT:    testl %edi, %edi
+; X86-NEXT:    sbbl %eax, %edi
+; X86-NEXT:    movl %edi, %ecx
+; X86-NEXT:    shldl $30, %edx, %ecx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shldl $30, %esi, %edx
+; X86-NEXT:    testl %ecx, %ecx
 ; X86-NEXT:    jne .LBB0_1
 ; X86-NEXT:  # %bb.2: # %BB_udiv-special-cases
-; X86-NEXT:    bsrl %esi, %eax
+; X86-NEXT:    bsrl %edx, %eax
 ; X86-NEXT:    xorl $31, %eax
 ; X86-NEXT:    addl $32, %eax
 ; X86-NEXT:    jmp .LBB0_3
 ; X86-NEXT:  .LBB0_1:
-; X86-NEXT:    bsrl %edi, %eax
+; X86-NEXT:    bsrl %ecx, %eax
 ; X86-NEXT:    xorl $31, %eax
 ; X86-NEXT:  .LBB0_3: # %BB_udiv-special-cases
-; X86-NEXT:    xorl %ecx, %ecx
-; X86-NEXT:    testl %edx, %edx
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll $30, %esi
 ; X86-NEXT:    jne .LBB0_4
 ; X86-NEXT:  # %bb.5: # %BB_udiv-special-cases
-; X86-NEXT:    addl $64, %eax
-; X86-NEXT:    jmp .LBB0_6
+; X86-NEXT:    movl $64, %esi
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    je .LBB0_7
+; X86-NEXT:    jmp .LBB0_8
 ; X86-NEXT:  .LBB0_4:
-; X86-NEXT:    bsrl %edx, %eax
-; X86-NEXT:    xorl $31, %eax
-; X86-NEXT:    addl $32, %eax
-; X86-NEXT:  .LBB0_6: # %BB_udiv-special-cases
-; X86-NEXT:    subl $62, %eax
-; X86-NEXT:    movl $0, %ebx
-; X86-NEXT:    sbbl %ebx, %ebx
-; X86-NEXT:    sbbl %ecx, %ecx
+; X86-NEXT:    bsrl %esi, %esi
+; X86-NEXT:    xorl $31, %esi
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    jne .LBB0_8
+; X86-NEXT:  .LBB0_7: # %BB_udiv-special-cases
+; X86-NEXT:    addl $64, %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:  .LBB0_8: # %BB_udiv-special-cases
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    addl $-66, %eax
+; X86-NEXT:    movl $0, %ebx
 ; X86-NEXT:    adcl $-1, %ebx
-; X86-NEXT:    adcl $3, %ecx
-; X86-NEXT:    movl %ecx, %esi
+; X86-NEXT:    movl $0, %edx
+; X86-NEXT:    adcl $3, %edx
 ; X86-NEXT:    movb $1, %cl
 ; X86-NEXT:    testb %cl, %cl
-; X86-NEXT:    jne .LBB0_11
-; X86-NEXT:  # %bb.7: # %BB_udiv-special-cases
-; X86-NEXT:    andl $3, %esi
+; X86-NEXT:    jne .LBB0_14
+; X86-NEXT:  # %bb.9: # %BB_udiv-special-cases
+; X86-NEXT:    andl $3, %edx
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    xorl $65, %ecx
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    orl %edx, %ecx
 ; X86-NEXT:    orl %ebx, %ecx
-; X86-NEXT:    je .LBB0_11
-; X86-NEXT:  # %bb.8: # %udiv-bb1
+; X86-NEXT:    je .LBB0_14
+; X86-NEXT:  # %bb.10: # %udiv-bb1
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    addl $1, %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    adcl $0, %ebx
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    adcl $0, %ebx
-; X86-NEXT:    andl $3, %ebx
+; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    andl $3, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movb $65, %cl
 ; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movb %cl, %ch
@@ -102,7 +107,7 @@ define void @f() nounwind {
 ; X86-NEXT:    shrb $3, %cl
 ; X86-NEXT:    andb $15, %cl
 ; X86-NEXT:    negb %cl
-; X86-NEXT:    movsbl %cl, %eax
+; X86-NEXT:    movsbl %cl, %esi
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
@@ -112,28 +117,27 @@ define void @f() nounwind {
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 120(%esp,%eax), %edi
+; X86-NEXT:    movl 136(%esp,%esi), %edi
 ; X86-NEXT:    movb %ch, %cl
 ; X86-NEXT:    shll %cl, %edi
 ; X86-NEXT:    notb %cl
-; X86-NEXT:    movl 112(%esp,%eax), %esi
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl 116(%esp,%eax), %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    movl 128(%esp,%esi), %ebx
+; X86-NEXT:    movl 132(%esp,%esi), %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    shrl %esi
+; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    movb %ch, %cl
-; X86-NEXT:    shldl %cl, %esi, %edx
-; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    shll %cl, %esi
-; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    shldl %cl, %ebx, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    shll %cl, %ebx
 ; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    orl %ebx, %ecx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    orl %edx, %ecx
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT:    je .LBB0_11
-; X86-NEXT:  # %bb.9: # %udiv-preheader
-; X86-NEXT:    orl %eax, %edi
+; X86-NEXT:    je .LBB0_13
+; X86-NEXT:  # %bb.11: # %udiv-preheader
+; X86-NEXT:    andl $3, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT:    orl %esi, %edi
 ; X86-NEXT:    andl $3, %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
@@ -150,19 +154,19 @@ define void @f() nounwind {
 ; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    shrb $3, %al
 ; X86-NEXT:    andb $15, %al
-; X86-NEXT:    movzbl %al, %eax
-; X86-NEXT:    movl 64(%esp,%eax), %edi
-; X86-NEXT:    movl 68(%esp,%eax), %edx
-; X86-NEXT:    movl %edx, %esi
+; X86-NEXT:    movzbl %al, %edx
+; X86-NEXT:    movl 80(%esp,%edx), %edi
+; X86-NEXT:    movl 84(%esp,%edx), %eax
+; X86-NEXT:    movl %eax, %esi
 ; X86-NEXT:    movb %ch, %cl
 ; X86-NEXT:    shrl %cl, %esi
 ; X86-NEXT:    notb %cl
-; X86-NEXT:    movl 72(%esp,%eax), %ebx
+; X86-NEXT:    movl 88(%esp,%edx), %ebx
 ; X86-NEXT:    addl %ebx, %ebx
 ; X86-NEXT:    shll %cl, %ebx
 ; X86-NEXT:    orl %esi, %ebx
 ; X86-NEXT:    movb %ch, %cl
-; X86-NEXT:    shrdl %cl, %edx, %edi
+; X86-NEXT:    shrdl %cl, %eax, %edi
 ; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
@@ -178,51 +182,52 @@ define void @f() nounwind {
 ; X86-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; X86-NEXT:    xorl %ecx, %ecx
 ; X86-NEXT:    .p2align 4, 0x90
-; X86-NEXT:  .LBB0_10: # %udiv-do-while
+; X86-NEXT:  .LBB0_12: # %udiv-do-while
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    movl %ebx, %edi
 ; X86-NEXT:    shldl $1, %ebx, %ecx
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT:    shldl $1, %ebx, %edi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT:    shldl $1, %edi, %ebx
+; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    andl $2, %edx
-; X86-NEXT:    shrl %edx
-; X86-NEXT:    leal (%edx,%ebx,2), %ebx
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    andl $2, %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    leal (%eax,%edi,2), %edi
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; X86-NEXT:    shldl $1, %edx, %esi
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT:    orl %ebx, %esi
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    shldl $1, %eax, %edx
-; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT:    orl %ebx, %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    addl %eax, %eax
 ; X86-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    andl $3, %esi
 ; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT:    sbbl %edi, %edx
+; X86-NEXT:    cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    sbbl %ebx, %eax
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; X86-NEXT:    sbbl %ecx, %esi
 ; X86-NEXT:    shll $30, %esi
-; X86-NEXT:    movl %esi, %edx
-; X86-NEXT:    sarl $30, %edx
-; X86-NEXT:    sarl $31, %esi
-; X86-NEXT:    shrdl $1, %esi, %edx
-; X86-NEXT:    movl %edx, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    sarl $30, %eax
+; X86-NEXT:    sarl $31, %esi
+; X86-NEXT:    shrdl $1, %esi, %eax
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:    andl $1, %edx
+; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT:    movl %esi, %edx
+; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
 ; X86-NEXT:    andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT:    subl %edx, %ebx
-; X86-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT:    sbbl %esi, %edi
-; X86-NEXT:    movl %edi, %ebx
-; X86-NEXT:    sbbl %eax, %ecx
+; X86-NEXT:    subl %eax, %edi
+; X86-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    sbbl %esi, %ebx
+; X86-NEXT:    sbbl %edx, %ecx
 ; X86-NEXT:    andl $3, %ecx
 ; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; X86-NEXT:    addl $-1, %eax
@@ -236,12 +241,13 @@ define void @f() nounwind {
 ; X86-NEXT:    orl %esi, %eax
 ; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    orl %edx, %eax
-; X86-NEXT:    jne .LBB0_10
-; X86-NEXT:  .LBB0_11: # %udiv-end
+; X86-NEXT:    jne .LBB0_12
+; X86-NEXT:  .LBB0_13: # %udiv-loop-exit
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi
+; X86-NEXT:  .LBB0_14: # %udiv-end
 ; X86-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
 ; X86-NEXT:    setne (%eax)
-; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, (%eax)
+; X86-NEXT:    movl %esi, (%eax)
 ; X86-NEXT:    movb $0, (%eax)
 ; X86-NEXT:    leal -12(%ebp), %esp
 ; X86-NEXT:    popl %esi
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
index 831d7e6292e33..0932938b209a4 100644
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -610,10 +610,10 @@ TEST_F(AArch64GISelMITest, WidenBitCountingCTLZZeroUndef) {
   auto CheckStr = R"(
   CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC
   CHECK: [[Zext:%[0-9]+]]:_(s16) = G_ZEXT [[Trunc]]
-  CHECK: [[CtlzZu:%[0-9]+]]:_(s16) = G_CTLZ_ZERO_UNDEF [[Zext]]
   CHECK: [[Cst8:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
-  CHECK: [[Sub:%[0-9]+]]:_(s16) = G_SUB [[CtlzZu]]:_, [[Cst8]]:_
-  CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC [[Sub]]
+  CHECK: [[Shl:%[0-9]+]]:_(s16) = G_SHL [[Zext]]:_, [[Cst8]]:_
+  CHECK: [[CtlzZu:%[0-9]+]]:_(s16) = G_CTLZ_ZERO_UNDEF [[Shl]]
+  CHECK: [[Trunc:%[0-9]+]]:_(s8) = G_TRUNC [[CtlzZu]]
 )";

  // Check