diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fd97a1283b65a..0543c211c4971 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -5032,7 +5032,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::CTTZ_ZERO_UNDEF: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - case ISD::CTPOP: + case ISD::CTPOP: { // Zero extend the argument unless its cttz, then use any_extend. if (Node->getOpcode() == ISD::CTTZ || Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) @@ -5040,7 +5040,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { else Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); - if (Node->getOpcode() == ISD::CTTZ) { + unsigned NewOpc = Node->getOpcode(); + if (NewOpc == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. @@ -5048,12 +5049,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { OVT.getSizeInBits()); Tmp1 = DAG.getNode(ISD::OR, dl, NVT, Tmp1, DAG.getConstant(TopBit, dl, NVT)); + NewOpc = ISD::CTTZ_ZERO_UNDEF; } // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is // already the correct result. - Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); - if (Node->getOpcode() == ISD::CTLZ || - Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + Tmp1 = DAG.getNode(NewOpc, dl, NVT, Tmp1); + if (NewOpc == ISD::CTLZ || NewOpc == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, DAG.getConstant(NVT.getSizeInBits() - @@ -5061,6 +5062,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; + } case ISD::BITREVERSE: case ISD::BSWAP: { unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0aa36deda79dc..98f64947bcabc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -709,23 +709,26 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } } - if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) { + unsigned NewOpc = N->getOpcode(); + if (NewOpc == ISD::CTTZ || NewOpc == ISD::VP_CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), OVT.getScalarSizeInBits()); - if (N->getOpcode() == ISD::CTTZ) + if (NewOpc == ISD::CTTZ) { Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); - else + NewOpc = ISD::CTTZ_ZERO_UNDEF; + } else { Op = DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT), N->getOperand(1), N->getOperand(2)); + NewOpc = ISD::VP_CTTZ_ZERO_UNDEF; + } } if (!N->isVPOpcode()) - return DAG.getNode(N->getOpcode(), dl, NVT, Op); - return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1), - N->getOperand(2)); + return DAG.getNode(NewOpc, dl, NVT, Op); + return DAG.getNode(NewOpc, dl, NVT, Op, N->getOperand(1), N->getOperand(2)); } SDValue DAGTypeLegalizer::PromoteIntRes_VP_CttzElements(SDNode *N) { diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll index 118d6c123046b..ee2894a66fbfc 100644 --- a/llvm/test/CodeGen/AMDGPU/cttz.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz.ll @@ -1408,7 +1408,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias % ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0 ; VI-NEXT: v_ffbl_b32_e32 v2, v2 -; VI-NEXT: v_min_u32_e32 v2, 32, v2 ; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 ; VI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc ; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 @@ -1451,7 +1450,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias % ; GFX10-NEXT: v_or_b32_e32 v2, 0x10000, v1 ; GFX10-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v1 ; GFX10-NEXT: v_ffbl_b32_e32 v2, v2 -; GFX10-NEXT: v_min_u32_e32 v2, 32, v2 ; GFX10-NEXT: v_cndmask_b32_e32 v1, 0xffff, v2, vcc_lo ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll index 71f1cd54d705c..392a44318b0a5 100644 --- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -1561,7 +1561,6 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias % ; VI-NEXT: v_or_b32_e32 v0, v2, v0 ; VI-NEXT: v_or_b32_e32 v2, 0x10000, v0 ; VI-NEXT: v_ffbl_b32_e32 v2, v2 -; VI-NEXT: v_min_u32_e32 v2, 32, v2 ; VI-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 ; VI-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc ; VI-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index ef8a6c704a44b..4a001662ce2ca 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -4145,29 +4145,15 @@ define @vp_cttz_nxv1i9( %va,