diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f9b484b98739f..1258060444059 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2752,7 +2752,7 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
-    return true;
+    return Subtarget.hasVInstructions();
   case MVT::i64:
     return Subtarget.hasVInstructionsI64();
   case MVT::f16:
@@ -24840,12 +24840,16 @@ bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
 }

 bool RISCVTargetLowering::isCtpopFast(EVT VT) const {
-  if (VT.isScalableVector())
-    return isTypeLegal(VT) && Subtarget.hasStdExtZvbb();
-  if (VT.isFixedLengthVector() && Subtarget.hasStdExtZvbb())
-    return true;
-  return Subtarget.hasCPOPLike() &&
-         (VT == MVT::i32 || VT == MVT::i64 || VT.isFixedLengthVector());
+  if (VT.isVector()) {
+    EVT SVT = VT.getVectorElementType();
+    // If the element type is legal we can use cpop.v if it is enabled.
+    if (isLegalElementTypeForRVV(SVT))
+      return Subtarget.hasStdExtZvbb();
+    // Don't consider it fast if the type needs to be legalized or scalarized.
+    return false;
+  }
+
+  return Subtarget.hasCPOPLike() && (VT == MVT::i32 || VT == MVT::i64);
 }

 unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 3b3ef72e32aa7..a1a843a7c1ba7 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -423,100 +423,62 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
 }

 define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ult_two:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: seqz a0, a0
-; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ult_two:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ult <2 x i32> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ugt_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: and a1, a1, a3
-; RV32I-NEXT: and a0, a0, a2
-; RV32I-NEXT: snez a0, a0
-; RV32I-NEXT: snez a1, a1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ugt_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: xori a0, a0, 1
-; RV32ZBB-NEXT: xori a1, a1, 1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: snez a1, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ugt <2 x i32> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_eq_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: xor a1, a1, a3
-; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: sltu a1, a3, a1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: seqz a0, a0
-; RV32ZBB-NEXT: seqz a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp eq <2 x i32> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i32_ne_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: addi a2, a0, -1
-; RV32I-NEXT: addi a3, a1, -1
-; RV32I-NEXT: xor a1, a1, a3
-; RV32I-NEXT: xor a0, a0, a2
-; RV32I-NEXT: sltu a0, a2, a0
-; RV32I-NEXT: sltu a1, a3, a1
-; RV32I-NEXT: xori a0, a0, 1
-; RV32I-NEXT: xori a1, a1, 1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: snez a0, a0
-; RV32ZBB-NEXT: snez a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ne <2 x i32> %1,
  ret <2 x i1> %2
@@ -792,200 +754,130 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
 }

 define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ult_two:
-; RV32I: # %bb.0:
-; RV32I-NEXT: lw a1, 0(a0)
-; RV32I-NEXT: lw a2, 8(a0)
-; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: seqz a4, a1
-; RV32I-NEXT: seqz a5, a2
-; RV32I-NEXT: addi a6, a1, -1
-; RV32I-NEXT: addi a7, a2, -1
-; RV32I-NEXT: sub a4, a3, a4
-; RV32I-NEXT: sub a5, a0, a5
-; RV32I-NEXT: and a2, a2, a7
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: and a0, a0, a5
-; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: or a2, a2, a0
-; RV32I-NEXT: seqz a0, a1
-; RV32I-NEXT: seqz a1, a2
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ult_two:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a0)
+; CHECK-NEXT: lw a2, 8(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lw a0, 12(a0)
+; CHECK-NEXT: seqz a4, a1
+; CHECK-NEXT: seqz a5, a2
+; CHECK-NEXT: addi a6, a1, -1
+; CHECK-NEXT: addi a7, a2, -1
+; CHECK-NEXT: sub a4, a3, a4
+; CHECK-NEXT: sub a5, a0, a5
+; CHECK-NEXT: and a2, a2, a7
+; CHECK-NEXT: and a1, a1, a6
+; CHECK-NEXT: and a0, a0, a5
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: or a2, a2, a0
+; CHECK-NEXT: seqz a0, a1
+; CHECK-NEXT: seqz a1, a2
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ult <2 x i64> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ugt_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: lw a1, 0(a0)
-; RV32I-NEXT: lw a2, 8(a0)
-; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: lw a0, 12(a0)
-; RV32I-NEXT: seqz a4, a1
-; RV32I-NEXT: seqz a5, a2
-; RV32I-NEXT: addi a6, a1, -1
-; RV32I-NEXT: addi a7, a2, -1
-; RV32I-NEXT: sub a4, a3, a4
-; RV32I-NEXT: sub a5, a0, a5
-; RV32I-NEXT: and a2, a2, a7
-; RV32I-NEXT: and a1, a1, a6
-; RV32I-NEXT: and a0, a0, a5
-; RV32I-NEXT: and a3, a3, a4
-; RV32I-NEXT: or a1, a1, a3
-; RV32I-NEXT: or a2, a2, a0
-; RV32I-NEXT: snez a0, a1
-; RV32I-NEXT: snez a1, a2
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ugt_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: sltiu a0, a0, 2
-; RV32ZBB-NEXT: sltiu a1, a1, 2
-; RV32ZBB-NEXT: xori a0, a0, 1
-; RV32ZBB-NEXT: xori a1, a1, 1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a0)
+; CHECK-NEXT: lw a2, 8(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lw a0, 12(a0)
+; CHECK-NEXT: seqz a4, a1
+; CHECK-NEXT: seqz a5, a2
+; CHECK-NEXT: addi a6, a1, -1
+; CHECK-NEXT: addi a7, a2, -1
+; CHECK-NEXT: sub a4, a3, a4
+; CHECK-NEXT: sub a5, a0, a5
+; CHECK-NEXT: and a2, a2, a7
+; CHECK-NEXT: and a1, a1, a6
+; CHECK-NEXT: and a0, a0, a5
+; CHECK-NEXT: and a3, a3, a4
+; CHECK-NEXT: or a1, a1, a3
+; CHECK-NEXT: or a2, a2, a0
+; CHECK-NEXT: snez a0, a1
+; CHECK-NEXT: snez a1, a2
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ugt <2 x i64> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_eq_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: mv a1, a0
-; RV32I-NEXT: lw a0, 0(a0)
-; RV32I-NEXT: lw a3, 4(a1)
-; RV32I-NEXT: lw a2, 12(a1)
-; RV32I-NEXT: beqz a3, .LBB22_3
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: seqz a0, a0
-; RV32I-NEXT: sub a0, a3, a0
-; RV32I-NEXT: xor a3, a3, a0
-; RV32I-NEXT: sltu a0, a0, a3
-; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: bnez a2, .LBB22_4
-; RV32I-NEXT: .LBB22_2:
-; RV32I-NEXT: addi a2, a1, -1
-; RV32I-NEXT: xor a1, a1, a2
-; RV32I-NEXT: sltu a1, a2, a1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB22_3:
-; RV32I-NEXT: addi a3, a0, -1
-; RV32I-NEXT: xor a0, a0, a3
-; RV32I-NEXT: sltu a0, a3, a0
-; RV32I-NEXT: lw a1, 8(a1)
-; RV32I-NEXT: beqz a2, .LBB22_2
-; RV32I-NEXT: .LBB22_4:
-; RV32I-NEXT: seqz a1, a1
-; RV32I-NEXT: sub a1, a2, a1
-; RV32I-NEXT: xor a2, a2, a1
-; RV32I-NEXT: sltu a1, a1, a2
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: seqz a0, a0
-; RV32ZBB-NEXT: seqz a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mv a1, a0
+; CHECK-NEXT: lw a0, 0(a0)
+; CHECK-NEXT: lw a3, 4(a1)
+; CHECK-NEXT: lw a2, 12(a1)
+; CHECK-NEXT: beqz a3, .LBB22_3
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: sub a0, a3, a0
+; CHECK-NEXT: xor a3, a3, a0
+; CHECK-NEXT: sltu a0, a0, a3
+; CHECK-NEXT: lw a1, 8(a1)
+; CHECK-NEXT: bnez a2, .LBB22_4
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sltu a1, a2, a1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB22_3:
+; CHECK-NEXT: addi a3, a0, -1
+; CHECK-NEXT: xor a0, a0, a3
+; CHECK-NEXT: sltu a0, a3, a0
+; CHECK-NEXT: lw a1, 8(a1)
+; CHECK-NEXT: beqz a2, .LBB22_2
+; CHECK-NEXT: .LBB22_4:
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: xor a2, a2, a1
+; CHECK-NEXT: sltu a1, a1, a2
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp eq <2 x i64> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
-; RV32I-LABEL: ctpop_v2i64_ne_one:
-; RV32I: # %bb.0:
-; RV32I-NEXT: lw a2, 0(a0)
-; RV32I-NEXT: lw a3, 4(a0)
-; RV32I-NEXT: lw a1, 12(a0)
-; RV32I-NEXT: beqz a3, .LBB23_2
-; RV32I-NEXT: # %bb.1:
-; RV32I-NEXT: seqz a2, a2
-; RV32I-NEXT: sub a2, a3, a2
-; RV32I-NEXT: xor a3, a3, a2
-; RV32I-NEXT: sltu a2, a2, a3
-; RV32I-NEXT: j .LBB23_3
-; RV32I-NEXT: .LBB23_2:
-; RV32I-NEXT: addi a3, a2, -1
-; RV32I-NEXT: xor a2, a2, a3
-; RV32I-NEXT: sltu a2, a3, a2
-; RV32I-NEXT: .LBB23_3:
-; RV32I-NEXT: lw a3, 8(a0)
-; RV32I-NEXT: xori a0, a2, 1
-; RV32I-NEXT: beqz a1, .LBB23_5
-; RV32I-NEXT: # %bb.4:
-; RV32I-NEXT: seqz a2, a3
-; RV32I-NEXT: sub a2, a1, a2
-; RV32I-NEXT: xor a1, a1, a2
-; RV32I-NEXT: sltu a1, a2, a1
-; RV32I-NEXT: xori a1, a1, 1
-; RV32I-NEXT: ret
-; RV32I-NEXT: .LBB23_5:
-; RV32I-NEXT: addi a1, a3, -1
-; RV32I-NEXT: xor a3, a3, a1
-; RV32I-NEXT: sltu a1, a1, a3
-; RV32I-NEXT: xori a1, a1, 1
-; RV32I-NEXT: ret
-;
-; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
-; RV32ZBB: # %bb.0:
-; RV32ZBB-NEXT: lw a1, 12(a0)
-; RV32ZBB-NEXT: lw a2, 8(a0)
-; RV32ZBB-NEXT: lw a3, 4(a0)
-; RV32ZBB-NEXT: lw a0, 0(a0)
-; RV32ZBB-NEXT: cpop a1, a1
-; RV32ZBB-NEXT: cpop a2, a2
-; RV32ZBB-NEXT: cpop a3, a3
-; RV32ZBB-NEXT: cpop a0, a0
-; RV32ZBB-NEXT: add a1, a2, a1
-; RV32ZBB-NEXT: add a0, a0, a3
-; RV32ZBB-NEXT: addi a0, a0, -1
-; RV32ZBB-NEXT: addi a1, a1, -1
-; RV32ZBB-NEXT: snez a0, a0
-; RV32ZBB-NEXT: snez a1, a1
-; RV32ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a2, 0(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lw a1, 12(a0)
+; CHECK-NEXT: beqz a3, .LBB23_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: seqz a2, a2
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: xor a3, a3, a2
+; CHECK-NEXT: sltu a2, a2, a3
+; CHECK-NEXT: j .LBB23_3
+; CHECK-NEXT: .LBB23_2:
+; CHECK-NEXT: addi a3, a2, -1
+; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: sltu a2, a3, a2
+; CHECK-NEXT: .LBB23_3:
+; CHECK-NEXT: lw a3, 8(a0)
+; CHECK-NEXT: xori a0, a2, 1
+; CHECK-NEXT: beqz a1, .LBB23_5
+; CHECK-NEXT: # %bb.4:
+; CHECK-NEXT: seqz a2, a3
+; CHECK-NEXT: sub a2, a1, a2
+; CHECK-NEXT: xor a1, a1, a2
+; CHECK-NEXT: sltu a1, a2, a1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB23_5:
+; CHECK-NEXT: addi a1, a3, -1
+; CHECK-NEXT: xor a3, a3, a1
+; CHECK-NEXT: sltu a1, a1, a3
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ne <2 x i64> %1,
  ret <2 x i1> %2
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index d133f9d1db389..d8b7bfcbceb27 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -762,108 +762,70 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %a) nounwind {
 }

 define <2 x i1> @ctpop_v2i32_ult_two(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ult_two:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: seqz a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ult_two:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ult <2 x i32> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i32_ugt_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ugt_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: snez a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ugt_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: xori a0, a0, 1
-; RV64ZBB-NEXT: xori a1, a1, 1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: snez a1, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ugt <2 x i32> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_eq_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a2, a0, -1
-; RV64I-NEXT: addiw a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_eq_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: seqz a0, a0
-; RV64ZBB-NEXT: seqz a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: addiw a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp eq <2 x i32> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i32_ne_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addiw a2, a0, -1
-; RV64I-NEXT: addiw a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sext.w a1, a1
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: xori a0, a0, 1
-; RV64I-NEXT: xori a1, a1, 1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i32_ne_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpopw a1, a1
-; RV64ZBB-NEXT: cpopw a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: snez a0, a0
-; RV64ZBB-NEXT: snez a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i32_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addiw a2, a0, -1
+; CHECK-NEXT: addiw a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sext.w a1, a1
+; CHECK-NEXT: sext.w a0, a0
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
  %1 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %a)
  %2 = icmp ne <2 x i32> %1,
  ret <2 x i1> %2
@@ -1052,100 +1014,62 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind {
 }

 define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ult_two:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: seqz a0, a0
-; RV64I-NEXT: seqz a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ult_two:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ult_two:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: seqz a0, a0
+; CHECK-NEXT: seqz a1, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ult <2 x i64> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ugt_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: and a1, a1, a3
-; RV64I-NEXT: and a0, a0, a2
-; RV64I-NEXT: snez a0, a0
-; RV64I-NEXT: snez a1, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ugt_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: sltiu a0, a0, 2
-; RV64ZBB-NEXT: sltiu a1, a1, 2
-; RV64ZBB-NEXT: xori a0, a0, 1
-; RV64ZBB-NEXT: xori a1, a1, 1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ugt_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: and a1, a1, a3
+; CHECK-NEXT: and a0, a0, a2
+; CHECK-NEXT: snez a0, a0
+; CHECK-NEXT: snez a1, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ugt <2 x i64> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_eq_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_eq_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: seqz a0, a0
-; RV64ZBB-NEXT: seqz a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_eq_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp eq <2 x i64> %1,
  ret <2 x i1> %2
 }

 define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
-; RV64I-LABEL: ctpop_v2i64_ne_one:
-; RV64I: # %bb.0:
-; RV64I-NEXT: addi a2, a0, -1
-; RV64I-NEXT: addi a3, a1, -1
-; RV64I-NEXT: xor a1, a1, a3
-; RV64I-NEXT: xor a0, a0, a2
-; RV64I-NEXT: sltu a0, a2, a0
-; RV64I-NEXT: sltu a1, a3, a1
-; RV64I-NEXT: xori a0, a0, 1
-; RV64I-NEXT: xori a1, a1, 1
-; RV64I-NEXT: ret
-;
-; RV64ZBB-LABEL: ctpop_v2i64_ne_one:
-; RV64ZBB: # %bb.0:
-; RV64ZBB-NEXT: cpop a1, a1
-; RV64ZBB-NEXT: cpop a0, a0
-; RV64ZBB-NEXT: addi a0, a0, -1
-; RV64ZBB-NEXT: addi a1, a1, -1
-; RV64ZBB-NEXT: snez a0, a0
-; RV64ZBB-NEXT: snez a1, a1
-; RV64ZBB-NEXT: ret
+; CHECK-LABEL: ctpop_v2i64_ne_one:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a2, a0, -1
+; CHECK-NEXT: addi a3, a1, -1
+; CHECK-NEXT: xor a1, a1, a3
+; CHECK-NEXT: xor a0, a0, a2
+; CHECK-NEXT: sltu a0, a2, a0
+; CHECK-NEXT: sltu a1, a3, a1
+; CHECK-NEXT: xori a0, a0, 1
+; CHECK-NEXT: xori a1, a1, 1
+; CHECK-NEXT: ret
  %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
  %2 = icmp ne <2 x i64> %1,
  ret <2 x i1> %2
diff --git a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
index 9e4a10d9eb864..9c5df5f70fc15 100644
--- a/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/unfold-pow2-test-vec.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -p 'require,function(codegenprepare)' -S %s \
 ; RUN: | FileCheck %s --check-prefix=SLOW
-; RUN: opt -p 'require,function(codegenprepare)' -S --mattr=+zvbb %s \
+; RUN: opt -p 'require,function(codegenprepare)' -S --mattr=+v,+zvbb %s \
 ; RUN: | FileCheck %s --check-prefix=FAST
 ; REQUIRES: riscv-registered-target