Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 40 additions & 21 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19443,20 +19443,37 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
return CSNeg;
}

static std::optional<unsigned> IsSVECntIntrinsic(SDValue S) {
static bool IsSVECntIntrinsic(SDValue S) {
switch(getIntrinsicID(S.getNode())) {
default:
break;
case Intrinsic::aarch64_sve_cntb:
return 8;
case Intrinsic::aarch64_sve_cnth:
return 16;
case Intrinsic::aarch64_sve_cntw:
return 32;
case Intrinsic::aarch64_sve_cntd:
return 64;
return true;
}
return false;
}

// Returns the maximum (scalable) value that can be returned by an SVE count
// intrinsic. Returns std::nullopt if \p Op is not aarch64_sve_cnt*.
//
// The fixed-pattern counters map to a constant scalable count
// (cntd -> vscale x 2, cntw -> vscale x 4, cnth -> vscale x 8,
// cntb -> vscale x 16). For cntp the bound is the element count of the
// value type of operand 1.
static std::optional<ElementCount> getMaxValueForSVECntIntrinsic(SDValue Op) {
  switch (getIntrinsicID(Op.getNode())) {
  case Intrinsic::aarch64_sve_cntp:
    // NOTE(review): operand 1 is presumably the governing predicate (operand 0
    // of an intrinsic node being the intrinsic ID) -- confirm. cntp cannot
    // count more active lanes than that predicate type has elements.
    return Op.getOperand(1).getValueType().getVectorElementCount();
  case Intrinsic::aarch64_sve_cntd:
    return ElementCount::getScalable(2);
  case Intrinsic::aarch64_sve_cntw:
    return ElementCount::getScalable(4);
  case Intrinsic::aarch64_sve_cnth:
    return ElementCount::getScalable(8);
  case Intrinsic::aarch64_sve_cntb:
    return ElementCount::getScalable(16);
  default:
    // Not an SVE count intrinsic. Every path through the switch returns, so
    // no trailing return statement is needed after it.
    return std::nullopt;
  }
}

/// Calculates what the pre-extend type is, based on the extension
Expand Down Expand Up @@ -31666,22 +31683,24 @@ bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
case ISD::INTRINSIC_WO_CHAIN: {
if (auto ElementSize = IsSVECntIntrinsic(Op)) {
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
if (!MaxSVEVectorSizeInBits)
MaxSVEVectorSizeInBits = AArch64::SVEMaxBitsPerVector;
unsigned MaxElements = MaxSVEVectorSizeInBits / *ElementSize;
// The SVE count intrinsics don't support the multiplier immediate so we
// don't have to account for that here. The value returned may be slightly
// over the true required bits, as this is based on the "ALL" pattern. The
// other patterns are also exposed by these intrinsics, but they all
// return a value that's strictly less than "ALL".
unsigned RequiredBits = llvm::bit_width(MaxElements);
unsigned BitWidth = Known.Zero.getBitWidth();
if (RequiredBits < BitWidth)
Known.Zero.setHighBits(BitWidth - RequiredBits);
std::optional<ElementCount> MaxCount = getMaxValueForSVECntIntrinsic(Op);
if (!MaxCount)
return false;
}
unsigned MaxSVEVectorSizeInBits = Subtarget->getMaxSVEVectorSizeInBits();
if (!MaxSVEVectorSizeInBits)
MaxSVEVectorSizeInBits = AArch64::SVEMaxBitsPerVector;
unsigned VscaleMax = MaxSVEVectorSizeInBits / 128;
unsigned MaxValue = MaxCount->getKnownMinValue() * VscaleMax;
// The SVE count intrinsics don't support the multiplier immediate so we
// don't have to account for that here. The value returned may be slightly
// over the true required bits, as this is based on the "ALL" pattern. The
// other patterns are also exposed by these intrinsics, but they all
// return a value that's strictly less than "ALL".
unsigned RequiredBits = llvm::bit_width(MaxValue);
unsigned BitWidth = Known.Zero.getBitWidth();
if (RequiredBits < BitWidth)
Known.Zero.setHighBits(BitWidth - RequiredBits);
return false;
}
}

Expand Down
11 changes: 5 additions & 6 deletions llvm/test/CodeGen/AArch64/sve-vector-compress.ll
Original file line number Diff line number Diff line change
Expand Up @@ -143,20 +143,19 @@ define <vscale x 8 x i32> @test_compress_large(<vscale x 8 x i32> %vec, <vscale
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: punpklo p2.h, p0.b
; CHECK-NEXT: punpklo p1.h, p0.b
; CHECK-NEXT: cnth x9
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: sub x9, x9, #1
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: compact z0.s, p2, z0.s
; CHECK-NEXT: cntp x8, p1, p2.s
; CHECK-NEXT: compact z0.s, p1, z0.s
; CHECK-NEXT: cntp x8, p2, p1.s
; CHECK-NEXT: compact z1.s, p0, z1.s
; CHECK-NEXT: str z0, [sp]
; CHECK-NEXT: mov w8, w8
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: csel x8, x8, x9, lo
; CHECK-NEXT: mov x9, sp
; CHECK-NEXT: st1w { z1.s }, p1, [x9, x8, lsl #2]
; CHECK-NEXT: st1w { z1.s }, p2, [x9, x8, lsl #2]
; CHECK-NEXT: ldr z0, [sp]
; CHECK-NEXT: ldr z1, [sp, #1, mul vl]
; CHECK-NEXT: addvl sp, sp, #2
Expand Down
56 changes: 56 additions & 0 deletions llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,62 @@ define i64 @cntd_and_elimination() {
ret i64 %result
}

; Known-bits test for cntp on <vscale x 16 x i1> predicates.
; In the expected assembly below, the `and` with 511 (0x1ff) produces no
; instruction (the cntp result in x8 feeds the add directly), and the
; 17179869180 (0x3fffffffc) mask is narrowed to 0x1fc, i.e. only bits 2-8 are
; kept. Both outcomes correspond to the cntp result having no set bits above
; bit 8.
; NOTE(review): presumably 16 elements x a maximum vscale of 16 = 256, which
; needs 9 bits; the vscale bound itself is not visible in this test - confirm.
define i64 @cntp_nxv16i1_and_elimination(<vscale x 16 x i1> %p) {
; CHECK-LABEL: cntp_nxv16i1_and_elimination:
; CHECK: // %bb.0:
; CHECK-NEXT: cntp x8, p0, p0.b
; CHECK-NEXT: and x9, x8, #0x1fc
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %p, <vscale x 16 x i1> %p)
; Mask covering bits 0-8: expected to be removed entirely.
%and_redundant = and i64 %cntp, 511
; Mask covering bits 2-33: expected to survive, shrunk to bits 2-8.
%and_required = and i64 %cntp, 17179869180
%result = add i64 %and_redundant, %and_required
ret i64 %result
}

; Known-bits test for cntp on <vscale x 8 x i1> predicates.
; In the expected assembly below, the `and` with 1023 (0x3ff) produces no
; instruction (the cntp result in x8 feeds the add directly), and the
; 17179869180 (0x3fffffffc) mask is narrowed to 0xfc, i.e. only bits 2-7 are
; kept. Both outcomes correspond to the cntp result having no set bits above
; bit 7.
; NOTE(review): presumably 8 elements x a maximum vscale of 16 = 128, which
; needs 8 bits; the vscale bound itself is not visible in this test - confirm.
define i64 @cntp_nxv8i1_and_elimination(<vscale x 8 x i1> %p) {
; CHECK-LABEL: cntp_nxv8i1_and_elimination:
; CHECK: // %bb.0:
; CHECK-NEXT: cntp x8, p0, p0.h
; CHECK-NEXT: and x9, x8, #0xfc
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %p, <vscale x 8 x i1> %p)
; Mask covering bits 0-9: expected to be removed entirely.
%and_redundant = and i64 %cntp, 1023
; Mask covering bits 2-33: expected to survive, shrunk to bits 2-7.
%and_required = and i64 %cntp, 17179869180
%result = add i64 %and_redundant, %and_required
ret i64 %result
}

; Known-bits test for cntp on <vscale x 4 x i1> predicates.
; In the expected assembly below, the `and` with 127 (0x7f) produces no
; instruction (the cntp result in x8 feeds the add directly), and the
; 17179869180 (0x3fffffffc) mask is narrowed to 0x7c, i.e. only bits 2-6 are
; kept. Both outcomes correspond to the cntp result having no set bits above
; bit 6.
; NOTE(review): presumably 4 elements x a maximum vscale of 16 = 64, which
; needs 7 bits; the vscale bound itself is not visible in this test - confirm.
define i64 @cntp_nxv4i1_and_elimination(<vscale x 4 x i1> %p) {
; CHECK-LABEL: cntp_nxv4i1_and_elimination:
; CHECK: // %bb.0:
; CHECK-NEXT: cntp x8, p0, p0.s
; CHECK-NEXT: and x9, x8, #0x7c
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %p, <vscale x 4 x i1> %p)
; Mask covering bits 0-6: expected to be removed entirely.
%and_redundant = and i64 %cntp, 127
; Mask covering bits 2-33: expected to survive, shrunk to bits 2-6.
%and_required = and i64 %cntp, 17179869180
%result = add i64 %and_redundant, %and_required
ret i64 %result
}

; Known-bits test for cntp on <vscale x 2 x i1> predicates.
; In the expected assembly below, the `and` with 63 (0x3f) produces no
; instruction (the cntp result in x8 feeds the add directly), and the
; 17179869180 (0x3fffffffc) mask is narrowed to 0x3c, i.e. only bits 2-5 are
; kept. Both outcomes correspond to the cntp result having no set bits above
; bit 5.
; NOTE(review): presumably 2 elements x a maximum vscale of 16 = 32, which
; needs 6 bits; the vscale bound itself is not visible in this test - confirm.
define i64 @cntp_nxv2i1_and_elimination(<vscale x 2 x i1> %p) {
; CHECK-LABEL: cntp_nxv2i1_and_elimination:
; CHECK: // %bb.0:
; CHECK-NEXT: cntp x8, p0, p0.d
; CHECK-NEXT: and x9, x8, #0x3c
; CHECK-NEXT: add x0, x8, x9
; CHECK-NEXT: ret
%cntp = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %p, <vscale x 2 x i1> %p)
; Mask covering bits 0-5: expected to be removed entirely.
%and_redundant = and i64 %cntp, 63
; Mask covering bits 2-33: expected to survive, shrunk to bits 2-5.
%and_required = and i64 %cntp, 17179869180
%result = add i64 %and_redundant, %and_required
ret i64 %result
}

define i64 @vscale_trunc_zext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_zext:
; CHECK: // %bb.0:
Expand Down