Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15501,6 +15501,72 @@ SDValue AArch64TargetLowering::LowerFixedLengthBuildVectorToSVE(
return convertFromScalableVector(DAG, VT, Vec);
}

/// If \p V is a BUILD_VECTOR whose splat value is a constant, store that
/// constant node in \p Const and return its zero-extended value.
///
/// Returns std::nullopt otherwise. \p Const is only written when \p V is a
/// BUILD_VECTOR; in that case it is null whenever the splat value is not a
/// ConstantSDNode.
static std::optional<int64_t> getSplatConstant(SDValue V,
                                               ConstantSDNode *&Const) {
  auto *BuildVec = dyn_cast<BuildVectorSDNode>(V);
  if (!BuildVec)
    return std::nullopt;

  // getSplatValue() may yield an empty SDValue, hence the *_if_present cast.
  Const = dyn_cast_if_present<ConstantSDNode>(BuildVec->getSplatValue());
  if (!Const)
    return std::nullopt;

  return Const->getZExtValue();
}

/// Return true if \p SplatImm passes the immediate-range check used here for
/// the integer binary operation \p Opcode on a value of type \p VT.
///
///  - ADD/SUB: an unsigned 8-bit value, or (when \p VT is wider than 8 bits)
///    an unsigned 16-bit value that is a multiple of 256.
///  - AND/OR/XOR: whatever AArch64_AM::isLogicalImmediate accepts for a
///    64-bit width.
///  - MUL: a signed 8-bit value.
///
/// Every other opcode is rejected.
static bool isSVESplatImmForOp(unsigned Opcode, MVT VT, int64_t SplatImm) {
  // TODO: Support more than integer binops.
  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
    if (isUInt<8>(SplatImm))
      return true;
    // Second accepted form: a 16-bit value with a zero low byte, only for
    // types wider than one byte.
    if (VT.getFixedSizeInBits() <= 8)
      return false;
    return isUInt<16>(SplatImm) && SplatImm % 256 == 0;
  }

  if (Opcode == ISD::AND || Opcode == ISD::OR || Opcode == ISD::XOR)
    return AArch64_AM::isLogicalImmediate(SplatImm, 64);

  if (Opcode == ISD::MUL)
    return isInt<8>(SplatImm);

  return false;
}

/// Try to fold a constant-splat BUILD_VECTOR \p Op into the users that take it
/// as their second operand, by re-emitting each such user on the SVE container
/// type with a scalable splat of the same constant.
///
/// Only 128-bit vectors are considered, and only when SVE or streaming SVE is
/// available. Qualifying users are replaced through ReplaceAllUsesWith; the
/// function itself always returns an empty SDValue (see the FIXME at the end).
static SDValue tryFoldSplatIntoUsersWithSVE(SDValue Op, SelectionDAG &DAG) {
  auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
  // NOTE(review): It is not clear that this fold is always preferable; it may
  // need to depend on some target property.
  if (!Subtarget.isSVEorStreamingSVEAvailable())
    return SDValue();

  EVT VT = Op->getValueType(0);
  if (!VT.is128BitVector())
    return SDValue();

  // Bail out unless Op is a BUILD_VECTOR splatting a single constant.
  ConstantSDNode *Splat;
  auto SplatImm = getSplatConstant(Op, Splat);
  if (!SplatImm)
    return SDValue();

  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

  for (SDUse &U : Op->uses()) {
    SDNode *User = U.getUser();
    unsigned UserOpc = User->getOpcode();
    // Only handle users that take the splat as operand 1 and whose opcode
    // passes the immediate check for the element type.
    if (U.getOperandNo() != 1 ||
        !isSVESplatImmForOp(UserOpc, VT.getScalarType().getSimpleVT(),
                            *SplatImm))
      continue;

    SDLoc DL(U);
    // Rebuild the user on the scalable container type: operand 0 is converted
    // to the container, and the splat is recreated from the same constant.
    SDValue LHS =
        convertToScalableVector(DAG, ContainerVT, User->getOperand(0));
    SDValue SVESplat = DAG.getSplatVector(ContainerVT, DL, SDValue(Splat, 0));
    SDValue Result = DAG.getNode(UserOpc, DL, ContainerVT, LHS, SVESplat);
    // Convert back to the original type and redirect the old user's uses.
    Result = convertFromScalableVector(DAG, VT, Result);
    DAG.ReplaceAllUsesWith(SDValue(User, 0), Result);
  }

  // FIXME: We always have to return SDValue() as LowerBUILD_VECTOR is called in
  // many places, and there's no guarantee `Op->uses()` contains all the users.
  // This means the BV will still be lowered (but then DCE'd if we replaced all
  // users in this fold).
  return SDValue();
}

SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
Expand Down Expand Up @@ -15535,6 +15601,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op;
}

if (SDValue V = tryFoldSplatIntoUsersWithSVE(Op, DAG))
return V;

if (SDValue V = ConstantBuildVector(Op, DAG, Subtarget))
return V;

Expand Down
170 changes: 111 additions & 59 deletions llvm/test/CodeGen/AArch64/aarch64-smull.ll
Original file line number Diff line number Diff line change
Expand Up @@ -385,11 +385,11 @@ define <4 x i32> @amull_v4i16_v4i32(ptr %A, ptr %B) nounwind {
;
; CHECK-SVE-LABEL: amull_v4i16_v4i32:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: ldr d1, [x0]
; CHECK-SVE-NEXT: ldr d2, [x1]
; CHECK-SVE-NEXT: movi v0.2d, #0x00ffff0000ffff
; CHECK-SVE-NEXT: smull v1.4s, v1.4h, v2.4h
; CHECK-SVE-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-SVE-NEXT: ldr d0, [x0]
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h
; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_v4i16_v4i32:
Expand Down Expand Up @@ -421,11 +421,11 @@ define <2 x i64> @amull_v2i32_v2i64(ptr %A, ptr %B) nounwind {
;
; CHECK-SVE-LABEL: amull_v2i32_v2i64:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: ldr d1, [x0]
; CHECK-SVE-NEXT: ldr d2, [x1]
; CHECK-SVE-NEXT: movi v0.2d, #0x000000ffffffff
; CHECK-SVE-NEXT: smull v1.2d, v1.2s, v2.2s
; CHECK-SVE-NEXT: and v0.16b, v1.16b, v0.16b
; CHECK-SVE-NEXT: ldr d0, [x0]
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_v2i32_v2i64:
Expand Down Expand Up @@ -609,8 +609,8 @@ define <4 x i32> @amlal_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlal v0.4s, v1.4h, v2.4h
; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlal_v4i16_v4i32:
Expand Down Expand Up @@ -650,8 +650,8 @@ define <2 x i64> @amlal_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlal v0.2d, v1.2s, v2.2s
; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlal_v2i32_v2i64:
Expand Down Expand Up @@ -838,8 +838,8 @@ define <4 x i32> @amlsl_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlsl v0.4s, v1.4h, v2.4h
; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlsl_v4i16_v4i32:
Expand Down Expand Up @@ -879,8 +879,8 @@ define <2 x i64> @amlsl_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-SVE-NEXT: ldr d1, [x1]
; CHECK-SVE-NEXT: ldr d2, [x2]
; CHECK-SVE-NEXT: smlsl v0.2d, v1.2s, v2.2s
; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amlsl_v2i32_v2i64:
Expand Down Expand Up @@ -1118,8 +1118,8 @@ define <4 x i32> @amull_extvec_v4i16_v4i32(<4 x i16> %arg) nounwind {
; CHECK-SVE-NEXT: mov w8, #1234 // =0x4d2
; CHECK-SVE-NEXT: dup v1.4h, w8
; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h
; CHECK-SVE-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SVE-NEXT: and z0.s, z0.s, #0xffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_extvec_v4i16_v4i32:
Expand Down Expand Up @@ -1151,8 +1151,8 @@ define <2 x i64> @amull_extvec_v2i32_v2i64(<2 x i32> %arg) nounwind {
; CHECK-SVE-NEXT: mov w8, #1234 // =0x4d2
; CHECK-SVE-NEXT: dup v1.2s, w8
; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
; CHECK-SVE-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-SVE-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-SVE-NEXT: and z0.d, z0.d, #0xffffffff
; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull_extvec_v2i32_v2i64:
Expand Down Expand Up @@ -1464,11 +1464,12 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) {
;
; CHECK-SVE-LABEL: amull2_i16:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: movi v2.2d, #0x00ffff0000ffff
; CHECK-SVE-NEXT: smull v3.4s, v0.4h, v1.4h
; CHECK-SVE-NEXT: smull2 v0.4s, v0.8h, v1.8h
; CHECK-SVE-NEXT: and v1.16b, v0.16b, v2.16b
; CHECK-SVE-NEXT: and v0.16b, v3.16b, v2.16b
; CHECK-SVE-NEXT: smull v2.4s, v0.4h, v1.4h
; CHECK-SVE-NEXT: smull2 v1.4s, v0.8h, v1.8h
; CHECK-SVE-NEXT: and z2.s, z2.s, #0xffff
; CHECK-SVE-NEXT: and z1.s, z1.s, #0xffff
; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-SVE-NEXT: mov v0.16b, v2.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull2_i16:
Expand Down Expand Up @@ -1498,11 +1499,12 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) {
;
; CHECK-SVE-LABEL: amull2_i32:
; CHECK-SVE: // %bb.0:
; CHECK-SVE-NEXT: movi v2.2d, #0x000000ffffffff
; CHECK-SVE-NEXT: smull v3.2d, v0.2s, v1.2s
; CHECK-SVE-NEXT: smull2 v0.2d, v0.4s, v1.4s
; CHECK-SVE-NEXT: and v1.16b, v0.16b, v2.16b
; CHECK-SVE-NEXT: and v0.16b, v3.16b, v2.16b
; CHECK-SVE-NEXT: smull v2.2d, v0.2s, v1.2s
; CHECK-SVE-NEXT: smull2 v1.2d, v0.4s, v1.4s
; CHECK-SVE-NEXT: and z2.d, z2.d, #0xffffffff
; CHECK-SVE-NEXT: and z1.d, z1.d, #0xffffffff
; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
; CHECK-SVE-NEXT: mov v0.16b, v2.16b
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: amull2_i32:
Expand Down Expand Up @@ -1580,13 +1582,29 @@ entry:
}

define <8 x i16> @umull_and256_v8i16(<8 x i8> %src1, <8 x i16> %src2) {
; CHECK-LABEL: umull_and256_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.8h, #1, lsl #8
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
; CHECK-NEON-LABEL: umull_and256_v8i16:
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.8h, #1, lsl #8
; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEON-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: umull_and256_v8i16:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-SVE-NEXT: and z1.h, z1.h, #0x100
; CHECK-SVE-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umull_and256_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.8h, #1, lsl #8
; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT: ret
entry:
%in1 = zext <8 x i8> %src1 to <8 x i16>
%in2 = and <8 x i16> %src2, <i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256, i16 256>
Expand Down Expand Up @@ -1654,13 +1672,29 @@ entry:
}

define <4 x i32> @umull_and_v4i32(<4 x i16> %src1, <4 x i32> %src2) {
; CHECK-LABEL: umull_and_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: xtn v1.4h, v1.4s
; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEXT: ret
; CHECK-NEON-LABEL: umull_and_v4i32:
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.2d, #0x0000ff000000ff
; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEON-NEXT: xtn v1.4h, v1.4s
; CHECK-NEON-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: umull_and_v4i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SVE-NEXT: and z1.s, z1.s, #0xff
; CHECK-SVE-NEXT: xtn v1.4h, v1.4s
; CHECK-SVE-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umull_and_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0x0000ff000000ff
; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: xtn v1.4h, v1.4s
; CHECK-GI-NEXT: umull v0.4s, v0.4h, v1.4h
; CHECK-GI-NEXT: ret
entry:
%in1 = zext <4 x i16> %src1 to <4 x i32>
%in2 = and <4 x i32> %src2, <i32 255, i32 255, i32 255, i32 255>
Expand All @@ -1681,9 +1715,10 @@ define <8 x i32> @umull_and_v8i32(<8 x i16> %src1, <8 x i32> %src2) {
;
; CHECK-SVE-LABEL: umull_and_v8i32:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v3.2d, #0x0000ff000000ff
; CHECK-SVE-NEXT: and v2.16b, v2.16b, v3.16b
; CHECK-SVE-NEXT: and v1.16b, v1.16b, v3.16b
; CHECK-SVE-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SVE-NEXT: and z2.s, z2.s, #0xff
; CHECK-SVE-NEXT: and z1.s, z1.s, #0xff
; CHECK-SVE-NEXT: uzp1 v2.8h, v1.8h, v2.8h
; CHECK-SVE-NEXT: umull2 v1.4s, v0.8h, v2.8h
; CHECK-SVE-NEXT: umull v0.4s, v0.4h, v2.4h
Expand Down Expand Up @@ -1743,13 +1778,29 @@ entry:
}

define <2 x i64> @umull_and_v2i64(<2 x i32> %src1, <2 x i64> %src2) {
; CHECK-LABEL: umull_and_v2i64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v2.2d, #0x000000000000ff
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: xtn v1.2s, v1.2d
; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT: ret
; CHECK-NEON-LABEL: umull_and_v2i64:
; CHECK-NEON: // %bb.0: // %entry
; CHECK-NEON-NEXT: movi v2.2d, #0x000000000000ff
; CHECK-NEON-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEON-NEXT: xtn v1.2s, v1.2d
; CHECK-NEON-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-NEON-NEXT: ret
;
; CHECK-SVE-LABEL: umull_and_v2i64:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SVE-NEXT: and z1.d, z1.d, #0xff
; CHECK-SVE-NEXT: xtn v1.2s, v1.2d
; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-SVE-NEXT: ret
;
; CHECK-GI-LABEL: umull_and_v2i64:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: movi v2.2d, #0x000000000000ff
; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: xtn v1.2s, v1.2d
; CHECK-GI-NEXT: umull v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT: ret
entry:
%in1 = zext <2 x i32> %src1 to <2 x i64>
%in2 = and <2 x i64> %src2, <i64 255, i64 255>
Expand All @@ -1770,9 +1821,10 @@ define <4 x i64> @umull_and_v4i64(<4 x i32> %src1, <4 x i64> %src2) {
;
; CHECK-SVE-LABEL: umull_and_v4i64:
; CHECK-SVE: // %bb.0: // %entry
; CHECK-SVE-NEXT: movi v3.2d, #0x000000000000ff
; CHECK-SVE-NEXT: and v2.16b, v2.16b, v3.16b
; CHECK-SVE-NEXT: and v1.16b, v1.16b, v3.16b
; CHECK-SVE-NEXT: // kill: def $q2 killed $q2 def $z2
; CHECK-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; CHECK-SVE-NEXT: and z2.d, z2.d, #0xff
; CHECK-SVE-NEXT: and z1.d, z1.d, #0xff
; CHECK-SVE-NEXT: uzp1 v2.4s, v1.4s, v2.4s
; CHECK-SVE-NEXT: umull2 v1.2d, v0.4s, v2.4s
; CHECK-SVE-NEXT: umull v0.2d, v0.2s, v2.2s
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/extract-vector-cmp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,13 @@ define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
; CHECK-LABEL: vector_loop_with_icmp:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: mov z1.d, #2 // =0x2
; CHECK-NEXT: add x8, x0, #4
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: mov w10, #1 // =0x1
; CHECK-NEXT: b .LBB5_2
; CHECK-NEXT: .LBB5_1: // %pred.store.continue6
; CHECK-NEXT: // in Loop: Header=BB5_2 Depth=1
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: add z0.d, z0.d, #2 // =0x2
; CHECK-NEXT: subs x9, x9, #2
; CHECK-NEXT: add x8, x8, #8
; CHECK-NEXT: b.eq .LBB5_6
Expand Down
Loading
Loading