[AArch64-SVE]: Force generating code compatible with streaming mode.
When streaming mode is enabled, lower some operations and disable some code paths so that the generated code is compatible with streaming mode.
Add the streaming-mode flag to the new sve-fixed-length test files:
build_vector.ll
concat.ll
extract-subvector.ll
extract-vector-elt.ll
int-shifts.ll
loads.ll
shuffle.ll
stores.ll

Differential Revision: https://reviews.llvm.org/D135564
hassnaaHamdi committed Oct 31, 2022
1 parent 325a308 commit 681888e
Showing 11 changed files with 1,913 additions and 23 deletions.
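
For context, the recurring pattern in this patch is to pass Subtarget->forceStreamingCompatibleSVE() into useSVEForFixedLengthVectorVT(), so that fixed-length vector operations are routed to the SVE predicated lowering paths instead of NEON whenever streaming-compatible code is requested. The snippet below is a minimal, self-contained C++ model of that gating decision, not LLVM's actual API: SubtargetModel and useSVEForFixedLength are illustrative stand-ins.

// A minimal sketch of the streaming-compatible gating decision, using
// simplified stand-in types. In the real code the query is
// Subtarget->forceStreamingCompatibleSVE() and the routing happens inside
// useSVEForFixedLengthVectorVT() in AArch64ISelLowering.cpp.
#include <cstdio>

struct SubtargetModel {
  bool HasSVE;                      // SVE is available
  unsigned MinSVEVectorSizeInBits;  // 0 if no minimum vector length is known
  bool ForceStreamingCompatibleSVE; // -force-streaming-compatible-sve
};

// Should a fixed-length vector of this size be lowered via SVE rather than NEON?
static bool useSVEForFixedLength(const SubtargetModel &ST,
                                 unsigned VTSizeInBits) {
  if (!ST.HasSVE)
    return false;
  // NEON is unavailable in streaming mode, so every fixed-length vector that
  // would normally map onto NEON must take the SVE path instead.
  if (ST.ForceStreamingCompatibleSVE)
    return true;
  // Otherwise SVE is only used for vectors wider than the 128-bit NEON
  // registers, and only when a minimum SVE vector length is known.
  return ST.MinSVEVectorSizeInBits >= 256 && VTSizeInBits > 128 &&
         VTSizeInBits <= ST.MinSVEVectorSizeInBits;
}

int main() {
  SubtargetModel Streaming{true, 0, true};
  SubtargetModel Default{true, 0, false};
  std::printf("128-bit vector, streaming-compatible: use SVE? %d\n",
              useSVEForFixedLength(Streaming, 128)); // 1
  std::printf("128-bit vector, default subtarget:    use SVE? %d\n",
              useSVEForFixedLength(Default, 128));   // 0
  return 0;
}

With -force-streaming-compatible-sve (the flag used by the RUN lines in the new tests), even 64- and 128-bit fixed-length vectors that NEON could normally handle are lowered through the SVE code paths registered by addTypeForStreamingSVE in the diff below.
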
65 changes: 52 additions & 13 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1391,6 +1391,16 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);

if (Subtarget->forceStreamingCompatibleSVE()) {
for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
MVT::v4i32, MVT::v2i64})
addTypeForStreamingSVE(VT);

for (MVT VT :
{MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
addTypeForStreamingSVE(VT);
}

// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
@@ -1597,6 +1607,14 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
return false;
}

void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
}

void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");

@@ -5773,8 +5791,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::MLOAD:
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
Subtarget->forceStreamingCompatibleSVE()))
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
@@ -11400,9 +11417,13 @@ static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
EVT VT = Op.getValueType();
if (VT.isFixedLengthVector() &&
DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
return SDValue();

if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
bool isAdvSIMDModImm = false;
uint64_t Shift;
@@ -11448,9 +11469,13 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const APInt &Bits,
const SDValue *LHS = nullptr) {
EVT VT = Op.getValueType();
if (VT.isFixedLengthVector() &&
DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
return SDValue();

if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
EVT VT = Op.getValueType();
MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
bool isAdvSIMDModImm = false;
uint64_t Shift;
@@ -12128,7 +12153,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,

SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
Subtarget->forceStreamingCompatibleSVE()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);

assert(Op.getValueType().isScalableVector() &&
@@ -12234,7 +12260,8 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}

if (useSVEForFixedLengthVectorVT(VT))
if (useSVEForFixedLengthVectorVT(VT,
Subtarget->forceStreamingCompatibleSVE()))
return LowerFixedLengthExtractVectorElt(Op, DAG);

// Check for non-constant or out of range lane.
@@ -12296,10 +12323,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
InVT.getSizeInBits() == 128)
InVT.getSizeInBits() == 128 && !Subtarget->forceStreamingCompatibleSVE())
return Op;

if (useSVEForFixedLengthVectorVT(InVT)) {
if (useSVEForFixedLengthVectorVT(InVT,
Subtarget->forceStreamingCompatibleSVE())) {
SDLoc DL(Op);

EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
@@ -12487,7 +12515,8 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {

bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
if (useSVEForFixedLengthVectorVT(VT))
if (useSVEForFixedLengthVectorVT(VT,
Subtarget->forceStreamingCompatibleSVE()))
return false;

if (VT.getVectorNumElements() == 4 &&
@@ -12597,7 +12626,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,

switch (Op.getOpcode()) {
case ISD::SHL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(VT,
Subtarget->forceStreamingCompatibleSVE()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);

if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
@@ -12609,7 +12640,9 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
Op.getOperand(0), Op.getOperand(1));
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
if (VT.isScalableVector() ||
useSVEForFixedLengthVectorVT(
VT, Subtarget->forceStreamingCompatibleSVE())) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
@@ -14008,6 +14041,11 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
// Skip if streaming compatible SVE is enabled, because it generates invalid
// code in streaming mode when SVE length is not specified.
if (Subtarget->forceStreamingCompatibleSVE())
return false;

assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");

@@ -22489,7 +22527,7 @@ SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
assert(useSVEForFixedLengthVectorVT(VT) &&
assert(VT.isFixedLengthVector() && isTypeLegal(VT) &&
"Only expected to lower fixed length vector operation!");
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);

@@ -22505,7 +22543,8 @@ SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
}

// "cast" fixed length vector to a scalable vector.
assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
assert(V.getValueType().isFixedLengthVector() &&
isTypeLegal(V.getValueType()) &&
"Only fixed length vectors are supported!");
Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
}
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -899,6 +899,7 @@ class AArch64TargetLowering : public TargetLowering {
bool isExtFreeImpl(const Instruction *Ext) const override;

void addTypeForNEON(MVT VT);
void addTypeForStreamingSVE(MVT VT);
void addTypeForFixedLengthSVE(MVT VT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3032,7 +3032,7 @@ let Predicates = [HasSVEorSME] in {
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;

// Extract element from vector with immediate index that's within the bottom 128-bits.
let AddedComplexity = 1 in {
let Predicates = [NotInStreamingSVEMode], AddedComplexity = 1 in {
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
(i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
@@ -3041,8 +3041,9 @@ let Predicates = [HasSVEorSME] in {
(i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
(i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
}
} // End NotInStreamingSVEMode

let Predicates = [NotInStreamingSVEMode] in {
def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
(i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
@@ -3055,6 +3056,7 @@ let Predicates = [HasSVEorSME] in {

def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
(i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
} // End NotInStreamingSVEMode

// Extract first element from vector.
let AddedComplexity = 2 in {
@@ -0,0 +1,138 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -force-streaming-compatible-sve < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

define void @build_vector_7_inc1_v4i1(ptr %a) #0 {
; CHECK-LABEL: build_vector_7_inc1_v4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #5
; CHECK-NEXT: strb w8, [x0]
; CHECK-NEXT: ret
store <4 x i1> <i1 true, i1 false, i1 true, i1 false>, ptr %a, align 1
ret void
}

define void @build_vector_7_inc1_v32i8(ptr %a) #0 {
; CHECK-LABEL: build_vector_7_inc1_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.b, #0, #1
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
; CHECK-NEXT: add z1.b, z1.b, #23 // =0x17
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
store <32 x i8> <i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38>, ptr %a, align 1
ret void
}

define void @build_vector_0_inc2_v16i16(ptr %a) #0 {
; CHECK-LABEL: build_vector_0_inc2_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.h, #0, #2
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: add z0.h, z0.h, #16 // =0x10
; CHECK-NEXT: str q0, [x0, #16]
; CHECK-NEXT: ret
store <16 x i16> <i16 0, i16 2, i16 4, i16 6, i16 8, i16 10, i16 12, i16 14, i16 16, i16 18, i16 20, i16 22, i16 24, i16 26, i16 28, i16 30>, ptr %a, align 2
ret void
}

; Negative const stride.
define void @build_vector_0_dec3_v8i32(ptr %a) #0 {
; CHECK-LABEL: build_vector_0_dec3_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.s, #0, #-3
; CHECK-NEXT: mov z1.s, #-12 // =0xfffffffffffffff4
; CHECK-NEXT: add z1.s, z0.s, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
store <8 x i32> <i32 0, i32 -3, i32 -6, i32 -9, i32 -12, i32 -15, i32 -18, i32 -21>, ptr %a, align 4
ret void
}

; Constant stride that's too big to be directly encoded into the index.
define void @build_vector_minus2_dec32_v4i64(ptr %a) #0 {
; CHECK-LABEL: build_vector_minus2_dec32_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #-32
; CHECK-NEXT: mov z0.d, #-66 // =0xffffffffffffffbe
; CHECK-NEXT: mov z2.d, #-2 // =0xfffffffffffffffe
; CHECK-NEXT: index z1.d, #0, x8
; CHECK-NEXT: add z0.d, z1.d, z0.d
; CHECK-NEXT: add z1.d, z1.d, z2.d
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
store <4 x i64> <i64 -2, i64 -34, i64 -66, i64 -98>, ptr %a, align 8
ret void
}

; Constant but not a sequence.
define void @build_vector_no_stride_v4i64(ptr %a) #0 {
; CHECK-LABEL: build_vector_no_stride_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: index z0.d, #1, #7
; CHECK-NEXT: index z1.d, #0, #4
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
store <4 x i64> <i64 0, i64 4, i64 1, i64 8>, ptr %a, align 8
ret void
}

define void @build_vector_0_inc2_v16f16(ptr %a) #0 {
; CHECK-LABEL: build_vector_0_inc2_v16f16:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: adrp x9, .LCPI6_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI6_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
store <16 x half> <half 0.0, half 2.0, half 4.0, half 6.0, half 8.0, half 10.0, half 12.0, half 14.0, half 16.0, half 18.0, half 20.0, half 22.0, half 24.0, half 26.0, half 28.0, half 30.0>, ptr %a, align 2
ret void
}

; Negative const stride.
define void @build_vector_0_dec3_v8f32(ptr %a) #0 {
; CHECK-LABEL: build_vector_0_dec3_v8f32:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: adrp x9, .LCPI7_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI7_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
store <8 x float> <float 0.0, float -3.0, float -6.0, float -9.0, float -12.0, float -15.0, float -18.0, float -21.0>, ptr %a, align 4
ret void
}

; Constant stride that's too big to be directly encoded into the index.
define void @build_vector_minus2_dec32_v4f64(ptr %a) #0 {
; CHECK-LABEL: build_vector_minus2_dec32_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: adrp x9, .LCPI8_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI8_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
store <4 x double> <double -2.0, double -34.0, double -66.0, double -98.0>, ptr %a, align 8
ret void
}

; Constant but not a sequence.
define void @build_vector_no_stride_v4f64(ptr %a) #0 {
; CHECK-LABEL: build_vector_no_stride_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI9_0
; CHECK-NEXT: adrp x9, .LCPI9_1
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_0]
; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI9_1]
; CHECK-NEXT: stp q1, q0, [x0]
; CHECK-NEXT: ret
store <4 x double> <double 0.0, double 4.0, double 1.0, double 8.0>, ptr %a, align 8
ret void
}


attributes #0 = { "target-features"="+sve" }
