
[AArch64][GlobalISel] Add support for some across-vector NEON intrinsics
Support the uaddv, saddv, umaxv, smaxv, uminv, sminv, fmaxv, fminv,
fmaxnmv, and fminnmv intrinsics in GlobalISel.

GlobalISelEmitter couldn't import SelectionDAG patterns containing nodes
with an 8-bit result type, since those nodes were untyped. The register
type for FPR8 is therefore set to i8, eliminating untyped nodes from
these patterns.
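
As a sketch of what this enables (hypothetical IR in the style of the tests touched below, not part of the commit message): a reduction such as

; Hypothetical example: GlobalISel can now select this to a single
; umaxv followed by a fmov, with no SelectionDAG fallback.
define i32 @umaxv_example(<4 x i32> %v) {
  %r = call i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32> %v)
  ret i32 %r
}
declare i32 @llvm.aarch64.neon.umaxv.i32.v4i32(<4 x i32>)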

Differential Revision: https://reviews.llvm.org/D146531
dzhidzhoev committed Apr 5, 2023
1 parent fe963a8 commit 8f5db53
Showing 9 changed files with 203 additions and 37 deletions.
105 changes: 105 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -302,3 +302,108 @@ def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
(STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
(STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;

multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (SMOVvi8to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
              (i64 0)))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (SMOVvi8to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
              (i64 0)))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
              (i64 0)))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (SMOVvi16to32
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
              (i64 0)))>;

  def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
              ssub))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsicBHS<string baseOpc,
                                               Intrinsic intOp> {
  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
              ssub))>;
  def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
              ssub))>;
  def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
              ssub))>;

  def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
            (i32 (EXTRACT_SUBREG
              (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
              ssub))>;
}


defm : SIMDAcrossLanesSignedIntrinsicBHS<"ADDV", int_aarch64_neon_saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
            ssub))>;

defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"ADDV", int_aarch64_neon_uaddv>;
def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
            ssub))>;

defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMAXV", int_aarch64_neon_smaxv>;
def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
            ssub))>;

defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMINV", int_aarch64_neon_sminv>;
def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (SMINPv2i32 V64:$Rn, V64:$Rn), dsub),
            ssub))>;

defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMAXV", int_aarch64_neon_umaxv>;
def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
            ssub))>;

defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMINV", int_aarch64_neon_uminv>;
def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (UMINPv2i32 V64:$Rn, V64:$Rn), dsub),
            ssub))>;
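
Note the asymmetry between the two multiclasses above: the signed 8- and 16-bit reductions go through SMOVvi8to32/SMOVvi16to32, which sign-extend the narrow result lane into a GPR, while the unsigned ones simply read the 32-bit ssub subregister, whose upper bits are already zero. A hypothetical IR-level example of the signed case (the expected selection in the comment is an assumption based on the patterns above):

; Expected selection (assumed): smaxv b0, v0.8b
;                               smov  w0, v0.b[0]
define i32 @smaxv_v8i8(<8 x i8> %v) {
  %r = call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %v)
  ret i32 %r
}
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)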
8 changes: 4 additions & 4 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -2804,7 +2804,7 @@ defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
-defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>;
+defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
@@ -3569,7 +3569,7 @@ defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;


// Floating-point
-defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
@@ -3979,7 +3979,7 @@ defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
-def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
+def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
@@ -4033,7 +4033,7 @@ def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
-def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
+def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -435,7 +435,7 @@ def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
}

-def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
+def FPR8 : RegisterClass<"AArch64", [i8], 8, (sequence "B%u", 0, 31)> {
let Size = 8;
}
def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
6 changes: 3 additions & 3 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3166,15 +3166,15 @@ let Predicates = [HasSVEorSME] in {
let Predicates = [NotInStreamingSVEMode] in {
def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
(i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
-def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
+def : Pat<(sext_inreg (anyext (i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index))), i8),
(i64 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;

def : Pat<(sext_inreg (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index), i16),
(i32 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
-def : Pat<(sext_inreg (anyext (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), i16),
+def : Pat<(sext_inreg (anyext (i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index))), i16),
(i64 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;

-def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+def : Pat<(sext (i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index))),
(i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
} // End NotInStreamingSVEMode
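
With FPR8 now carrying i8 instead of untyped, the vector_extract results in these patterns are wrapped in explicit (i32 ...) annotations to keep type inference unambiguous. At the IR level, the sext pattern above covers hypothetical extract-and-extend sequences such as:

; Hypothetical example; assumed to select smov x0, v0.s[1] through the
; fixed-length zsub subregister of the scalable register.
define i64 @sext_lane(<vscale x 4 x i32> %v) {
  %e = extractelement <vscale x 4 x i32> %v, i64 1
  %s = sext i32 %e to i64
  ret i64 %s
}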

32 changes: 26 additions & 6 deletions llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -481,14 +481,35 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}

-/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
-static bool isFPIntrinsic(unsigned ID) {
+/// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+                          const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC);
  // TODO: Add more intrinsics.
-  switch (ID) {
+  switch (MI.getIntrinsicID()) {
  default:
    return false;
  case Intrinsic::aarch64_neon_uaddlv:
+  case Intrinsic::aarch64_neon_uaddv:
+  case Intrinsic::aarch64_neon_umaxv:
+  case Intrinsic::aarch64_neon_uminv:
+  case Intrinsic::aarch64_neon_fmaxv:
+  case Intrinsic::aarch64_neon_fminv:
+  case Intrinsic::aarch64_neon_fmaxnmv:
+  case Intrinsic::aarch64_neon_fminnmv:
    return true;
+  case Intrinsic::aarch64_neon_saddlv: {
+    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+    return SrcTy.getElementType().getSizeInBits() >= 16 &&
+           SrcTy.getElementCount().getFixedValue() >= 4;
+  }
+  case Intrinsic::aarch64_neon_saddv:
+  case Intrinsic::aarch64_neon_smaxv:
+  case Intrinsic::aarch64_neon_sminv: {
+    const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+    return SrcTy.getElementType().getSizeInBits() >= 32 &&
+           SrcTy.getElementCount().getFixedValue() >= 2;
+  }
  }
}
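
The guards on the signed reductions encode a register-bank fact rather than a legality rule: for narrow element types the i32 result is produced by a sign-extending SMOV, which defines a GPR, so only the wider forms use and define FPRs exclusively. A hypothetical contrast (the selection noted in the comments is an assumption based on the AArch64InstrGISel.td patterns above):

define i32 @saddv_fpr_only(<2 x i32> %v) {
  ; v2i32: addp v0.2s, v0.2s, v0.2s then fmov w0, s0 -- stays on FPRs
  %r = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %v)
  ret i32 %r
}
define i32 @saddv_needs_gpr(<8 x i8> %v) {
  ; v8i8: addv b0, v0.8b then smov w0, v0.b[0] -- SMOV writes a GPR
  %r = call i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8> %v)
  ret i32 %r
}
declare i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32>)
declare i32 @llvm.aarch64.neon.saddv.i32.v8i8(<8 x i8>)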

@@ -497,7 +518,7 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
unsigned Op = MI.getOpcode();
-  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
+  if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
return true;

// Do we have an explicit floating point instruction?
@@ -996,9 +1017,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC: {
// Check if we know that the intrinsic has any constraints on its register
// banks. If it does, then update the mapping accordingly.
-    unsigned ID = MI.getIntrinsicID();
    unsigned Idx = 0;
-    if (!isFPIntrinsic(ID))
+    if (!isFPIntrinsic(MRI, MI))
break;
for (const auto &Op : MI.explicit_operands()) {
if (Op.isReg())
84 changes: 61 additions & 23 deletions llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=generic | FileCheck %s -check-prefixes=CHECK,SDAG
+; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=aarch64-eabi -aarch64-neon-syntax=generic 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL

; Function Attrs: nounwind readnone
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
@@ -9,6 +10,14 @@ declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)

+; GISEL-NOT: Instruction selection used fallback path for add_B
+; GISEL-NOT: Instruction selection used fallback path for add_H
+; GISEL-NOT: Instruction selection used fallback path for add_S
+; GISEL-NOT: Instruction selection used fallback path for add_D
+; GISEL-NOT: Instruction selection used fallback path for oversized_ADDV_512
+; GISEL-NOT: Instruction selection used fallback path for addv_combine_i32
+; GISEL-NOT: Instruction selection used fallback path for addv_combine_i64

define i8 @add_B(ptr %arr) {
; CHECK-LABEL: add_B:
; CHECK: // %bb.0:
@@ -84,16 +93,27 @@ entry:
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)

define i32 @oversized_ADDV_512(ptr %arr) {
-; CHECK-LABEL: oversized_ADDV_512:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: add v0.4s, v3.4s, v0.4s
-; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: oversized_ADDV_512:
+; SDAG: // %bb.0:
+; SDAG-NEXT: ldp q0, q1, [x0, #32]
+; SDAG-NEXT: ldp q3, q2, [x0]
+; SDAG-NEXT: add v0.4s, v3.4s, v0.4s
+; SDAG-NEXT: add v1.4s, v2.4s, v1.4s
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: oversized_ADDV_512:
+; GISEL: // %bb.0:
+; GISEL-NEXT: ldp q0, q1, [x0]
+; GISEL-NEXT: ldp q2, q3, [x0, #32]
+; GISEL-NEXT: add v0.4s, v0.4s, v1.4s
+; GISEL-NEXT: add v1.4s, v2.4s, v3.4s
+; GISEL-NEXT: add v0.4s, v0.4s, v1.4s
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: fmov w0, s0
+; GISEL-NEXT: ret
%bin.rdx = load <16 x i32>, ptr %arr
%r = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %bin.rdx)
ret i32 %r
@@ -128,12 +148,21 @@ entry:
}

define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
-; CHECK-LABEL: addv_combine_i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: addv s0, v0.4s
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i32:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i32:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: addv s1, v1.4s
+; GISEL-NEXT: fmov w8, s0
+; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: add w0, w8, w9
+; GISEL-NEXT: ret
entry:
%rdx.1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a1)
%rdx.2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2)
@@ -142,12 +171,21 @@ entry:
}

define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
-; CHECK-LABEL: addv_combine_i64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
-; CHECK-NEXT: addp d0, v0.2d
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: ret
+; SDAG-LABEL: addv_combine_i64:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.2d, v0.2d, v1.2d
+; SDAG-NEXT: addp d0, v0.2d
+; SDAG-NEXT: fmov x0, d0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: addv_combine_i64:
+; GISEL: // %bb.0: // %entry
+; GISEL-NEXT: addp d0, v0.2d
+; GISEL-NEXT: addp d1, v1.2d
+; GISEL-NEXT: fmov x8, d0
+; GISEL-NEXT: fmov x9, d1
+; GISEL-NEXT: add x0, x8, x9
+; GISEL-NEXT: ret
entry:
%rdx.1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a1)
%rdx.2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a2)
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/arm64-fminv.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=arm64-linux-gnu -o - %s | FileCheck %s

define float @test_fminv_v2f32(<2 x float> %in) {
; CHECK: test_fminv_v2f32:
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/arm64-neon-across.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
+; RUN: llc < %s -global-isel=1 -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

declare float @llvm.aarch64.neon.fminnmv.f32.v4f32(<4 x float>)

1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll
@@ -1,4 +1,5 @@
; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -global-isel=1 -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s

declare <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8>, <8 x i8>)

