Skip to content

Commit

Permalink
[CodeGen] Disable FP LD1RX instructions generation for Neoverse-V1
Browse files Browse the repository at this point in the history
These instructions show worse performance on Neoverse-V1 compared
to pair of LDR(LDP)/MOV instructions.
This patch adds `no-sve-fp-ld1r` sub-target feature, which is enabled
only on Neoverse-V1.

Fixes #64498

Differential Revision: https://reviews.llvm.org/D157279
  • Loading branch information
igogo-x86 committed Aug 9, 2023
1 parent d1b376f commit 60e2a84
Show file tree
Hide file tree
Showing 5 changed files with 415 additions and 179 deletions.
6 changes: 5 additions & 1 deletion llvm/lib/Target/AArch64/AArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ def FeatureExperimentalZeroingPseudos
def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
"UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;

def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
"NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;

def FeatureSVE2 : SubtargetFeature<"sve2", "HasSVE2", "true",
"Enable Scalable Vector Extension 2 (SVE2) instructions (FEAT_SVE2)",
[FeatureSVE, FeatureUseScalarIncVL]>;
Expand Down Expand Up @@ -1137,7 +1140,8 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
FeatureLSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
FeaturePredictableSelectIsExpensive,
FeatureNoSVEFPLD1R]>;

def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
"Neoverse V2 ARM processors", [
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ def UseNegativeImmediates

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
Expand Down
16 changes: 9 additions & 7 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2355,13 +2355,15 @@ let Predicates = [HasSVEorSME] in {
// LDR1 of 64-bit data
defm : LD1RPat<nxv2i64, load, LD1RD_IMM, PTRUE_D, i64, am_indexed64_6b, uimm6s8>;

// LD1R of FP data
defm : LD1RPat<nxv8f16, load, LD1RH_IMM, PTRUE_H, f16, am_indexed16_6b, uimm6s2>;
defm : LD1RPat<nxv4f16, load, LD1RH_S_IMM, PTRUE_S, f16, am_indexed16_6b, uimm6s2>;
defm : LD1RPat<nxv2f16, load, LD1RH_D_IMM, PTRUE_D, f16, am_indexed16_6b, uimm6s2>;
defm : LD1RPat<nxv4f32, load, LD1RW_IMM, PTRUE_S, f32, am_indexed32_6b, uimm6s4>;
defm : LD1RPat<nxv2f32, load, LD1RW_D_IMM, PTRUE_D, f32, am_indexed32_6b, uimm6s4>;
defm : LD1RPat<nxv2f64, load, LD1RD_IMM, PTRUE_D, f64, am_indexed64_6b, uimm6s8>;
let Predicates = [HasSVEorSME, UseSVEFPLD1R] in {
// LD1R of FP data
defm : LD1RPat<nxv8f16, load, LD1RH_IMM, PTRUE_H, f16, am_indexed16_6b, uimm6s2>;
defm : LD1RPat<nxv4f16, load, LD1RH_S_IMM, PTRUE_S, f16, am_indexed16_6b, uimm6s2>;
defm : LD1RPat<nxv2f16, load, LD1RH_D_IMM, PTRUE_D, f16, am_indexed16_6b, uimm6s2>;
defm : LD1RPat<nxv4f32, load, LD1RW_IMM, PTRUE_S, f32, am_indexed32_6b, uimm6s4>;
defm : LD1RPat<nxv2f32, load, LD1RW_D_IMM, PTRUE_D, f32, am_indexed32_6b, uimm6s4>;
defm : LD1RPat<nxv2f64, load, LD1RD_IMM, PTRUE_D, f64, am_indexed64_6b, uimm6s8>;
}

// LD1R of 128-bit masked data
multiclass ld1rq_pat<ValueType vt1, SDPatternOperator op, Instruction load_instr, ComplexPattern AddrCP>{
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/SVEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -7203,6 +7203,10 @@ multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {

def : Pat<(nxv8f16 (op nxv8i1:$pg, f16:$splat, nxv8f16:$passthru)),
(!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
def : Pat<(nxv4f16 (op nxv4i1:$pg, f16:$splat, nxv4f16:$passthru)),
(!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
def : Pat<(nxv2f16 (op nxv2i1:$pg, f16:$splat, nxv2f16:$passthru)),
(!cast<Instruction>(NAME # _H) $passthru, $pg, $splat)>;
def : Pat<(nxv2f32 (op nxv2i1:$pg, f32:$splat, nxv2f32:$passthru)),
(!cast<Instruction>(NAME # _S) $passthru, $pg, $splat)>;
def : Pat<(nxv4f32 (op nxv4i1:$pg, f32:$splat, nxv4f32:$passthru)),
Expand Down
Loading

0 comments on commit 60e2a84

Please sign in to comment.