[AArch64] Remove copy in SVE/SME predicate spill and fill #81716

SamTebbs33 · 2024-02-14T09:18:02Z

7dc20ab introduced an extra COPY when spilling and filling a PNR register, which can't be elided as the input (PNR predicate) and output (PPR predicate) register classes differ. This patch constrains the register class given to the spill and fill so that the classes match and can be elided. The patch also adds a new register class that covers both PPR and PNR so that STR_PXI and LDR_PXI can take either of them.

llvmbot · 2024-02-14T09:18:35Z

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: Sam Tebbs (SamTebbs33)

Changes

7dc20ab introduced an extra COPY when spilling a PNR register, which can't be elided as the input (PNR predicate) and output (PPR predicate) register classes differ. This patch emits a new ConvertPNRtoPPR pseudo instruction instead. When this is expanded, it gets erased if the PNR is a subregister of the PPR, since the conversion is implicit, otherwise it is lowered to an ORR.

Full diff: https://github.com/llvm/llvm-project/pull/81716.diff

5 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (+17)
(modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+4-2)
(modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+2)
(added) llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir (+30)
(modified) llvm/test/CodeGen/AArch64/spillfill-sve.mir (+1-2)

diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 352c61d48e2fff..7dfd4ed5b38cff 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1119,6 +1119,23 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
   default:
     break;
 
+  case AArch64::ConvertPNRtoPPR: {
+    auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+    MachineOperand DstMO = MI.getOperand(0);
+    MachineOperand SrcMO = MI.getOperand(1);
+    unsigned SrcReg = SrcMO.getReg();
+    if (!TRI->isSubRegister(DstMO.getReg(), SrcReg)) {
+      unsigned SrcSuperReg = TRI->getMatchingSuperReg(SrcReg, AArch64::psub,
+                                                      &AArch64::PPRRegClass);
+      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_PPzPP))
+          .add(DstMO)
+          .addReg(SrcSuperReg)
+          .addReg(SrcSuperReg)
+          .addReg(SrcSuperReg);
+    }
+    MI.eraseFromParent();
+    return true;
+  }
   case AArch64::BSPv8i8:
   case AArch64::BSPv16i8: {
     Register DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 656259727c124f..01c3a1c92d07d7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4789,6 +4789,7 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   bool Offset = true;
   MCRegister PNRReg = MCRegister::NoRegister;
   unsigned StackID = TargetStackID::Default;
+  const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
   switch (TRI->getSpillSize(*RC)) {
   case 1:
     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
@@ -4807,8 +4808,9 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
              "Unexpected register store without SVE2p1 or SME2");
       if (SrcReg.isVirtual()) {
         auto NewSrcReg =
-            MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
-        BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
+            MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass);
+        BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ConvertPNRtoPPR),
+                NewSrcReg)
             .addReg(SrcReg);
         SrcReg = NewSrcReg;
       } else
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c4d69232c9e30e..92d4c3774908e2 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2308,6 +2308,8 @@ let Predicates = [HasBF16, HasSVEorSME] in {
   defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
 } // End HasBF16, HasSVEorSME
 
+def ConvertPNRtoPPR : Pseudo<(outs PPRAny:$Pd), (ins PNRAny:$Pm), []>, Sched<[]>;
+
 let Predicates = [HasSVEorSME] in {
   // InstAliases
   def : InstAlias<"mov $Zd, $Zn",
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir b/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
new file mode 100644
index 00000000000000..abbc1d615b66f7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve-different-predicate.mir
@@ -0,0 +1,30 @@
+# RUN: llc -mtriple=aarch64-linux-gnu -start-after=virtregrewriter -stop-after=aarch64-expand-pseudo -mattr=+sme2 -verify-machineinstrs -o - %s \
+# RUN: | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+  target triple = "aarch64-unknown-linux-gnu"
+
+  define void @test_convert_different_reg() #0 { entry: unreachable }
+
+  attributes #0 = { "target-features"="+sme2" }
+
+---
+name:            test_convert_different_reg
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    ; CHECK-LABEL: name: test_convert_different_reg
+    ; CHECK: renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
+    ; CHECK-NEXT: renamable $p9 = ORR_PPzPP $p8, $p8, $p8
+    ; CHECK-NEXT: STR_PXI killed renamable $p9, $sp, 7
+    ; CHECK-NEXT: renamable $p0 = LDR_PXI $sp, 7
+    early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16
+    frame-setup CFI_INSTRUCTION escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22
+    frame-setup CFI_INSTRUCTION offset $w29, -16
+    renamable $pn8 = WHILEGE_CXX_B undef $x0, undef $x0, 0, implicit-def dead $nzcv
+    renamable $p9 = ConvertPNRtoPPR killed renamable $pn8
+    STR_PXI killed renamable $p9, $sp, 7
+    renamable $p0 = LDR_PXI $sp, 7
+    early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16
+    RET undef $lr
diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.mir b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
index ef7d55a1c2395f..af062d33c5642f 100644
--- a/llvm/test/CodeGen/AArch64/spillfill-sve.mir
+++ b/llvm/test/CodeGen/AArch64/spillfill-sve.mir
@@ -213,8 +213,7 @@ body:             |
 
     ; EXPAND-LABEL: name: spills_fills_stack_id_virtreg_pnr
     ; EXPAND: renamable $pn8 = WHILEGE_CXX_B
-    ; EXPAND: $p0 = ORR_PPzPP $p8, $p8, killed $p8
-    ; EXPAND: STR_PXI killed renamable $p0, $sp, 7
+    ; EXPAND: STR_PXI killed renamable $p8, $sp, 7
     ;
     ; EXPAND: renamable $p0 = LDR_PXI $sp, 7
     ; EXPAND: $p8 = ORR_PPzPP $p0, $p0, killed $p0, implicit-def $pn8

SamTebbs33 · 2024-02-15T10:50:40Z

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

@@ -4807,8 +4808,9 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
             "Unexpected register store without SVE2p1 or SME2");
      if (SrcReg.isVirtual()) {
        auto NewSrcReg =
-            MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
-        BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
+            MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass);


Changing the source register to p8to15 gives it the chance to overlap with the whilege in the test. Without this change the source register will be allocated to p0, as that is the first register in the PPR register class allocation order, and we can't elide the copy. If anyone else can think of a better approach to get regalloc to make the convert and whilege share registers (e.g. p8 and pn8 respectively), please do mention it.

Hi; I just tried an experiment to constrain the register class to a common superclass instead of introducing a copy and eliding it later. This results in 'pn8' being stored instead of 'p8', but that may be allowable syntax given some discussions I had -- I haven't found much guidance on that. I'm not sure if this the the best approach, but it may be worth investigating.

If there are problems with it, we might want to revisit our current register class hierarchy at some point.

Suggested change

MF.getRegInfo().createVirtualRegister(&AArch64::PPR_p8to15RegClass);

if (SrcReg.isVirtual()) {

MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass);

}...

I like that approach, thanks Graham. It can also be used to get rid of the fill copy.

I'll implement this and adjust the tests accordingly.

dtemirbulatov

LGTM

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

llvm/test/CodeGen/AArch64/spillfill-sve.mir

github-actions · 2024-02-28T14:05:19Z

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:

git-clang-format --diff 341d674b6f1863d027ed30c44a14cd32599eb42d 03aabd26e1f2c0b5c9360eb1f48102b69937a10c -- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp

View the diff from clang-format here.

diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index c5dc2240cd..ab0ecc85a6 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6167,8 +6167,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode,
         "4 registers apart, and the first register in the range [z0, z3] or "
         "[z16, z19] and with correct element type");
   case Match_AddSubLSLImm3ShiftLarge:
-    return Error(Loc,
-      "expected 'lsl' with optional integer in range [0, 7]");
+    return Error(Loc, "expected 'lsl' with optional integer in range [0, 7]");
   default:
     llvm_unreachable("unexpected error code!");
   }

sdesmalen-arm · 2024-02-28T14:20:16Z

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

@@ -996,6 +996,16 @@ let Namespace = "AArch64" in {
  def psub1 : SubRegIndex<16, -1>;
 }

+class PPRorPNRClass : RegisterClass<
+                                  "AArch64",
+                                  [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,


Suggested change

[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,

[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16,

sdesmalen-arm · 2024-02-28T14:29:15Z

llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir

    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0
    ; CHECK-NEXT: $w0 = COPY [[COPY]](s32)
    ; CHECK-NEXT: RET_ReallyLR implicit $w0
-    INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:GPR32common */, def %0:gpr32common
+    INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common


Suggested change

INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1769482 /* regdef:GPR32common */, def %0:gpr32common

INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0:gpr32common

(same in other places)

We can only use the regex in a CHECK statement, not in the IR being tested.

Ah I didn't spot the leading ;!

In that case, I would suggest not using a regex for these numbers, because they must match the ones from the MIR.

Good point, I've changed the .mir tests to explicitly match the regclass numbers.

sdesmalen-arm · 2024-02-28T14:30:48Z

llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll

@@ -54,7 +54,7 @@ entry:
 define i32 @test_single_register_output() nounwind ssp {
  ; CHECK-LABEL: name: test_single_register_output
  ; CHECK: bb.1.entry:
-  ; CHECK-NEXT:   INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1703946 /* regdef:GPR32common */, def %0
+  ; CHECK-NEXT:   INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, {{[0-9]+}} /* regdef:GPR32common */, def %0


It would be nice if these changes (same for the other files) could be committed as an NFC change separate from this PR, and then have this PR rebased on top.

Sounds good to me 👍

sdesmalen-arm · 2024-02-28T14:33:43Z

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

+def PPRorPNR    : PPRorPNRClass;
+def PPRorPNRAsmOpAny   : PPRAsmOperand<"PPRorPNRAny",   "PPRorPNR",     0>;
+def PPRorPNRAny   : PPRRegOp<"",  PPRorPNRAsmOpAny,   ElementSizeNone, PPRorPNR>;


minor nit: strange formatting.

Suggested change

def PPRorPNR : PPRorPNRClass;

def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>;

def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>;

def PPRorPNR : PPRorPNRClass;

def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny", "PPRorPNR", 0>;

def PPRorPNRAny : PPRRegOp<"", PPRorPNRAsmOpAny, ElementSizeNone, PPRorPNR>;

sdesmalen-arm · 2024-02-28T14:39:51Z

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

+  let Size = 16;
+}
+def PPRorPNR    : PPRorPNRClass;
+def PPRorPNRAsmOpAny   : PPRAsmOperand<"PPRorPNRAny",   "PPRorPNR",     0>;


You need to add a case statement for `Match_InvalidSVEPPRorPNRAnyReg to:

AArch64AsmParser::showMatchError

AArch64AsmParser::MatchAndEmitInstruction

sdesmalen-arm · 2024-02-28T14:43:42Z

llvm/lib/Target/AArch64/SVEInstrFormats.td

@@ -6680,7 +6680,7 @@ multiclass sve_mem_z_spill<string asm> {
 }

 class sve_mem_p_spill<string asm>
-: I<(outs), (ins PPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9),
+: I<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9),


This removes the need for having the following InstAliases in AArch64SVEInstrInfo.td:

// Aliases for existing SVE instructions for which predicate-as-counter are // accepted as an operand to the instruction def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]", (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; def : InstAlias<"ldr $Pt, [$Rn]", (LDR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>; def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]", (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; def : InstAlias<"str $Pt, [$Rn]", (STR_PXI PNRasPPRAny:$Pt, GPR64sp:$Rn, 0), 0>;

Can you remove them?

sdesmalen-arm

LGTM with nit addressed.

sdesmalen-arm · 2024-03-01T08:53:59Z

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

+  let Size = 16;
+}
+def PPRorPNR         : PPRorPNRClass;
+def PPRorPNRAsmOpAny : PPRAsmOperand<"PPRorPNRAny",   "PPRorPNR",     0>;


nit: still strange formatting with lots of spaces.

Oh right I missed those ones, thanks! I would have expected clang-format to address this.

7dc20ab introduced an extra COPY when spilling a PNR register, which can't be elided as the input (PNR predicate) and output (PPR predicate) register classes differ. This patch emits a new ConvertPNRtoPPR pseudo instruction instead. When this is expanded, it gets erased if the PNR is a subregister of the PPR, since the conversion is implicit, otherwise it is lowered to an ORR.

SamTebbs33 · 2024-03-13T16:51:39Z

I've force-pushed to trigger the builds again and get rid of the unrelated mlir failure. This should be ready for another look with the new assembly parser functions, thanks!

sdesmalen-arm

Thanks for the updates to this patch, this is starting to look good!

sdesmalen-arm · 2024-03-15T09:22:07Z

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

-            .addReg(SrcReg);
-        SrcReg = NewSrcReg;
-      } else
+      if (SrcReg.isVirtual())


Now that you've made the instructions accept both p0-p15 and pn0-pn15, we can remove all the code that tries to handle PNR registers differently from P registers in storeRegToStackSlot and loadRegFromStackSlot.

Good idea, done.

sdesmalen-arm · 2024-03-15T09:24:58Z

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

+      Reg = Reg - AArch64::PN0 + AArch64::P0;
+    Inst.addOperand(MCOperand::createReg(Reg));
+  }
+


Can addPNRasPPRRegOperands be removed now? Or perhaps I should ask: is PNRasPPR still required, or can those instructions that use it also use PPRorPNRRegOperand ?

PPRasPNR can be removed. Thanks for the idea.

sdesmalen-arm · 2024-03-15T09:36:03Z

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

+    assert(N == 1 && "Invalid number of operands!");
+    unsigned Reg = getReg();
+    // Normalise to PPR
+    if (Reg >= AArch64::PN0)


nit:

Suggested change

if (Reg >= AArch64::PN0)

if (Reg >= AArch64::PN0 && Reg <= AArch64::PN15)

sdesmalen-arm · 2024-03-15T09:47:32Z

llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp

@@ -741,6 +744,18 @@ static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
  return Success;
 }

+static DecodeStatus DecodePPRorPNRRegisterClass(MCInst &Inst, unsigned RegNo,


Cleanup suggestion for a separate patch:

All these decoder classes looks rather identical, it would be nice to clean this up with something like this:

template <unsigned RegClassID, unsigned FirstReg, unsigned NumRegsInClass> static DecodeStatus DecodeSimpleReg(MCInst &Inst, unsigned RegNo, uint64_t Addr, const MCDisassembler *Decoder) { if (RegNo > (NumRegsInClass-1)) return Fail; unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo); Inst.addOperand(MCOperand::createReg(Register + FirstReg)); return Success; }

And then remove the other functions and just specify e.g. DecodeSimpleReg<AArch64::PPRorPNRRegClassID, 0, 16> in the .td file.

That sounds good to me. We can do it in a separate patch.

sdesmalen-arm · 2024-04-04T11:51:13Z

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+    else {
+      bool IsPPR = AArch64::PPRRegClass.hasSubClassEq(RC);
+      bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
+      if (IsPPR || IsPNR) {
+        assert((!IsPPR || Subtarget.hasSVEorSME()) &&
+               "Unexpected register store without SVE store instructions");
+        assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
+               "Unexpected register store without SVE2p1 or SME2");
+        Opc = AArch64::STR_PXI;
+        StackID = TargetStackID::ScalableVector;
+        if (SrcReg.isVirtual())
+          MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass);
+        else if (IsPNR)
+          // Normalise to PPR
+          SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
+      }


Now that the LDR and STR instructions accept a predicate-as-counter register, we can remove this trick to change the register class and simplify this code to:

Suggested change

else {

bool IsPPR = AArch64::PPRRegClass.hasSubClassEq(RC);

bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);

if (IsPPR || IsPNR) {

assert((!IsPPR || Subtarget.hasSVEorSME()) &&

"Unexpected register store without SVE store instructions");

assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&

"Unexpected register store without SVE2p1 or SME2");

Opc = AArch64::STR_PXI;

StackID = TargetStackID::ScalableVector;

if (SrcReg.isVirtual())

MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::PPRRegClass);

else if (IsPNR)

// Normalise to PPR

SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;

}

else if (AArch64::PPRRegClass.hasSubClassEq(RC) ||

AArch64::PNRRegClass.hasSubClassEq(RC)) {

assert(...);

Opc = AArch64::STR_PXI;

StackID = TargetStackID::ScalableVector;

}

same for the other case in loadRegFromStackSlot

You're right! Thanks. I've kept the IsPNR and IsPPR variables though so we can emit the right assertions.

sdesmalen-arm · 2024-04-04T11:53:08Z

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

+
+    if ((isSVEPredicateAsCounterReg<Class>() ||
+         isSVEPredicateVectorRegOfWidth<ElementWidth, Class>()) &&
+        (Reg.ElementWidth == ElementWidth))


nit: no parenthesis needed in:

Suggested change

(Reg.ElementWidth == ElementWidth))

Reg.ElementWidth == ElementWidth)

sdesmalen-arm · 2024-04-04T11:56:09Z

llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp

@@ -6714,7 +6750,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
  case Match_InvalidSVEVectorListStrided4x16:
  case Match_InvalidSVEVectorListStrided4x32:
  case Match_InvalidSVEVectorListStrided4x64:
-  case Match_InvalidSVEPNRasPPRPredicateBReg:
+  case Match_InvalidSVEPPRorPNRBReg:


nit: should this be together with case Match_InvalidSVEPPRorPNRAnyReg?

If we keep it here we retain the same error messages as the older reg class. I can move it up but it will require updating quite a few tests.

sdesmalen-arm

LGTM wit little nit addressed!

sdesmalen-arm · 2024-04-09T07:31:08Z

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+        assert((!IsPPR || Subtarget.hasSVEorSME()) &&
+               "Unexpected register store without SVE store instructions");


nit: this can be

Suggested change

assert((!IsPPR || Subtarget.hasSVEorSME()) &&

"Unexpected register store without SVE store instructions");

assert(Subtarget.hasSVEorSME() &&

"Unexpected register store without SVE store instructions");

because both PPR and PNR require at least either SME or SVE. (same for the assert on line 4996)

SamTebbs33 requested review from dtemirbulatov, sdesmalen-arm, kmclaughlin-arm and MDevereau February 14, 2024 09:18

llvmbot added the backend:AArch64 label Feb 14, 2024

SamTebbs33 requested a review from huntergr-arm February 15, 2024 10:01

SamTebbs33 commented Feb 15, 2024

View reviewed changes

SamTebbs33 changed the title ~~[AArch64] Remove copy in SVE/SME predicate spill~~ [AArch64] Remove copy in SVE/SME predicate spill and fill Feb 21, 2024

dtemirbulatov approved these changes Feb 22, 2024

View reviewed changes

sdesmalen-arm requested changes Feb 22, 2024

View reviewed changes

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp Outdated Show resolved Hide resolved

llvm/test/CodeGen/AArch64/spillfill-sve.mir Show resolved Hide resolved

llvmbot added the llvm:globalisel label Feb 28, 2024

sdesmalen-arm reviewed Feb 28, 2024

View reviewed changes

SamTebbs33 force-pushed the spillfill-pred branch from 8623267 to 5d8fb1f Compare February 29, 2024 15:02

sdesmalen-arm approved these changes Mar 1, 2024

View reviewed changes

SamTebbs33 added 13 commits March 13, 2024 16:49

fixup: constrain reg class instead of convert

929b111

fixup: let predicate spill and fill take ppr or pnr

514b9b5

fixup: remove braces

264a844

fixup: add decode function

53a3177

fixup: remove inst aliases

6a4502c

fixup: match reg class in AArch64AsmParser

808ddbb

Add svcount to reg class

ed8519e

fixup: format definitions

6a55451

fixup: use reg class numbers in MIR tests

a229c76

fixup: Add parsing and rendering functions

8c5b567

fixup: remove spaces

a64ca22

fixup: format decode function

9e59c45

SamTebbs33 force-pushed the spillfill-pred branch from f830825 to 9e59c45 Compare March 13, 2024 16:50

sdesmalen-arm reviewed Mar 15, 2024

View reviewed changes

SamTebbs33 added 3 commits April 2, 2024 16:16

fixup: merge pprorpnr and ppraspnr handling

1a398e1

fixup: combine ppr and pnr stack slot handling

2f6d418

fixup: add <= pn15 to asm parser

f06c0ce

sdesmalen-arm reviewed Apr 4, 2024

View reviewed changes

SamTebbs33 added 2 commits April 5, 2024 16:55

fixup: simplify load/storeRegToStackSlot

23b051b

fixup: remove extra parentheses

a53aeb4

sdesmalen-arm approved these changes Apr 9, 2024

View reviewed changes

fixup: remove IsPPR and move error match case statement

03aabd2

SamTebbs33 merged commit fb8dbd1 into llvm:main Apr 9, 2024
3 of 4 checks passed

SamTebbs33 deleted the spillfill-pred branch April 9, 2024 15:17

	[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
	[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16,

	INLINEASM &"mov ${0:w}, 7", 0 /* attdialect /, 1769482 / regdef:GPR32common */, def %0:gpr32common
	INLINEASM &"mov ${0:w}, 7", 0 /* attdialect /, {{[0-9]+}} / regdef:GPR32common */, def %0:gpr32common

	if (Reg >= AArch64::PN0)
	if (Reg >= AArch64::PN0 && Reg <= AArch64::PN15)

	(Reg.ElementWidth == ElementWidth))
	Reg.ElementWidth == ElementWidth)

		assert((!IsPPR \|\| Subtarget.hasSVEorSME()) &&
		"Unexpected register store without SVE store instructions");

[AArch64] Remove copy in SVE/SME predicate spill and fill #81716

[AArch64] Remove copy in SVE/SME predicate spill and fill #81716

Conversation

SamTebbs33 commented Feb 14, 2024 • edited

llvmbot commented Feb 14, 2024 • edited

SamTebbs33 Feb 15, 2024 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

dtemirbulatov left a comment

Choose a reason for hiding this comment

github-actions bot commented Feb 28, 2024 • edited

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

sdesmalen-arm left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

SamTebbs33 commented Mar 13, 2024

sdesmalen-arm left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

sdesmalen-arm left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

SamTebbs33 commented Feb 14, 2024 •

edited

llvmbot commented Feb 14, 2024 •

edited

SamTebbs33 Feb 15, 2024 •

edited

github-actions bot commented Feb 28, 2024 •

edited