Skip to content

Commit

Permalink
[PowerPC][P10] Add Vector pair calling convention
Browse files Browse the repository at this point in the history
Add the calling convention for the vector pair registers.
These registers overlap with the vector registers.

Part of an original patch by: Lei Huang

Reviewed By: nemanjai, #powerpc

Differential Revision: https://reviews.llvm.org/D117225
  • Loading branch information
stefanp-ibm committed Mar 15, 2022
1 parent 5791e28 commit 78406ac
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 43 deletions.
22 changes: 22 additions & 0 deletions llvm/lib/Target/PowerPC/PPCCallingConv.td
Expand Up @@ -363,3 +363,25 @@ def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,

def CSR_64_AllRegs_AIX_Dflt_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
(sequence "VSL%u", 0, 19))>;

// Every vector pair register, VSRp0 through VSRp31, as a callee-saved set.
// Serves as the building block for the ColdCC and AllRegs VSRp lists below.
def CSR_ALL_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 0, 31)>;

// Vector pair registers preserved across calls under the standard calling
// conventions: VSRp26 through VSRp31, listed in ascending order.
// PPCFrameLowering::determineCalleeSaves clears these from SavedRegs so that
// the individual VSR subregisters are saved rather than the pairs themselves.
def CSR_VSRP : CalleeSavedRegs<(sequence "VSRp%u", 26, 31)>;

// CSR_SVR432_Altivec extended with the callee-saved vector pairs (CSR_VSRP).
// Chosen for non-PPC64 targets when the subtarget reports pairedVectorMemops().
def CSR_SVR432_VSRP : CalleeSavedRegs<(add CSR_SVR432_Altivec, CSR_VSRP)>;

// CSR_PPC64_Altivec extended with the callee-saved vector pairs (CSR_VSRP).
// Chosen for PPC64 targets when the subtarget reports pairedVectorMemops().
def CSR_SVR464_VSRP : CalleeSavedRegs<(add CSR_PPC64_Altivec, CSR_VSRP)>;

// CSR_SVR464_VSRP with X2 appended; getCalleeSavedRegs returns this variant
// instead of CSR_SVR464_VSRP when its SaveR2 flag is set.
def CSR_SVR464_R2_VSRP : CalleeSavedRegs<(add CSR_SVR464_VSRP, X2)>;

// Cold calling convention, 32-bit, with vector pairs: CSR_SVR32_ColdCC_Altivec
// plus every VSRp register except VSRp17.
// NOTE(review): VSRp17 presumably overlaps v2/v3 and is left out for that
// reason -- confirm against the register definitions.
def CSR_SVR32_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR32_ColdCC_Altivec,
(sub CSR_ALL_VSRP, VSRp17))>;

// Cold calling convention, 64-bit, with vector pairs: CSR_SVR64_ColdCC plus
// every VSRp register except VSRp17.
// NOTE(review): the 32-bit counterpart builds on the *_Altivec list while this
// one builds on plain CSR_SVR64_ColdCC -- confirm the difference is intended.
def CSR_SVR64_ColdCC_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC,
(sub CSR_ALL_VSRP, VSRp17))>;

// CSR_SVR64_ColdCC_VSRP with X2 appended; getCalleeSavedRegs returns this
// variant for the cold calling convention when its SaveR2 flag is set.
def CSR_SVR64_ColdCC_R2_VSRP : CalleeSavedRegs<(add CSR_SVR64_ColdCC_VSRP, X2)>;

// CSR_64_AllRegs_VSX plus all 32 vector pair registers.
// Chosen for the AnyReg calling convention when the subtarget has VSX and
// reports pairedVectorMemops().
def CSR_64_AllRegs_VSRP :
CalleeSavedRegs<(add CSR_64_AllRegs_VSX, CSR_ALL_VSRP)>;
9 changes: 9 additions & 0 deletions llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
Expand Up @@ -1974,6 +1974,15 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,

const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

// Do not explicitly save the callee saved VSRp registers.
// The individual VSR subregisters will be saved instead.
SavedRegs.reset(PPC::VSRp26);
SavedRegs.reset(PPC::VSRp27);
SavedRegs.reset(PPC::VSRp28);
SavedRegs.reset(PPC::VSRp29);
SavedRegs.reset(PPC::VSRp30);
SavedRegs.reset(PPC::VSRp31);

// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned LR = RegInfo->getRARegister();
Expand Down
51 changes: 38 additions & 13 deletions llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
Expand Up @@ -183,6 +183,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (!TM.isPPC64() && Subtarget.isAIXABI())
report_fatal_error("AnyReg unimplemented on 32-bit AIX.");
if (Subtarget.hasVSX()) {
if (Subtarget.pairedVectorMemops())
return CSR_64_AllRegs_VSRP_SaveList;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_SaveList;
return CSR_64_AllRegs_VSX_SaveList;
Expand Down Expand Up @@ -210,21 +212,28 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isAIXABI())
report_fatal_error("Cold calling unimplemented on AIX.");
if (TM.isPPC64()) {
if (Subtarget.pairedVectorMemops())
return SaveR2 ? CSR_SVR64_ColdCC_R2_VSRP_SaveList
: CSR_SVR64_ColdCC_VSRP_SaveList;
if (Subtarget.hasAltivec())
return SaveR2 ? CSR_SVR64_ColdCC_R2_Altivec_SaveList
: CSR_SVR64_ColdCC_Altivec_SaveList;
return SaveR2 ? CSR_SVR64_ColdCC_R2_SaveList
: CSR_SVR64_ColdCC_SaveList;
}
// 32-bit targets.
if (Subtarget.hasAltivec())
if (Subtarget.pairedVectorMemops())
return CSR_SVR32_ColdCC_VSRP_SaveList;
else if (Subtarget.hasAltivec())
return CSR_SVR32_ColdCC_Altivec_SaveList;
else if (Subtarget.hasSPE())
return CSR_SVR32_ColdCC_SPE_SaveList;
return CSR_SVR32_ColdCC_SaveList;
}
// Standard calling convention CSRs.
if (TM.isPPC64()) {
if (Subtarget.pairedVectorMemops())
return SaveR2 ? CSR_SVR464_R2_VSRP_SaveList : CSR_SVR464_VSRP_SaveList;
if (Subtarget.hasAltivec() &&
(!Subtarget.isAIXABI() || TM.getAIXExtendedAltivecABI())) {
return SaveR2 ? CSR_PPC64_R2_Altivec_SaveList
Expand All @@ -239,6 +248,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
: CSR_AIX32_SaveList;
return CSR_AIX32_SaveList;
}
if (Subtarget.pairedVectorMemops())
return CSR_SVR432_VSRP_SaveList;
if (Subtarget.hasAltivec())
return CSR_SVR432_Altivec_SaveList;
else if (Subtarget.hasSPE())
Expand All @@ -252,6 +263,8 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
if (CC == CallingConv::AnyReg) {
if (Subtarget.hasVSX()) {
if (Subtarget.pairedVectorMemops())
return CSR_64_AllRegs_VSRP_RegMask;
if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI())
return CSR_64_AllRegs_AIX_Dflt_VSX_RegMask;
return CSR_64_AllRegs_VSX_RegMask;
Expand All @@ -275,20 +288,32 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}

if (CC == CallingConv::Cold) {
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
: CSR_SVR64_ColdCC_RegMask)
: (Subtarget.hasAltivec() ? CSR_SVR32_ColdCC_Altivec_RegMask
: (Subtarget.hasSPE()
? CSR_SVR32_ColdCC_SPE_RegMask
: CSR_SVR32_ColdCC_RegMask));
if (TM.isPPC64())
return Subtarget.pairedVectorMemops()
? CSR_SVR64_ColdCC_VSRP_RegMask
: (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask
: CSR_SVR64_ColdCC_RegMask);
else
return Subtarget.pairedVectorMemops()
? CSR_SVR32_ColdCC_VSRP_RegMask
: (Subtarget.hasAltivec()
? CSR_SVR32_ColdCC_Altivec_RegMask
: (Subtarget.hasSPE() ? CSR_SVR32_ColdCC_SPE_RegMask
: CSR_SVR32_ColdCC_RegMask));
}

return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
: CSR_PPC64_RegMask)
: (Subtarget.hasAltivec()
? CSR_SVR432_Altivec_RegMask
: (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
: CSR_SVR432_RegMask));
if (TM.isPPC64())
return Subtarget.pairedVectorMemops()
? CSR_SVR464_VSRP_RegMask
: (Subtarget.hasAltivec() ? CSR_PPC64_Altivec_RegMask
: CSR_PPC64_RegMask);
else
return Subtarget.pairedVectorMemops()
? CSR_SVR432_VSRP_RegMask
: (Subtarget.hasAltivec()
? CSR_SVR432_Altivec_RegMask
: (Subtarget.hasSPE() ? CSR_SVR432_SPE_RegMask
: CSR_SVR432_RegMask));
}

const uint32_t*
Expand Down
76 changes: 46 additions & 30 deletions llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
Expand Up @@ -13,23 +13,29 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-LABEL: intrinsics1:
; CHECK: # %bb.0:
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -176(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 176
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -176(r1)
; CHECK-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
; CHECK-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
; CHECK-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
; CHECK-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
; CHECK-NEXT: xxlor vs0, v2, v2
; CHECK-NEXT: xxlor vs1, v3, v3
; CHECK-NEXT: stxvp vsp34, 128(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxlor vs2, v4, v4
; CHECK-NEXT: xxlor vs3, v5, v5
; CHECK-NEXT: .cfi_offset v28, -80
; CHECK-NEXT: .cfi_offset v29, -64
; CHECK-NEXT: .cfi_offset v30, -48
; CHECK-NEXT: .cfi_offset v31, -32
; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v29, v3
; CHECK-NEXT: vmr v28, v2
; CHECK-NEXT: xxlor vs0, v28, v28
; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill
; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill
; CHECK-NEXT: vmr v31, v5
; CHECK-NEXT: vmr v30, v4
; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill
; CHECK-NEXT: xxlor vs1, v29, v29
; CHECK-NEXT: xxlor vs2, v30, v30
; CHECK-NEXT: xxlor vs3, v31, v31
; CHECK-NEXT: ld r30, 272(r1)
; CHECK-NEXT: stxvp vsp36, 96(r1) # 32-byte Folded Spill
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xxmfacc acc0
Expand All @@ -39,17 +45,19 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-NEXT: lxvp vsp0, 64(r1)
; CHECK-NEXT: lxvp vsp2, 32(r1)
; CHECK-NEXT: xxmtacc acc0
; CHECK-NEXT: lxvp vsp34, 128(r1) # 32-byte Folded Reload
; CHECK-NEXT: lxvp vsp36, 96(r1) # 32-byte Folded Reload
; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-NEXT: xvf16ger2pp acc0, v28, v30
; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload
; CHECK-NEXT: xxmfacc acc0
; CHECK-NEXT: stxv vs0, 48(r30)
; CHECK-NEXT: stxv vs1, 32(r30)
; CHECK-NEXT: stxv vs2, 16(r30)
; CHECK-NEXT: stxv vs3, 0(r30)
; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 176
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
Expand All @@ -61,17 +69,23 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: .cfi_def_cfa_offset 256
; CHECK-BE-NEXT: .cfi_offset lr, 16
; CHECK-BE-NEXT: .cfi_offset r30, -16
; CHECK-BE-NEXT: .cfi_offset v28, -80
; CHECK-BE-NEXT: .cfi_offset v29, -64
; CHECK-BE-NEXT: .cfi_offset v30, -48
; CHECK-BE-NEXT: .cfi_offset v31, -32
; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: vmr v29, v3
; CHECK-BE-NEXT: vmr v28, v2
; CHECK-BE-NEXT: xxlor vs0, v28, v28
; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill
; CHECK-BE-NEXT: vmr v31, v5
; CHECK-BE-NEXT: vmr v30, v4
; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
; CHECK-BE-NEXT: # kill: def $v5 killed $v5 killed $vsrp18 def $vsrp18
; CHECK-BE-NEXT: # kill: def $v4 killed $v4 killed $vsrp18 def $vsrp18
; CHECK-BE-NEXT: # kill: def $v3 killed $v3 killed $vsrp17 def $vsrp17
; CHECK-BE-NEXT: # kill: def $v2 killed $v2 killed $vsrp17 def $vsrp17
; CHECK-BE-NEXT: xxlor vs0, v2, v2
; CHECK-BE-NEXT: xxlor vs1, v3, v3
; CHECK-BE-NEXT: stxvp vsp34, 208(r1) # 32-byte Folded Spill
; CHECK-BE-NEXT: xxlor vs2, v4, v4
; CHECK-BE-NEXT: xxlor vs3, v5, v5
; CHECK-BE-NEXT: stxvp vsp36, 176(r1) # 32-byte Folded Spill
; CHECK-BE-NEXT: xxlor vs1, v29, v29
; CHECK-BE-NEXT: xxlor vs2, v30, v30
; CHECK-BE-NEXT: xxlor vs3, v31, v31
; CHECK-BE-NEXT: ld r30, 368(r1)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
Expand All @@ -83,9 +97,11 @@ define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i
; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
; CHECK-BE-NEXT: xxmtacc acc0
; CHECK-BE-NEXT: lxvp vsp34, 208(r1) # 32-byte Folded Reload
; CHECK-BE-NEXT: lxvp vsp36, 176(r1) # 32-byte Folded Reload
; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30
; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
; CHECK-BE-NEXT: xxmfacc acc0
; CHECK-BE-NEXT: stxv vs1, 16(r30)
; CHECK-BE-NEXT: stxv vs0, 0(r30)
Expand Down

0 comments on commit 78406ac

Please sign in to comment.