From cb1a106044c5ea81539582d5f53db049715621c0 Mon Sep 17 00:00:00 2001 From: "Zabawa, Daniel" Date: Wed, 30 Oct 2024 09:54:04 -0700 Subject: [PATCH 1/2] [X86] Recognize POP/ADD/SUB modifying rsp in getSPAdjust. This code assumed only PUSHes would appear in call sequences. However, if calls require frame-pointer/base-pointer spills, only the PUSH operations inserted by spillFPBP will be recognized, and the adjustments to frame object offsets in prologepilog will be incorrect. This change correctly reports the SP adjustment for POP and ADD/SUB to rsp, and an assertion for unrecognized instructions that modify rsp. There is no unit test provided as the issue can only be reliably reproduced in MIR (to force a spill/restore outside and inside a call sequence, respectively), but MIR cannot serialize the FPClobberedByCall field that triggers spillFPBP to run. --- llvm/lib/Target/X86/X86InstrInfo.cpp | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 38ea1f35be2b9..e2285417a155e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -452,10 +452,13 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const { return -(I->getOperand(1).getImm()); } - // Currently handle only PUSHes we can reasonably expect to see - // in call sequences + // Handle other opcodes we reasonably expect to see in call + // sequences. Note this may include spill/restore of FP/BP. 
switch (MI.getOpcode()) { default: + assert(!(MI.modifiesRegister(X86::RSP, &RI) || + MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) && + "Unhandled opcode in getSPAdjust"); return 0; case X86::PUSH32r: case X86::PUSH32rmm: @@ -467,6 +470,30 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const { case X86::PUSH64rmr: case X86::PUSH64i32: return 8; + case X86::POP32r: + case X86::POP32rmm: + case X86::POP32rmr: + return -4; + case X86::POP64r: + case X86::POP64rmm: + case X86::POP64rmr: + return -8; + // FIXME: (implement and) use isAddImmediate in the + // default case instead of the following ADD/SUB cases. + case X86::ADD32ri: + case X86::ADD32ri8: + case X86::ADD64ri32: + if (MI.getOperand(0).getReg() == X86::RSP && + MI.getOperand(1).getReg() == X86::RSP) + return -MI.getOperand(2).getImm(); + return 0; + case X86::SUB32ri: + case X86::SUB32ri8: + case X86::SUB64ri32: + if (MI.getOperand(0).getReg() == X86::RSP && + MI.getOperand(1).getReg() == X86::RSP) + return MI.getOperand(2).getImm(); + return 0; } } From 32e26ed862911ac959b608619bbada88c0ab62f5 Mon Sep 17 00:00:00 2001 From: "Zabawa, Daniel" Date: Thu, 31 Oct 2024 12:06:46 -0700 Subject: [PATCH 2/2] Add test and new MFI serializations to support test --- .../lib/Target/X86/X86MachineFunctionInfo.cpp | 6 +- llvm/lib/Target/X86/X86MachineFunctionInfo.h | 4 + llvm/test/CodeGen/X86/pr114265.mir | 94 +++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/X86/pr114265.mir diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp index 7b57f7c23bf4d..aec8f3ee7484f 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.cpp @@ -15,7 +15,9 @@ using namespace llvm; yaml::X86MachineFunctionInfo::X86MachineFunctionInfo( const llvm::X86MachineFunctionInfo &MFI) - : AMXProgModel(MFI.getAMXProgModel()) {} + : AMXProgModel(MFI.getAMXProgModel()), + 
FPClobberedByCall(MFI.getFPClobberedByCall()), + HasPushSequences(MFI.getHasPushSequences()) {} void yaml::X86MachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { MappingTraits::mapping(YamlIO, *this); @@ -31,6 +33,8 @@ MachineFunctionInfo *X86MachineFunctionInfo::clone( void X86MachineFunctionInfo::initializeBaseYamlFields( const yaml::X86MachineFunctionInfo &YamlMFI) { AMXProgModel = YamlMFI.AMXProgModel; + FPClobberedByCall = YamlMFI.FPClobberedByCall; + HasPushSequences = YamlMFI.HasPushSequences; } void X86MachineFunctionInfo::anchor() { } diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 24371369d4a45..6414e6f22500c 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -38,6 +38,8 @@ template <> struct ScalarEnumerationTraits { struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo { AMXProgModelEnum AMXProgModel; + bool FPClobberedByCall; + bool HasPushSequences; X86MachineFunctionInfo() = default; X86MachineFunctionInfo(const llvm::X86MachineFunctionInfo &MFI); @@ -49,6 +51,8 @@ struct X86MachineFunctionInfo final : public yaml::MachineFunctionInfo { template <> struct MappingTraits { static void mapping(IO &YamlIO, X86MachineFunctionInfo &MFI) { YamlIO.mapOptional("amxProgModel", MFI.AMXProgModel); + YamlIO.mapOptional("FPClobberedByCall", MFI.FPClobberedByCall, false); + YamlIO.mapOptional("hasPushSequences", MFI.HasPushSequences, false); } }; } // end namespace yaml diff --git a/llvm/test/CodeGen/X86/pr114265.mir b/llvm/test/CodeGen/X86/pr114265.mir new file mode 100644 index 0000000000000..b6e724b4bd128 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr114265.mir @@ -0,0 +1,94 @@ +# The change being tested here is that X86InstrInfo's getSPAdjust correctly handles POP/ADD instructions within +# call sequences, as previously it assumed only PUSHes would be present for parameter passing. 
+# What this test actually does is recreate a situation where:
+# - something other than a PUSH appears in a call sequence, and
+# - failing to recognize the SP adjustment by such an instruction actually changes something
+# observable.
+#
+# To this end, we create a situation where:
+# - the FP must be spilled around calls
+# - a frame object is stored before a call frame and loaded in the call frame
+# (emulating an argument restored from spill), following a call which POPs something
+# - call-frame pseudos can *not* be simplified early in prologepilog
+#
+# The issue being corrected is the case where prologepilog sees the SP adjustment of PUSHes only, and not
+# POP/ADD. This adjustment value can be carried over and incorrectly applied to frame offsets. So,
+# in the following we ensure that references to a frame object carry the same offset.
+#
+# NB:
+# FPClobberedByCall and hasPushSequences have to be supplied in the MFI section. The former
+# is required to force spill of the FP, and the latter ensures call-frame pseudos are not simplified.
+#
+# The csr_64_intel_ocl_bi_avx512 regmask is used to ensure that the FP is spilled. Other CSRs may
+# achieve the same.
+# +# RUN: llc -mtriple x86_64-unknown-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s +--- +name: f +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: true +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$rdi', virtual-reg: '' } + - { reg: '$rsi', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 64 + adjustsStack: true + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 64, + alignment: 32, stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +machineFunctionInfo: + FPClobberedByCall: true + hasPushSequences: true +body: | + bb.0: + liveins: $rdi, $rsi + MOV64mr %stack.0, 1, $noreg, 0, $noreg, renamable $rdi :: (store (s64) into %stack.0) + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, 
implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %stack.0) + $rdi = COPY renamable $rax + CALL64r renamable undef $rsi, csr_64_intel_ocl_bi_avx512, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp +... +# ensure the store and load to the frame object have matching offsets after resolution. +# CHECK: MOV64mr $rsp, 1, $noreg, [[DISP:[1-9][0-9]+]] +# CHECK: MOV64rm $rsp, 1, $noreg, [[DISP]]