-
Notifications
You must be signed in to change notification settings - Fork 15.1k
X86: Stop using MachineFunction in getPointerRegClass #156880
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
X86: Stop using MachineFunction in getPointerRegClass #156880
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-x86 Author: Matt Arsenault (arsenm) ChangesThis should be a low level function used to interpret an This will help unify X86 onto a pending replacement mechanism for Full diff: https://github.com/llvm/llvm-project/pull/156880.diff 8 Files Affected:
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index d406277e440bb..ff22ee8c86fac 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -476,7 +476,8 @@ static bool isIndirectBranchOrTailCall(const MachineInstr &MI) {
return MI.getDesc().isIndirectBranch() /*Make below code in a good shape*/ ||
Opc == X86::TAILJMPr || Opc == X86::TAILJMPm ||
Opc == X86::TAILJMPr64 || Opc == X86::TAILJMPm64 ||
- Opc == X86::TCRETURNri || Opc == X86::TCRETURNmi ||
+ Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
+ Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNmi ||
Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNmi64 ||
Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TAILJMPr64_REX ||
Opc == X86::TAILJMPm64_REX;
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 0e6b4dffec3a6..9457e718de699 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -269,6 +269,8 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case X86::TCRETURNdi:
case X86::TCRETURNdicc:
case X86::TCRETURNri:
+ case X86::TCRETURN_WIN64ri:
+ case X86::TCRETURN_HIPE32ri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
case X86::TCRETURNdi64cc:
@@ -346,8 +348,9 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
MIB.add(MBBI->getOperand(i));
- } else if ((Opcode == X86::TCRETURNri64) ||
- (Opcode == X86::TCRETURNri64_ImpCall)) {
+ } else if (Opcode == X86::TCRETURNri64 ||
+ Opcode == X86::TCRETURNri64_ImpCall ||
+ Opcode == X86::TCRETURN_WIN64ri) {
JumpTarget.setIsKill();
BuildMI(MBB, MBBI, DL,
TII->get(IsX64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index cba7843d53e3f..a293b4c87cfe4 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -2398,7 +2398,8 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
}
static bool isTailCallOpcode(unsigned Opc) {
- return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
+ return Opc == X86::TCRETURNri || Opc == X86::TCRETURN_WIN64ri ||
+ Opc == X86::TCRETURN_HIPE32ri || Opc == X86::TCRETURNdi ||
Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
Opc == X86::TCRETURNri64_ImpCall || Opc == X86::TCRETURNdi64 ||
Opc == X86::TCRETURNmi64;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 927b2c8b22f05..734c488fe3159 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1326,7 +1326,11 @@ def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
// Match an X86tcret that uses less than 7 volatile registers.
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>,
- Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
+ Requires<[Not64BitMode, IsNotHiPECCFunc, NotUseIndirectThunkCalls]>;
+
+def : Pat<(X86tcret GR32:$dst, timm:$off),
+ (TCRETURN_HIPE32ri GR32:$dst, timm:$off)>,
+ Requires<[Not64BitMode, IsHiPECCFunc, NotUseIndirectThunkCalls]>;
// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
@@ -1344,6 +1348,10 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off),
(TCRETURNdi texternalsym:$dst, timm:$off)>,
Requires<[NotLP64]>;
+def : Pat<(X86tcret GR64_TCW64:$dst, timm:$off),
+ (TCRETURN_WIN64ri GR64_TCW64:$dst, timm:$off)>,
+ Requires<[In64BitMode, IsWin64CCFunc, NotUseIndirectThunkCalls]>;
+
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
(TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>,
Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index 22253bf0413a4..3acffe2a209fb 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -282,6 +282,12 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
[]>, Sched<[WriteJump]>;
def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset),
[]>, Sched<[WriteJump]>;
+
+ def TCRETURN_WIN64ri : PseudoI<(outs), (ins GR64_TCW64:$dst, i32imm:$offset),
+ []>, Sched<[WriteJump]>;
+ def TCRETURN_HIPE32ri : PseudoI<(outs), (ins GR32:$dst, i32imm:$offset),
+ []>, Sched<[WriteJump]>;
+
let mayLoad = 1 in
def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset),
[]>, Sched<[WriteJumpLd]>;
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index df1541e9085bb..77efdde77eceb 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -233,6 +233,12 @@ let RecomputePerFunction = 1 in {
"!Subtarget->hasSSE41()">;
def ImportCallOptimizationEnabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
def ImportCallOptimizationDisabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">;
+
+ def IsWin64CCFunc : Predicate<"MF->getFunction().getCallingConv() == CallingConv::Win64">;
+ def IsHiPECCFunc : Predicate<"MF->getFunction().getCallingConv() == CallingConv::HiPE">;
+
+ def IsNotHiPECCFunc : Predicate<
+ "MF->getFunction().getCallingConv() != CallingConv::HiPE">;
}
def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 3f4955f28e68b..6b4ccc2bec9c6 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -220,24 +220,10 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF,
// NOSP does not contain RIP, so no special case here.
return &X86::GR32_NOREX_NOSPRegClass;
case 4: // Available for tailcall (not callee-saved GPRs).
- return getGPRsForTailCall(MF);
+ return Is64Bit ? &X86::GR64_TCRegClass : &X86::GR32_TCRegClass;
}
}
-const TargetRegisterClass *
-X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const {
- const Function &F = MF.getFunction();
- if (IsWin64 || IsUEFI64 || (F.getCallingConv() == CallingConv::Win64))
- return &X86::GR64_TCW64RegClass;
- else if (Is64Bit)
- return &X86::GR64_TCRegClass;
-
- bool hasHipeCC = (F.getCallingConv() == CallingConv::HiPE);
- if (hasHipeCC)
- return &X86::GR32RegClass;
- return &X86::GR32_TCRegClass;
-}
-
const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &X86::CCRRegClass) {
@@ -1016,6 +1002,8 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg(
case X86::RETI64:
case X86::TCRETURNdi:
case X86::TCRETURNri:
+ case X86::TCRETURN_WIN64ri:
+ case X86::TCRETURN_HIPE32ri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index 2f4c55cfad6d2..d022e5ab87945 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -87,11 +87,6 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
- /// getGPRsForTailCall - Returns a register class with registers that can be
- /// used in forming tail calls.
- const TargetRegisterClass *
- getGPRsForTailCall(const MachineFunction &MF) const;
-
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
3fe72b6
to
89e5ab5
Compare
def ImportCallOptimizationEnabled : Predicate<"MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">; | ||
def ImportCallOptimizationDisabled : Predicate<"!MF->getFunction().getParent()->getModuleFlag(\"import-call-optimization\")">; | ||
|
||
def IsWin64CCFunc : Predicate<"MF->getFunction().getCallingConv() == CallingConv::Win64">; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this correct when targeting Windows OS? I'm pretty sure in that context, we use the win64 convention, but we model it as the platform C calling convention. Maybe that doesn't matter for the purpose you're using the predicate for, but the predicate may be misleading to the reader.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess I'm saying using this condition is probably more correct and if it passed tests I'd just say ship it:
CC == CallingConv::Win64 || (Triple.isOSWindows() && Triple.isArch64Bit() && CC == CallingConv::C)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is one win64 execution test failing, not sure if it's related to this PR. If this does fix it, we're missing tests as usual
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Turns out there's a isCallingConvWin64 helper for this
e39fb64
to
50b5094
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good assuming the tests are green
50b5094
to
df38881
Compare
This should be a low level function used to interpret an MCInstrDesc that only depends on the hwmode. It should not depend on other dynamic context like the parent function. In general more ABI properties like this should be expressed directly in the instruction definitions, so introduce new TCRETURN pseudos to use with the special case register classes (e.g. in a better future the callee saved registers would always be encoded directly in a mask on the return instruction). This will help unify X86 onto a pending replacement mechanism for getPointerRegClass.
df38881
to
4e37486
Compare
This caused quite widespread breakage when targeting windows x86_64. It doesn't seem to trigger any failed asserts anywhere, but just causes the built binaries to crash/misbehave. I'll try to pinpoint some form of reproducer for this. |
I've got one breakage pinpointed down to this snippet: __extension__ typedef unsigned long long size_t;
void * __attribute__((__cdecl__)) memcpy(void * __restrict__ _Dst,const void * __restrict__ _Src,size_t _Size) ;
typedef struct {
long long __clang_max_align_nonce1
__attribute__((__aligned__(__alignof__(long long))));
long double __clang_max_align_nonce2
__attribute__((__aligned__(__alignof__(long double))));
} max_align_t;
__extension__ typedef unsigned long long uintptr_t;
typedef enum memory_order {
memory_order_relaxed = 0,
memory_order_consume = 1,
memory_order_acquire = 2,
memory_order_release = 3,
memory_order_acq_rel = 4,
memory_order_seq_cst = 5
} memory_order;
typedef _Atomic(uintptr_t) atomic_uintptr_t;
typedef union {
void *nc;
const void *c;
} AVRefStructOpaque;
typedef struct RefCount {
atomic_uintptr_t refcount;
AVRefStructOpaque opaque;
void (*free_cb)(AVRefStructOpaque opaque, void *obj);
void (*free)(void *ref);
} RefCount;
static RefCount *get_refcount(void *obj)
{
RefCount *ref = (RefCount*)((char*)obj - (((sizeof(RefCount))+(((64) > (_Alignof(max_align_t)) ? (64) : (_Alignof(max_align_t))))-1)&~((((64) > (_Alignof(max_align_t)) ? (64) : (_Alignof(max_align_t))))-1)));
((void)0);
return ref;
}
void av_refstruct_unref(void *objp)
{
void *obj;
RefCount *ref;
memcpy(&obj, objp, sizeof(obj));
if (!obj)
return;
memcpy(objp, &(void *){ ((void*)0) }, sizeof(obj));
ref = get_refcount(obj);
if (__c11_atomic_fetch_sub(&ref->refcount, 1, memory_order_acq_rel) == 1) {
if (ref->free_cb)
ref->free_cb(ref->opaque, obj);
ref->free(ref);
}
return;
} Compiled like this: $ clang-good -target x86_64-windows-gnu repro.c -O2 -S -o out-good.s
$ clang-bad -target x86_64-windows-gnu repro.c -O2 -S -o out-bad.s
$ diff -u out-good.s out-bad.s --- out-good.s 2025-09-11 13:10:33.754494513 +0300
+++ out-bad.s 2025-09-11 13:10:34.988497370 +0300
@@ -22,31 +22,30 @@
subq $40, %rsp
.seh_stackalloc 40
.seh_endprologue
- movq (%rcx), %rdx
- testq %rdx, %rdx
+ movq (%rcx), %rsi
+ testq %rsi, %rsi
je .LBB0_5
# %bb.1: # %if.end
movq $0, (%rcx)
- lock decq -64(%rdx)
+ lock decq -64(%rsi)
jne .LBB0_5
# %bb.2: # %if.then1
- leaq -64(%rdx), %rsi
- movq -48(%rdx), %rax
+ leaq -64(%rsi), %rdi
+ movq -48(%rsi), %rax
testq %rax, %rax
je .LBB0_4
# %bb.3: # %if.then3
- movq -56(%rdx), %rcx
- movq %rdx, %rdi
+ movq -56(%rsi), %rcx
+ movq %rsi, %rdx
callq *%rax
- movq %rdi, %rdx
.LBB0_4: # %if.end5
- movq %rsi, %rcx
+ movq %rdi, %rcx
.seh_startepilogue
addq $40, %rsp
popq %rdi
popq %rsi
.seh_endepilogue
- rex64 jmpq *-40(%rdx) # TAILCALL
+ rex64 jmpq *-40(%rsi) # TAILCALL
.LBB0_5: # %cleanup
.seh_startepilogue
addq $40, %rsp Most of this is just cosmetic changes where different registers are used, but one instruction, |
Should be fixed by #158055 |
This should be a low level function used to interpret an
MCInstrDesc that only depends on the hwmode. It should not depend
on other dynamic context like the parent function. In general more
ABI properties like this should be expressed directly in the instruction
definitions, so introduce new TCRETURN pseudos to use with the special
case register classes (e.g. in a better future the callee saved registers
would always be encoded directly in a mask on the return instruction).
This will help unify X86 onto a pending replacement mechanism for
getPointerRegClass.