Skip to content

Commit

Permalink
[X86][VARARG] Assign MMO earlier to avoid prolog insert point been su…
Browse files Browse the repository at this point in the history
…nk across VASTART_SAVE_XMM_REGS

The changes in D80163 defered the assignment of MachineMemOperand (MMO)
until the X86ExpandPseudo pass. This will result in crash due to prolog
insert point been sunk across the pseudo instruction VASTART_SAVE_XMM_REGS.

Moving the assignment to the creation of the node can avoid the problem.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D112859
  • Loading branch information
phoebewang authored and tstellar committed Nov 24, 2021
1 parent 41c85bb commit 19b8368
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 43 deletions.
33 changes: 11 additions & 22 deletions llvm/lib/Target/X86/X86ExpandPseudo.cpp
Expand Up @@ -657,35 +657,24 @@ void X86ExpandPseudo::ExpandVastartSaveXmmRegs(
EntryBlk->end());
TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);

int64_t FrameIndex = VAStartPseudoInstr->getOperand(1).getImm();
Register BaseReg;
uint64_t FrameOffset =
X86FL->getFrameIndexReference(*Func, FrameIndex, BaseReg).getFixed();
uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(2).getImm();
uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

// TODO: add support for YMM and ZMM here.
unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

// In the XMM save block, save all the XMM argument registers.
for (int64_t OpndIdx = 3, RegIdx = 0;
for (int64_t OpndIdx = 7, RegIdx = 0;
OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
OpndIdx++, RegIdx++) {

int64_t Offset = FrameOffset + VarArgsRegsOffset + RegIdx * 16;

MachineMemOperand *MMO = Func->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*Func, FrameIndex, Offset),
MachineMemOperand::MOStore,
/*Size=*/16, Align(16));

BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc))
.addReg(BaseReg)
.addImm(/*Scale=*/1)
.addReg(/*IndexReg=*/0)
.addImm(/*Disp=*/Offset)
.addReg(/*Segment=*/0)
.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg())
.addMemOperand(MMO);
auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
for (int i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
else
NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
}
NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
assert(Register::isPhysicalRegister(
VAStartPseudoInstr->getOperand(OpndIdx).getReg()));
}
Expand Down
14 changes: 10 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -3533,13 +3533,19 @@ void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
SmallVector<SDValue, 12> SaveXMMOps;
SaveXMMOps.push_back(Chain);
SaveXMMOps.push_back(ALVal);
SaveXMMOps.push_back(
DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
SaveXMMOps.push_back(RSFIN);
SaveXMMOps.push_back(
DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
llvm::append_range(SaveXMMOps, LiveXMMRegs);
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
MVT::Other, SaveXMMOps));
MachineMemOperand *StoreMMO =
DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo::getFixedStack(
DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
Offset),
MachineMemOperand::MOStore, 128, Align(16));
MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
DL, DAG.getVTList(MVT::Other),
SaveXMMOps, MVT::i8, StoreMMO));
}

if (!MemOps.empty())
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -627,10 +627,6 @@ namespace llvm {
// packed single precision.
DPBF16PS,

// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,

// Windows's _chkstk call to do stack probing.
WIN_ALLOCA,

Expand Down Expand Up @@ -848,6 +844,10 @@ namespace llvm {
AESENCWIDE256KL,
AESDECWIDE256KL,

// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
VASTART_SAVE_XMM_REGS,

// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
Expand Down
12 changes: 4 additions & 8 deletions llvm/lib/Target/X86/X86InstrCompiler.td
Expand Up @@ -69,16 +69,12 @@ def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
let SchedRW = [WriteSystem] in {

// x86-64 va_start lowering magic.
let hasSideEffects = 1, Defs = [EFLAGS] in {
let hasSideEffects = 1, mayStore = 1, Defs = [EFLAGS] in {
def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
(outs),
(ins GR8:$al,
i32imm:$regsavefi, i32imm:$offset,
variable_ops),
"#VASTART_SAVE_XMM_REGS $al, $regsavefi, $offset",
[(X86vastart_save_xmm_regs GR8:$al,
timm:$regsavefi,
timm:$offset),
(ins GR8:$al, i8mem:$regsavefi, variable_ops),
"#VASTART_SAVE_XMM_REGS $al, $regsavefi",
[(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi),
(implicit EFLAGS)]>;
}

Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/X86/X86InstrInfo.td
Expand Up @@ -91,8 +91,7 @@ def SDT_X86Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_X86NtBrind : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;

def SDT_X86VASTART_SAVE_XMM_REGS : SDTypeProfile<0, -1, [SDTCisVT<0, i8>,
SDTCisVT<1, iPTR>,
SDTCisVT<2, iPTR>]>;
SDTCisPtrTy<1>]>;

def SDT_X86VAARG : SDTypeProfile<1, -1, [SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
Expand Down Expand Up @@ -184,7 +183,7 @@ def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
def X86vastart_save_xmm_regs :
SDNode<"X86ISD::VASTART_SAVE_XMM_REGS",
SDT_X86VASTART_SAVE_XMM_REGS,
[SDNPHasChain, SDNPVariadic]>;
[SDNPHasChain, SDNPMayStore, SDNPMemOperand, SDNPVariadic]>;
def X86vaarg64 :
SDNode<"X86ISD::VAARG_64", SDT_X86VAARG,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore,
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/vaargs-prolog-insert.ll
Expand Up @@ -5,6 +5,7 @@
define void @reduce(i32, i32, i32, i32, i32, i32, ...) nounwind {
; CHECK-LABEL: reduce:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.3:
Expand All @@ -21,15 +22,14 @@ define void @reduce(i32, i32, i32, i32, i32, i32, ...) nounwind {
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: subq $56, %rsp
; CHECK-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq %rax, 16
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax
; CHECK-NEXT: movq %rax, 8
; CHECK-NEXT: movl $48, 4
; CHECK-NEXT: movl $48, 0
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: retq
br i1 undef, label %8, label %7

Expand Down

0 comments on commit 19b8368

Please sign in to comment.