Skip to content

Commit

Permalink
[X86InstrInfo] support memfold on spillable inline asm (llvm#70832)
Browse files Browse the repository at this point in the history
This enables -regalloc=greedy to memfold spillable inline asm
MachineOperands.

Because no instruction selection framework marks MachineOperands as
spillable, no language frontend can observe functional changes from this
patch. That will change once instruction selection frameworks are
updated.

Link: llvm#20571
  • Loading branch information
nickdesaulniers committed Nov 29, 2023
1 parent 0ccef6a commit b053359
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 14 deletions.
4 changes: 2 additions & 2 deletions llvm/include/llvm/CodeGen/TargetInstrInfo.h
Expand Up @@ -2194,8 +2194,8 @@ class TargetInstrInfo : public MCInstrInfo {
/// finalize-isel. Example:
/// INLINEASM ... 262190 /* mem:m */, %stack.0.x.addr, 1, $noreg, 0, $noreg
/// we would add placeholders for: ^ ^ ^ ^
virtual void
getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops) const {
virtual void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
int FI) const {
llvm_unreachable("unknown number of operands necessary");
}

Expand Down
33 changes: 21 additions & 12 deletions llvm/lib/CodeGen/TargetInstrInfo.cpp
Expand Up @@ -578,28 +578,37 @@ static void foldInlineAsmMemOperand(MachineInstr *MI, unsigned OpNo, int FI,
foldInlineAsmMemOperand(MI, TiedTo, FI, TII);
}

// Change the operand from a register to a frame index.
MO.ChangeToFrameIndex(FI, MO.getTargetFlags());

SmallVector<MachineOperand, 4> NewOps;
TII.getFrameIndexOperands(NewOps);
SmallVector<MachineOperand, 5> NewOps;
TII.getFrameIndexOperands(NewOps, FI);
assert(!NewOps.empty() && "getFrameIndexOperands didn't create any operands");
MI->insert(MI->operands_begin() + OpNo + 1, NewOps);
MI->removeOperand(OpNo);
MI->insert(MI->operands_begin() + OpNo, NewOps);

// Change the previous operand to a MemKind InlineAsm::Flag. The second param
// is the per-target number of operands that represent the memory operand
// excluding this one (MD). This includes MO.
InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size() + 1);
InlineAsm::Flag F(InlineAsm::Kind::Mem, NewOps.size());
F.setMemConstraint(InlineAsm::ConstraintCode::m);
MachineOperand &MD = MI->getOperand(OpNo - 1);
MD.setImm(F);

// Update mayload/maystore metadata.
// Update mayload/maystore metadata, and memoperands.
MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
MachineOperand &ExtraMO = MI->getOperand(InlineAsm::MIOp_ExtraInfo);
if (RI.Reads)
if (RI.Reads) {
ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayLoad);
if (RI.Writes)
Flags |= MachineMemOperand::MOLoad;
}
if (RI.Writes) {
ExtraMO.setImm(ExtraMO.getImm() | InlineAsm::Extra_MayStore);
Flags |= MachineMemOperand::MOStore;
}
MachineFunction *MF = MI->getMF();
const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), Flags, MFI.getObjectSize(FI),
MFI.getObjectAlign(FI));
MI->addMemOperand(*MF, MMO);
}

// Returns nullptr if not possible to fold.
Expand Down Expand Up @@ -671,7 +680,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
if (NewMI)
MBB->insert(MI, NewMI);
} else if (MI.isInlineAsm()) {
NewMI = foldInlineAsmMemOperand(MI, Ops, FI, *this);
return foldInlineAsmMemOperand(MI, Ops, FI, *this);
} else {
// Ask the target to do the actual folding.
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
Expand Down Expand Up @@ -744,7 +753,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
if (NewMI)
NewMI = &*MBB.insert(MI, NewMI);
} else if (MI.isInlineAsm() && isLoadFromStackSlot(LoadMI, FrameIndex)) {
NewMI = foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
return foldInlineAsmMemOperand(MI, Ops, FrameIndex, *this);
} else {
// Ask the target to do the actual folding.
NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI, LIS);
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Expand Up @@ -10273,5 +10273,14 @@ void X86InstrInfo::genAlternativeCodeSequence(
}
}

// Append to Ops the operands of an X86 memory reference whose base is the
// frame index FI. Called when a spillable inline asm register operand is
// folded into a stack slot.
// See also: X86DAGToDAGISel::SelectInlineAsmMemoryOperand().
void X86InstrInfo::getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
                                         int FI) const {
  // Only the base is customized; every other field of the address mode is
  // left as default-constructed.
  X86AddressMode AddrMode;
  AddrMode.Base.FrameIndex = FI;
  AddrMode.BaseType = X86AddressMode::FrameIndexBase;
  AddrMode.getFullAddress(Ops);
}

#define GET_INSTRINFO_HELPERS
#include "X86GenInstrInfo.inc"
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.h
Expand Up @@ -659,6 +659,9 @@ class X86InstrInfo final : public X86GenInstrInfo {
return false;
}

/// Appends to \p Ops the operands that describe a memory reference based on
/// frame index \p FI (X86 override of the TargetInstrInfo hook).
void getFrameIndexOperands(SmallVectorImpl<MachineOperand> &Ops,
int FI) const override;

private:
/// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
/// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
Expand Down
232 changes: 232 additions & 0 deletions llvm/test/CodeGen/MIR/X86/inline-asm-rm-exhaustion.mir
@@ -0,0 +1,232 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -start-after=finalize-isel -regalloc=greedy -stop-after=greedy \
# RUN: -verify-machineinstrs -verify-regalloc %s -o - | FileCheck %s
--- |
target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"

define void @input(i32 %0) #0 {
call void asm "# $0", "rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(i32 %0)
ret void
}

define i32 @output() #0 {
%1 = alloca i32, align 4
call void asm "# $0", "=*rm,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %1)
%2 = load i32, ptr %1, align 4
ret i32 %2
}

define i32 @inout(i32 %0) #0 {
%2 = alloca i32, align 4
store i32 %0, ptr %2, align 4
call void asm "# $0 $1", "=*rm,0,~{ax},~{cx},~{dx},~{si},~{di},~{bx},~{bp}"(ptr nonnull elementtype(i32) %2, i32 %0)
%3 = load i32, ptr %2, align 4
ret i32 %3
}

attributes #0 = { nounwind }

...
---
name: input
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: false
registers:
- { id: 0, class: gr32, preferred-register: '' }
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
functionContext: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
stack: []
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.1):
; CHECK-LABEL: name: input
; CHECK: INLINEASM &"# $0", 8 /* mayload attdialect */, 262190 /* mem:m */, %fixed-stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (load (s32) from %fixed-stack.0, align 16)
; CHECK-NEXT: RET 0
%0:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
INLINEASM &"# $0", 0 /* attdialect */, 1076101129 /* reguse:GR32 spillable */, %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
RET 0
...
---
name: output
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: false
registers:
- { id: 0, class: gr32, preferred-register: '' }
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
functionContext: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack: []
stack:
- { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4,
stack-id: default, callee-saved-register: '', callee-saved-restored: true,
debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.0):
; CHECK-LABEL: name: output
; CHECK: INLINEASM &"# $0", 16 /* maystore attdialect */, 262190 /* mem:m */, %stack.1, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (store (s32) into %stack.1)
; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1)
; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %ir.1)
; CHECK-NEXT: $eax = COPY [[MOV32rm]]
; CHECK-NEXT: RET 0, $eax
INLINEASM &"# $0", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
MOV32mr %stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.1)
$eax = COPY %0
RET 0, $eax
...
---
name: inout
alignment: 16
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
failedISel: false
tracksRegLiveness: true
hasWinCFI: false
callsEHReturn: false
callsUnwindInit: false
hasEHCatchret: false
hasEHScopes: false
hasEHFunclets: false
isOutlined: false
debugInstrRef: false
failsVerification: false
tracksDebugUserValues: false
registers:
- { id: 0, class: gr32, preferred-register: '' }
- { id: 1, class: gr32, preferred-register: '' }
liveins: []
frameInfo:
isFrameAddressTaken: false
isReturnAddressTaken: false
hasStackMap: false
hasPatchPoint: false
stackSize: 0
offsetAdjustment: 0
maxAlignment: 4
adjustsStack: false
hasCalls: false
stackProtector: ''
functionContext: ''
maxCallFrameSize: 4294967295
cvBytesOfCalleeSavedRegisters: 0
hasOpaqueSPAdjustment: false
hasVAStart: false
hasMustTailInVarArgFunc: false
hasTailCall: false
localFrameSize: 0
savePoint: ''
restorePoint: ''
fixedStack:
- { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default,
isImmutable: false, isAliased: false, callee-saved-register: '',
callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '',
debug-info-location: '' }
stack: []
entry_values: []
callSites: []
debugValueSubstitutions: []
constants: []
machineFunctionInfo: {}
body: |
bb.0 (%ir-block.1):
; CHECK-LABEL: name: inout
; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
; CHECK-NEXT: MOV32mr %stack.0, 1, $noreg, 0, $noreg, [[MOV32rm]] :: (store (s32) into %stack.0)
; CHECK-NEXT: INLINEASM &"# $0 $1", 24 /* mayload maystore attdialect */, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 262190 /* mem:m */, %stack.0, 1, $noreg, 0, $noreg, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $cx, 12 /* clobber */, implicit-def dead early-clobber $dx, 12 /* clobber */, implicit-def dead early-clobber $si, 12 /* clobber */, implicit-def dead early-clobber $di, 12 /* clobber */, implicit-def dead early-clobber $bx, 12 /* clobber */, implicit-def dead early-clobber $bp :: (load store (s32) on %stack.0)
; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0)
; CHECK-NEXT: MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, [[MOV32rm1]] :: (store (s32) into %ir.2, align 16)
; CHECK-NEXT: $eax = COPY [[MOV32rm1]]
; CHECK-NEXT: RET 0, $eax
%1:gr32 = MOV32rm %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %fixed-stack.0, align 16)
INLINEASM &"# $0 $1", 0 /* attdialect */, 1076101130 /* regdef:GR32 spillable */, def %0, 2147483657 /* reguse tiedto:$0 */, %1(tied-def 3), 12 /* clobber */, implicit-def early-clobber $ax, 12 /* clobber */, implicit-def early-clobber $cx, 12 /* clobber */, implicit-def early-clobber $dx, 12 /* clobber */, implicit-def early-clobber $si, 12 /* clobber */, implicit-def early-clobber $di, 12 /* clobber */, implicit-def early-clobber $bx, 12 /* clobber */, implicit-def early-clobber $bp
MOV32mr %fixed-stack.0, 1, $noreg, 0, $noreg, %0 :: (store (s32) into %ir.2, align 16)
$eax = COPY %0
RET 0, $eax
...

0 comments on commit b053359

Please sign in to comment.