4 changes: 4 additions & 0 deletions llvm/lib/IR/Attributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1437,6 +1437,10 @@ Type *AttributeList::getParamByValType(unsigned Index) const {
return getAttributes(Index+FirstArgIndex).getByValType();
}

/// Fetches the attributes of the parameter at \p Index (shifted by
/// FirstArgIndex) and returns the preallocated type they report.
Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
  const unsigned AttrIndex = Index + FirstArgIndex;
  return getAttributes(AttrIndex).getPreallocatedType();
}

/// Returns the stack alignment reported by the attributes stored at \p Index.
/// \p Index is handed to getAttributes() as-is; no FirstArgIndex offset is
/// applied here.
MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
  const auto AttrsAtIndex = getAttributes(Index);
  return AttrsAtIndex.getStackAlignment();
}
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/IR/Function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ bool Argument::hasInAllocaAttr() const {
return hasAttribute(Attribute::InAlloca);
}

/// True only for a pointer-typed argument that carries the preallocated
/// attribute; any non-pointer argument yields false without consulting the
/// attribute list.
bool Argument::hasPreallocatedAttr() const {
  return getType()->isPointerTy() && hasAttribute(Attribute::Preallocated);
}

bool Argument::hasPassPointeeByValueAttr() const {
if (!getType()->isPointerTy()) return false;
AttributeList Attrs = getParent()->getAttributes();
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/X86/X86CallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -789,8 +789,9 @@ def CC_X86_32_Vector_Darwin : CallingConv<[
/// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
/// values are spilled on the stack.
def CC_X86_32_Common : CallingConv<[
// Handles byval parameters.
// Handles byval/preallocated parameters.
CCIfByVal<CCPassByVal<4, 4>>,
CCIfPreallocated<CCPassByVal<4, 4>>,

// The first 3 float or double arguments, if marked 'inreg' and if the call
// is not a vararg call and if SSE2 is available, are passed in SSE registers.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86FastISel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3245,7 +3245,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
return false;

for (auto Flag : CLI.OutFlags)
if (Flag.isSwiftError())
if (Flag.isSwiftError() || Flag.isPreallocated())
return false;

SmallVector<MVT, 16> OutVTs;
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/X86/X86FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,

// Reports whether this function has a reserved call frame.
// NOTE(review): this span comes from a diff view, so the tail of the return
// expression is listed twice: first the pre-change form (no variable-sized
// objects and no push sequences), then the post-change form that additionally
// requires that the function has no preallocated call.
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo().hasVarSizedObjects() &&
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
!MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
Expand All @@ -67,6 +68,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  // A reserved call frame is enough on its own.
  if (hasReservedCallFrame(MF))
    return true;

  // So is the presence of any preallocated call in the function.
  if (MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall())
    return true;

  // A frame pointer counts only when the stack needs no realignment.
  if (hasFP(MF) && !TRI->needsStackRealignment(MF))
    return true;

  // Last resort: the function has a base pointer.
  return TRI->hasBasePointer(MF);
}
Expand All @@ -90,10 +92,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
TRI->needsStackRealignment(MF) ||
MFI.hasVarSizedObjects() ||
TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
MFI.hasCopyImplyingStackAdjustment());
Expand Down
33 changes: 33 additions & 0 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5552,6 +5552,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::PREALLOCATED_SETUP: {
  // Replace the generic setup node with the PREALLOCATED_SETUP machine node.
  // The call site held in operand 1 is translated to its per-function id,
  // which is passed as an i32 target constant ahead of the chain.
  auto *X86FI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
  const auto *CallSite = cast<SrcValueSDNode>(Node->getOperand(1))->getValue();
  auto PreallocatedId = X86FI->getPreallocatedIdForCallSite(CallSite);
  SDValue InChain = Node->getOperand(0);
  SDValue IdOperand = CurDAG->getTargetConstant(PreallocatedId, dl, MVT::i32);
  MachineSDNode *SetupNode = CurDAG->getMachineNode(
      TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, IdOperand, InChain);
  // The setup node's only result is its chain.
  ReplaceUses(SDValue(Node, 0), SDValue(SetupNode, 0));
  CurDAG->RemoveDeadNode(Node);
  return;
}
case ISD::PREALLOCATED_ARG: {
  // Replace the generic node with the PREALLOCATED_ARG machine node. Its
  // operands are (call id, argument index, chain) and its results are
  // (argument pointer, chain).
  auto *X86FI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
  const auto *CallSite = cast<SrcValueSDNode>(Node->getOperand(1))->getValue();
  auto PreallocatedId = X86FI->getPreallocatedIdForCallSite(CallSite);
  SDValue InChain = Node->getOperand(0);
  SDValue IdOperand = CurDAG->getTargetConstant(PreallocatedId, dl, MVT::i32);
  SDValue ArgNo = Node->getOperand(2);
  SDValue Ops[] = {IdOperand, ArgNo, InChain};
  auto ResultTypes = CurDAG->getVTList(
      TLI->getPointerTy(CurDAG->getDataLayout()), MVT::Other);
  MachineSDNode *ArgNode = CurDAG->getMachineNode(
      TargetOpcode::PREALLOCATED_ARG, dl, ResultTypes, Ops);
  // Result 0 is the argument pointer, result 1 the chain.
  ReplaceUses(SDValue(Node, 0), SDValue(ArgNode, 0));
  ReplaceUses(SDValue(Node, 1), SDValue(ArgNode, 1));
  CurDAG->RemoveDeadNode(Node);
  return;
}
}

SelectCode(Node);
Expand Down
53 changes: 49 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3945,6 +3945,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (ArgLocs.back().getLocMemOffset() != 0)
report_fatal_error("any parameter with the inalloca attribute must be "
"the only memory argument");
} else if (CLI.IsPreallocated) {
assert(ArgLocs.back().isMemLoc() &&
"cannot use preallocated attribute on a register "
"parameter");
SmallVector<size_t, 4> PreallocatedOffsets;
for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
}
}
auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
NumBytesToPush = 0;
}

if (!IsSibcall && !IsMustTail)
Expand Down Expand Up @@ -3972,9 +3987,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
++I, ++OutIndex) {
assert(OutIndex < Outs.size() && "Invalid Out index");
// Skip inalloca arguments, they have already been written.
// Skip inalloca/preallocated arguments, they have already been written.
ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
if (Flags.isInAlloca())
if (Flags.isInAlloca() || Flags.isPreallocated())
continue;

CCValAssign &VA = ArgLocs[I];
Expand Down Expand Up @@ -4161,8 +4176,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(VA.isMemLoc());
SDValue Arg = OutVals[OutsIndex];
ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
// Skip inalloca arguments. They don't require any work.
if (Flags.isInAlloca())
// Skip inalloca/preallocated arguments. They don't require any work.
if (Flags.isInAlloca() || Flags.isPreallocated())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
Expand Down Expand Up @@ -33076,6 +33091,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BB->addLiveIn(BasePtr);
return BB;
}
case TargetOpcode::PREALLOCATED_SETUP: {
  assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
  // Expanding a setup records that this function contains a preallocated
  // call.
  auto *X86FI = MF->getInfo<X86MachineFunctionInfo>();
  X86FI->setHasPreallocatedCall(true);
  // Operand 0 carries the preallocated call id; look up the stack size that
  // was recorded for it.
  const int64_t CallId = MI.getOperand(0).getImm();
  const size_t FrameBytes = X86FI->getPreallocatedStackSize(CallId);
  assert(FrameBytes != 0 && "0 stack adjustment");
  LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
                    << FrameBytes << "\n");
  // Emit ESP = ESP - FrameBytes in front of the pseudo, then drop the pseudo.
  BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
      .addReg(X86::ESP)
      .addImm(FrameBytes);
  MI.eraseFromParent();
  return BB;
}
case TargetOpcode::PREALLOCATED_ARG: {
  assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
  // Operand layout: 0 = destination register, 1 = preallocated call id,
  // 2 = index of the requested argument.
  const int64_t CallId = MI.getOperand(1).getImm();
  const int64_t ArgNo = MI.getOperand(2).getImm();
  auto *X86FI = MF->getInfo<X86MachineFunctionInfo>();
  const size_t OffsetFromSP = X86FI->getPreallocatedArgOffsets(CallId)[ArgNo];
  LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgNo
                    << ", arg offset " << OffsetFromSP << "\n");
  // Compute stack pointer + offset into the destination register via LEA,
  // then drop the pseudo.
  auto Lea = BuildMI(*BB, MI, DL, TII->get(X86::LEA32r),
                     MI.getOperand(0).getReg());
  addRegOffset(Lea, X86::ESP, false, OffsetFromSP);
  MI.eraseFromParent();
  return BB;
}
}
}

Expand Down
39 changes: 39 additions & 0 deletions llvm/lib/Target/X86/X86MachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
#define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"

Expand Down Expand Up @@ -103,6 +105,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// True if this function has WIN_ALLOCA instructions.
bool HasWinAlloca = false;

/// True if this function has any preallocated calls.
bool HasPreallocatedCall = false;

/// Maps a call-site value to its preallocated id. Entries are created on
/// first lookup by getPreallocatedIdForCallSite, which hands out ids in
/// insertion order.
ValueMap<const Value *, size_t> PreallocatedIds;
/// Stack size recorded for each preallocated id (indexed by id); an entry is
/// 0 until setPreallocatedStackSize stores a value.
SmallVector<size_t, 0> PreallocatedStackSizes;
/// Argument offsets recorded for each preallocated id (indexed by id); an
/// entry is empty until setPreallocatedArgOffsets stores a list.
SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;

private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
Expand Down Expand Up @@ -184,6 +193,36 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {

bool hasWinAlloca() const { return HasWinAlloca; }
void setHasWinAlloca(bool v) { HasWinAlloca = v; }

/// Returns the HasPreallocatedCall flag.
bool hasPreallocatedCall() const {
  return HasPreallocatedCall;
}
/// Overwrites the HasPreallocatedCall flag with \p v.
void setHasPreallocatedCall(bool v) {
  HasPreallocatedCall = v;
}

/// Returns the preallocated id of call site \p CS, allocating the next free
/// id on first use. A newly allocated id also gets a zero stack size and an
/// empty argument-offset list appended, so both vectors can be indexed by it.
size_t getPreallocatedIdForCallSite(const Value *CS) {
  const size_t NextId = PreallocatedIds.size();
  auto Lookup = PreallocatedIds.insert({CS, NextId});
  const bool IsNewCallSite = Lookup.second;
  if (!IsNewCallSite)
    return Lookup.first->second;

  PreallocatedStackSizes.push_back(0);
  PreallocatedArgOffsets.emplace_back();
  return Lookup.first->second;
}

/// Records \p StackSize as the stack size of preallocated id \p Id.
void setPreallocatedStackSize(size_t Id, size_t StackSize) {
  size_t &Slot = PreallocatedStackSizes[Id];
  Slot = StackSize;
}

/// Returns the stack size recorded for preallocated id \p Id. Asserts that a
/// non-zero size has been stored.
size_t getPreallocatedStackSize(const size_t Id) {
  const size_t RecordedSize = PreallocatedStackSizes[Id];
  assert(RecordedSize != 0 && "stack size not set");
  return RecordedSize;
}

/// Replaces the argument offsets stored for preallocated id \p Id with a copy
/// of \p AO.
void setPreallocatedArgOffsets(size_t Id, ArrayRef<size_t> AO) {
  auto &Offsets = PreallocatedArgOffsets[Id];
  Offsets.assign(AO.begin(), AO.end());
}

/// Returns a view of the argument offsets recorded for preallocated id
/// \p Id. Asserts that a non-empty list has been stored.
///
/// The result is returned as a plain (non-const) ArrayRef by value: a
/// top-level const on a by-value return adds nothing for callers and only
/// inhibits moves, and ArrayRef is already a read-only view of its elements.
ArrayRef<size_t> getPreallocatedArgOffsets(const size_t Id) {
  assert(!PreallocatedArgOffsets[Id].empty() && "arg offsets not set");
  return PreallocatedArgOffsets[Id];
}
};

} // End llvm namespace
Expand Down
28 changes: 16 additions & 12 deletions llvm/lib/Target/X86/X86RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,18 +627,22 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
}

// Decides whether this function needs a separate base pointer register.
// NOTE(review): this span comes from a diff view and lists the body twice:
// first the pre-change statements, then the post-change statements, which
// begin with the preallocated-call test.
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();

if (!EnableBasePointer)
return false;

// When we need stack realignment, we can't address the stack from the frame
// pointer. When we have dynamic allocas or stack-adjusting inline asm, we
// can't address variables from the stack pointer. MS inline asm can
// reference locals while also adjusting the stack pointer. When we can't
// use both the SP and the FP, we need a separate base pointer register.
bool CantUseFP = needsStackRealignment(MF);
return CantUseFP && CantUseSP(MFI);
// Post-change body: a function containing a preallocated call reports a base
// pointer unconditionally, before EnableBasePointer is consulted.
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
if (X86FI->hasPreallocatedCall())
return true;

const MachineFrameInfo &MFI = MF.getFrameInfo();

if (!EnableBasePointer)
return false;

// When we need stack realignment, we can't address the stack from the frame
// pointer. When we have dynamic allocas or stack-adjusting inline asm, we
// can't address variables from the stack pointer. MS inline asm can
// reference locals while also adjusting the stack pointer. When we can't
// use both the SP and the FP, we need a separate base pointer register.
bool CantUseFP = needsStackRealignment(MF);
return CantUseFP && CantUseSP(MFI);
}

bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Coroutines/CoroSplit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1015,9 +1015,9 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {

// CI should not have any ABI-impacting function attributes.
static const Attribute::AttrKind ABIAttrs[] = {
Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
Attribute::InReg, Attribute::Returned, Attribute::SwiftSelf,
Attribute::SwiftError};
Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
Attribute::Preallocated, Attribute::InReg, Attribute::Returned,
Attribute::SwiftSelf, Attribute::SwiftError};
AttributeList Attrs = CI.getAttributes();
for (auto AK : ABIAttrs)
if (Attrs.hasParamAttribute(0, AK))
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/IPO/Attributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1363,7 +1363,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
AttributeList FnAttributeList = Fn->getAttributes();
if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
LLVM_DEBUG(
dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
return false;
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4455,7 +4455,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
AAValueSimplifyImpl::initialize(A);
if (!getAnchorScope() || getAnchorScope()->isDeclaration())
indicatePessimisticFixpoint();
if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
Attribute::StructRet, Attribute::Nest},
/* IgnoreSubsumingPositions */ true))
indicatePessimisticFixpoint();

Expand Down Expand Up @@ -5695,7 +5696,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {

// TODO: From readattrs.ll: "inalloca parameters are always
// considered written"
if (hasAttr({Attribute::InAlloca})) {
if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
removeKnownBits(NO_WRITES);
removeAssumedBits(NO_WRITES);
}
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -483,9 +483,10 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V,
// We consider arguments of non-internal functions to be intrinsically alive as
// well as arguments to functions which have their "address taken".
void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
// Functions with inalloca parameters are expecting args in a particular
// register and memory layout.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
// Functions with inalloca/preallocated parameters are expecting args in a
// particular register and memory layout.
if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
MarkLive(F);
return;
}
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/IPO/FunctionAttrs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ determinePointerReadAttrs(Argument *A,
SmallPtrSet<Use *, 32> Visited;

// inalloca/preallocated arguments are always clobbered by the call.
if (A->hasInAllocaAttr())
if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
return Attribute::None;

bool IsRead = false;
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/IPO/GlobalOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,6 +2333,7 @@ OptimizeFunctions(Module &M,
// wouldn't be safe in the presence of inalloca.
// FIXME: We should also hoist alloca affected by this to the entry
// block if possible.
// FIXME: handle preallocated
if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
!F->hasAddressTaken()) {
RemoveAttribute(F, Attribute::InAlloca);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4737,6 +4737,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
//
// Similarly, avoid folding away bitcasts of byval calls.
if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
return false;

Expand Down
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/X86/arg-copy-elide.ll
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,20 @@ entry:
; CHECK: calll _addrof_i32
; CHECK: retl

; A preallocated pointer argument is stored into a local alloca and is then
; passed on to @addrof_i32.
define void @avoid_preallocated(i32* preallocated(i32) %x) {
entry:
%x.p.p = alloca i32*
store i32* %x, i32** %x.p.p
call void @addrof_i32(i32* %x)
ret void
}

; CHECK-LABEL: _avoid_preallocated:
; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
; CHECK: pushl %[[reg]]
; CHECK: calll _addrof_i32
; CHECK: retl

; Don't elide the copy when the alloca is escaped with a store.
define void @escape_with_store(i32 %x) {
%x1 = alloca i32
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/X86/musttail-indirect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
; Each member pointer creates a thunk. The ones with inalloca are required to
; be tail calls by the ABI, even at O0.

; TODO: add tests for preallocated/musttail once supported

%struct.B = type { i32 (...)** }
%struct.A = type { i32 }

Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/X86/musttail-thiscall.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; RUN: llc -verify-machineinstrs -mtriple=i686-- < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=i686-- -O0 < %s | FileCheck %s

; TODO: add tests for preallocated/musttail once supported

; CHECK-LABEL: t1:
; CHECK: jmp {{_?}}t1_callee
define x86_thiscallcc void @t1(i8* %this) {
Expand Down
23 changes: 23 additions & 0 deletions llvm/test/CodeGen/X86/preallocated-nocall.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
; REQUIRES: asserts
; XFAIL: *

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)

%Foo = type { i32, i32 }

declare void @init(%Foo*)



declare void @foo_p(%Foo* preallocated(%Foo))

; Sets up a preallocated call and fetches its argument, but never issues a call
; that carries the "preallocated" bundle for the token. The file is marked
; XFAIL.
define void @no_call() {
; CHECK-LABEL: _no_call:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
call void @init(%Foo* %b)
ret void
}
18 changes: 18 additions & 0 deletions llvm/test/CodeGen/X86/preallocated-x64.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; RUN: llc %s -mtriple=x86_64-windows-msvc -o /dev/null 2>&1
; REQUIRES: asserts
; XFAIL: *

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)

%Foo = type { i32, i32 }

declare x86_thiscallcc void @f(i32, %Foo* preallocated(%Foo))

; A preallocated call compiled for the x86_64 triple given on the RUN line.
; The file is marked XFAIL.
define void @g() {
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
call void @f(i32 0, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}
187 changes: 187 additions & 0 deletions llvm/test/CodeGen/X86/preallocated.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)

%Foo = type { i32, i32 }

declare void @init(%Foo*)



declare void @foo_p(%Foo* preallocated(%Foo))

; One preallocated %Foo argument: the expected code reserves 8 bytes of stack
; before the call to @foo_p.
define void @one_preallocated() {
; CHECK-LABEL: _one_preallocated:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}

; Same as the single-argument case, but the setup lives in the entry block
; while the argument fetch and the call live in a second block.
define void @one_preallocated_two_blocks() {
; CHECK-LABEL: _one_preallocated_two_blocks:
%t = call token @llvm.call.preallocated.setup(i32 1)
br label %second
second:
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}

; Stores 13 and 42 into the two %Foo fields through the preallocated pointer.
; The expected code writes them relative to the register loaded from esp and
; performs no further stack adjustment or push before the call to @foo_p.
define void @preallocated_with_store() {
; CHECK-LABEL: _preallocated_with_store:
; CHECK: subl $8, %esp
%t = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
%p0 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
%p1 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
store i32 13, i32* %p0
store i32 42, i32* %p1
; CHECK-DAG: movl $13, ([[REGISTER]])
; CHECK-DAG: movl $42, 4([[REGISTER]])
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}

; The preallocated memory is initialised by an ordinary call to @init, which
; receives the pointer via a push. The following preallocated call to @foo_p
; is expected to need no further stack adjustment or push.
define void @preallocated_with_init() {
; CHECK-LABEL: _preallocated_with_init:
; CHECK: subl $8, %esp
%t = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: pushl [[REGISTER]]
; CHECK: calll _init
call void @init(%Foo* %b)
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_p
call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}

declare void @foo_p_p(%Foo* preallocated(%Foo), %Foo* preallocated(%Foo))

; Two preallocated %Foo arguments under one setup token: the expected code
; reserves 16 bytes of stack before the call to @foo_p_p.
define void @two_preallocated() {
; CHECK-LABEL: _two_preallocated:
%t = call token @llvm.call.preallocated.setup(i32 2)
%a1 = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b1 = bitcast i8* %a1 to %Foo*
%a2 = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(%Foo)
%b2 = bitcast i8* %a2 to %Foo*
; CHECK: subl $16, %esp
; CHECK: calll _foo_p_p
call void @foo_p_p(%Foo* preallocated(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t)]
ret void
}

declare void @foo_p_int(%Foo* preallocated(%Foo), i32)

; One preallocated %Foo argument followed by a plain i32 argument. The
; expected code reserves 12 bytes and writes the i32 value 2 at 8(%esp)
; without any further stack adjustment or push before the call to @foo_p_int.
define void @one_preallocated_one_normal() {
; CHECK-LABEL: _one_preallocated_one_normal:
; CHECK: subl $12, %esp
%t = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: pushl [[REGISTER]]
; CHECK: calll _init
call void @init(%Foo* %b)
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: movl $2, 8(%esp)
; CHECK: calll _foo_p_int
call void @foo_p_int(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
ret void
}

declare void @foo_ret_p(%Foo* sret, %Foo* preallocated(%Foo))

; Two preallocated setups are live at the same time: %t2 is set up after %t1
; and its call is issued first. The inner call receives %b1 (the outer
; preallocated memory) as its first, non-preallocated argument; the outer call
; then passes %b1 as its preallocated argument.
define void @nested_with_init() {
; CHECK-LABEL: _nested_with_init:
%tmp = alloca %Foo

%t1 = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: subl $12, %esp
%a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
%b1 = bitcast i8* %a1 to %Foo*
; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]

%t2 = call token @llvm.call.preallocated.setup(i32 1)
; CHECK: subl $12, %esp
%a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
%b2 = bitcast i8* %a2 to %Foo*

call void @init(%Foo* %b2)
; CHECK: pushl [[REGISTER2]]
; CHECK: calll _init

call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_ret_p
call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
; CHECK-NOT: subl {{\$[0-9]+}}, %esp
; CHECK-NOT: pushl
; CHECK: calll _foo_ret_p
ret void
}

declare void @foo_inreg_p(i32 inreg, %Foo* preallocated(%Foo))

; Mixes an inreg i32 argument with a preallocated %Foo argument: the expected
; code reserves 8 bytes of stack and places the value 9 in eax before the call.
define void @inreg() {
; CHECK-LABEL: _inreg:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: movl $9, %eax
; CHECK: calll _foo_inreg_p
call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}

declare x86_thiscallcc void @foo_thiscall_p(i8*, %Foo* preallocated(%Foo))

; thiscall callee with a null first argument and a preallocated %Foo: the
; expected code reserves 8 bytes of stack and zeroes ecx before the call.
define void @thiscall() {
; CHECK-LABEL: _thiscall:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: xorl %ecx, %ecx
; CHECK: calll _foo_thiscall_p
call x86_thiscallcc void @foo_thiscall_p(i8* null, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
ret void
}

declare x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo))
declare x86_stdcallcc void @i(i32)

; A preallocated call to a stdcall callee, followed by an ordinary stdcall
; call to @i. No reference to esp is expected between the first call and the
; push that feeds the second one.
define void @stdcall() {
; CHECK-LABEL: _stdcall:
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
%b = bitcast i8* %a to %Foo*
; CHECK: subl $8, %esp
; CHECK: calll _foo_stdcall_p@8
call x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
; CHECK-NOT: %esp
; CHECK: pushl
; CHECK: calll _i@4
call x86_stdcallcc void @i(i32 0)
ret void
}
3 changes: 3 additions & 0 deletions llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s

; TODO: add preallocated versions of tests
; we don't yet support conditionally called preallocated calls after the setup

; chkstk cannot come before the usual prologue, since it adjusts ESP.
; If chkstk is used in the prologue, we also have to be careful about preserving
; EAX if it is used.
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@ target triple = "i386-pc-windows-msvc19.0.24215"
declare x86_stdcallcc void @tail_std(i32)
declare void @capture(i32*)

; Loads the value held in the preallocated argument memory, overwrites that
; memory with 0, and then calls @tail_std (marked 'tail') with the loaded
; value.
define x86_thiscallcc void @preallocated(i32* %this, i32* preallocated(i32) %args) {
entry:
%val = load i32, i32* %args
store i32 0, i32* %args
tail call x86_stdcallcc void @tail_std(i32 %val)
ret void
}

; CHECK-LABEL: _preallocated: # @preallocated
; CHECK: movl 4(%esp), %[[reg:[^ ]*]]
; CHECK: movl $0, 4(%esp)
; CHECK: pushl %[[reg]]
; CHECK: calll _tail_std@4
; CHECK: retl $4

define x86_thiscallcc void @inalloca(i32* %this, i32* inalloca %args) {
entry:
%val = load i32, i32* %args
Expand Down
20 changes: 20 additions & 0 deletions llvm/test/Transforms/Attributor/value-simplify.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
declare void @f(i32)
declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)

; Test1: Replace argument with constant
define internal void @test1(i32 %a) {
Expand Down Expand Up @@ -280,6 +282,24 @@ define i32* @complicated_args_inalloca() {
ret i32* %call
}

; Internal function that returns its preallocated pointer argument unchanged.
; The expected signature keeps the preallocated(i32) attribute on the argument.
define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
; CHECK-LABEL: define {{[^@]+}}@test_preallocated
; CHECK-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]])
; CHECK-NEXT: ret i32* [[A]]
;
ret i32* %a
}
; Calls @test_preallocated with a null preallocated argument. The expected
; output still contains the call and returns its result, rather than a
; simplified constant.
define i32* @complicated_args_preallocated() {
; CHECK-LABEL: define {{[^@]+}}@complicated_args_preallocated()
; CHECK-NEXT: [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1)
; CHECK-NEXT: [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null)
; CHECK-NEXT: ret i32* [[CALL]]
;
%c = call token @llvm.call.preallocated.setup(i32 1)
%call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)]
ret i32* %call
}

define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) {
;
; CHECK-LABEL: define {{[^@]+}}@test_sret
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/Transforms/DeadArgElim/keepalive.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
; RUN: opt < %s -deadargelim -S | FileCheck %s

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)

%Ty = type <{ i32, i32 }>

; Check if the pass doesn't modify anything that doesn't need changing. We feed
Expand Down Expand Up @@ -44,4 +47,22 @@ define i32 @caller2() {
ret i32 %v
}

; We can't remove 'this' here, as that would put argmem in ecx instead of
; memory.
; The body never reads %this; only the preallocated %argmem is loaded.
define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem) {
%v = load i32, i32* %argmem
ret i32 %v
}
; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem)

; Caller for @unused_this_preallocated: sets up one preallocated i32, stores
; 42 into it, and passes a separate alloca as the 'this' argument.
define i32 @caller3() {
%t = alloca i32
%c = call token @llvm.call.preallocated.setup(i32 1)
%M = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
%m = bitcast i8* %M to i32*
store i32 42, i32* %m
%v = call x86_thiscallcc i32 @unused_this_preallocated(i32* %t, i32* preallocated(i32) %m) ["preallocated"(token %c)]
ret i32 %v
}

; CHECK: attributes #0 = { nounwind }
10 changes: 10 additions & 0 deletions llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,16 @@ define void @test9_2(%struct.x* inalloca %a) nounwind {
ret void
}

; Test for preallocated handling.
; The only instruction besides the return is a store into the preallocated
; argument; the expected output contains just the return.
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_3(
; CHECK-NEXT: ret void
;
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
store i32 1, i32* %tmp2, align 4
ret void
}

; DSE should delete the dead trampoline.
declare void @test11f()
define void @test11() {
Expand Down
10 changes: 10 additions & 0 deletions llvm/test/Transforms/DeadStoreElimination/simple.ll
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,16 @@ define void @test9_2(%struct.x* inalloca %a) nounwind {
ret void
}

; Test for preallocated handling.
; The only instruction besides the return is a store into the preallocated
; argument; the expected output contains just the return.
define void @test9_3(%struct.x* preallocated(%struct.x) %a) nounwind {
; CHECK-LABEL: @test9_3(
; CHECK-NEXT: ret void
;
%tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
store i32 1, i32* %tmp2, align 4
ret void
}

; va_arg has fuzzy dependence, the store shouldn't be zapped.
define double @test10(i8* %X) {
; CHECK-LABEL: @test10(
Expand Down
6 changes: 6 additions & 0 deletions llvm/test/Transforms/FunctionAttrs/readattrs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ define void @test7_1(i32* inalloca %a) {
ret void
}

; CHECK: define void @test7_2(i32* nocapture preallocated(i32) %a)
; preallocated parameters are always considered written
; The expected signature therefore gains nocapture only, with no readnone or
; readonly, even though the body never touches %a.
define void @test7_2(i32* preallocated(i32) %a) {
ret void
}

; CHECK: define i32* @test8_1(i32* readnone returned %p)
define i32* @test8_1(i32* %p) {
entry:
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/Transforms/GlobalOpt/fastcc.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
; RUN: opt < %s -globalopt -S | FileCheck %s

declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)

define internal i32 @f(i32* %m) {
; CHECK-LABEL: define internal fastcc i32 @f
%v = load i32, i32* %m
Expand Down Expand Up @@ -32,6 +35,13 @@ define internal i32 @inalloca(i32* inalloca %p) {
ret i32 %rv
}

define internal i32 @preallocated(i32* preallocated(i32) %p) {
; TODO: handle preallocated. Until that is supported, this function must not
; show up rewritten to fastcc with its preallocated attribute dropped.
; A plain negative directive is used here because a NOT-LABEL suffix is not a
; recognised FileCheck directive and would be skipped without any diagnostic.
; CHECK-NOT: define internal fastcc i32 @preallocated(i32* %p)
%rv = load i32, i32* %p
ret i32 %rv
}

define void @call_things() {
%m = alloca i32
call i32 @f(i32* %m)
Expand All @@ -40,6 +50,11 @@ define void @call_things() {
call i32 @j(i32* %m)
%args = alloca inalloca i32
call i32 @inalloca(i32* inalloca %args)
; TODO: handle preallocated
;%c = call token @llvm.call.preallocated.setup(i32 1)
;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
;%n = bitcast i8* %N to i32*
;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
ret void
}

Expand Down
28 changes: 28 additions & 0 deletions llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
; RUN: opt < %s -instcombine -S | FileCheck %s

target datalayout = "e-p:32:32"
target triple = "i686-pc-win32"


declare token @llvm.call.preallocated.setup(i32)
declare i8* @llvm.call.preallocated.arg(token, i32)

declare void @takes_i32(i32)
declare void @takes_i32_preallocated(i32* preallocated(i32))

; Calls @takes_i32 (declared with an i32 parameter) through a bitcast to a
; function type taking i32*, passing a preallocated argument. The bitcast is
; expected to remain in the call.
define void @f() {
; CHECK-LABEL: define void @f()
%t = call token @llvm.call.preallocated.setup(i32 1)
%a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
%arg = bitcast i8* %a to i32*
call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* preallocated(i32) %arg) ["preallocated"(token %t)]
; CHECK: call void bitcast{{.*}}@takes_i32
ret void
}

; Calls @takes_i32_preallocated (declared with a preallocated i32* parameter)
; through a bitcast to a function type taking a plain i32. The bitcast is
; expected to remain in the call.
define void @g() {
; CHECK-LABEL: define void @g()
call void bitcast (void (i32*)* @takes_i32_preallocated to void (i32)*)(i32 0)
; CHECK: call void bitcast{{.*}}@takes_i32_preallocated
ret void
}