[X86] Part 2 to fix x86-64 fp128 calling convention.
Part 1 was submitted in http://reviews.llvm.org/D15134.
Changes in this part:
* X86RegisterInfo.td, X86RecognizableInstr.cpp: Add FR128 register class.
* X86CallingConv.td: Pass f128 values in XMM registers or on stack.
* X86InstrCompiler.td, X86InstrInfo.td, X86InstrSSE.td:
  Add instruction selection patterns for f128.
* X86ISelLowering.cpp:
  When the target has MMX registers, configure MVT::f128 in FR128RegClass
  with the TypeSoftenFloat action and custom actions for some opcodes.
  Add missing cases of MVT::f128 in places that handle f32, f64, or vector types.
  Add TODO comment to support f128 type in inline assembly code.
* SelectionDAGBuilder.cpp:
  Fix an infinite loop that occurs when the f128 type can have
  VT == TLI.getTypeToTransformTo(Ctx, VT).
* Add unit tests for x86-64 fp128 type.

Differential Revision: http://reviews.llvm.org/D11438

llvm-svn: 255558
chih-hung committed Dec 14, 2015
1 parent f801290 commit 7993e18
Showing 16 changed files with 1,052 additions and 25 deletions.
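
Before the per-file diffs, a minimal IR sketch of the convention this patch implements (a hypothetical example, not part of the commit): the first eight fp128 arguments travel in XMM registers and the result is returned in %xmm0, while fp128 arithmetic itself is still softened to libcalls.

; Hypothetical input for: llc -mtriple=x86_64-linux-gnu -mattr=+mmx
; %a arrives in %xmm0, %b in %xmm1, and the result leaves in %xmm0.
define fp128 @example_add(fp128 %a, fp128 %b) {
entry:
  ; The fadd itself is expected to be softened to a __addtf3 libcall;
  ; only parameter and return passing use the SSE registers.
  %sum = fadd fp128 %a, %b
  ret fp128 %sum
}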
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2451,7 +2451,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {

// We care about the legality of the operation after it has been type
// legalized.
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
VT != TLI.getTypeToTransformTo(Ctx, VT))
VT = TLI.getTypeToTransformTo(Ctx, VT);

// If the vselect is legal, assume we want to leave this as a vector setcc +
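My reading of the guard above (an inference from the commit message, not text from the patch): f128 now has the TypeSoftenFloat action while TLI.getTypeToTransformTo(Ctx, VT) can return f128 itself, so the old loop never reached TypeLegal. An input as small as this hypothetical select could make visitSelect spin before the fix:

define fp128 @select_f128(i1 %c, fp128 %a, fp128 %b) {
entry:
  ; Reaches the while loop in visitSelect during SelectionDAG building.
  %r = select i1 %c, fp128 %a, fp128 %b
  ret fp128 %r
}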
5 changes: 3 additions & 2 deletions llvm/lib/Target/X86/X86CallingConv.td
@@ -158,6 +158,7 @@ def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>,

// MMX vector types are always returned in XMM0.
CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
@@ -293,7 +294,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v64i1], CCPromoteToType<v64i8>>,

// The first 8 FP/Vector arguments are passed in XMM registers.
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,

@@ -318,7 +319,7 @@ def CC_X86_64_C : CallingConv<[

// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80], CCAssignToStack<0, 0>>,
CCIfType<[f80, f128], CCAssignToStack<0, 0>>,

// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
59 changes: 46 additions & 13 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -296,13 +296,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BR_CC , MVT::f32, Expand);
setOperationAction(ISD::BR_CC , MVT::f64, Expand);
setOperationAction(ISD::BR_CC , MVT::f80, Expand);
setOperationAction(ISD::BR_CC , MVT::f128, Expand);
setOperationAction(ISD::BR_CC , MVT::i8, Expand);
setOperationAction(ISD::BR_CC , MVT::i16, Expand);
setOperationAction(ISD::BR_CC , MVT::i32, Expand);
setOperationAction(ISD::BR_CC , MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f80, Expand);
setOperationAction(ISD::SELECT_CC , MVT::f128, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i8, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC , MVT::i32, Expand);
@@ -415,12 +417,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT , MVT::f32 , Custom);
setOperationAction(ISD::SELECT , MVT::f64 , Custom);
setOperationAction(ISD::SELECT , MVT::f80 , Custom);
setOperationAction(ISD::SELECT , MVT::f128 , Custom);
setOperationAction(ISD::SETCC , MVT::i8 , Custom);
setOperationAction(ISD::SETCC , MVT::i16 , Custom);
setOperationAction(ISD::SETCC , MVT::i32 , Custom);
setOperationAction(ISD::SETCC , MVT::f32 , Custom);
setOperationAction(ISD::SETCC , MVT::f64 , Custom);
setOperationAction(ISD::SETCC , MVT::f80 , Custom);
setOperationAction(ISD::SETCC , MVT::f128 , Custom);
setOperationAction(ISD::SETCCE , MVT::i8 , Custom);
setOperationAction(ISD::SETCCE , MVT::i16 , Custom);
setOperationAction(ISD::SETCCE , MVT::i32 , Custom);
@@ -619,8 +623,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FMA, MVT::f32, Expand);

// Long double always uses X87.
// Long double always uses X87, except f128 in MMX.
if (!Subtarget->useSoftFloat()) {
if (Subtarget->is64Bit() && Subtarget->hasMMX()) {
addRegisterClass(MVT::f128, &X86::FR128RegClass);
ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
setOperationAction(ISD::FABS , MVT::f128, Custom);
setOperationAction(ISD::FNEG , MVT::f128, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
}

addRegisterClass(MVT::f80, &X86::RFP80RegClass);
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
@@ -2363,7 +2375,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
EVT CopyVT = VA.getLocVT();

// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -2647,6 +2659,8 @@ SDValue X86TargetLowering::LowerFormalArguments(
RC = &X86::FR32RegClass;
else if (RegVT == MVT::f64)
RC = &X86::FR64RegClass;
else if (RegVT == MVT::f128)
RC = &X86::FR128RegClass;
else if (RegVT.is512BitVector())
RC = &X86::VR512RegClass;
else if (RegVT.is256BitVector())
@@ -13410,6 +13424,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();

bool IsF128 = (VT == MVT::f128);

// FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to
// decide if we should generate a 16-byte constant mask when we only need 4 or
// 8 bytes for the scalar case.
@@ -13422,6 +13438,11 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
NumElts = VT.getVectorNumElements();
} else if (IsF128) {
// SSE instructions are used for optimized f128 logical operations.
LogicVT = MVT::f128;
EltVT = VT;
NumElts = 1;
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
@@ -13453,7 +13474,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR;
SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;

if (VT.isVector())
if (VT.isVector() || IsF128)
return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);

// For the scalar case extend to a 128-bit vector, perform the logic op,
Expand All @@ -13472,6 +13493,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
MVT SrcVT = Op1.getSimpleValueType();
bool IsF128 = (VT == MVT::f128);

// If second operand is smaller, extend it first.
if (SrcVT.bitsLT(VT)) {
@@ -13486,13 +13508,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {

// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) &&
"Unexpected type in LowerFCOPYSIGN");

const fltSemantics &Sem =
VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle;
VT == MVT::f64 ? APFloat::IEEEdouble :
(IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
const unsigned SizeInBits = VT.getSizeInBits();

SmallVector<Constant *, 4> CV(
VT == MVT::f64 ? 2 : 4,
VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4),
ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));

// First, clear all bits but the sign bit from the second operand (sign).
@@ -13505,12 +13530,13 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
// Perform all logic operations as 16-byte vectors because there are no
// scalar FP logic instructions in SSE. This allows load folding of the
// constants into the logic instructions.
MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
SDValue Mask1 =
DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
false, false, false, 16);
Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
if (!IsF128)
Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);

// Next, clear the sign bit from the first operand (magnitude).
@@ -13519,8 +13545,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
APFloat APF = Op0CN->getValueAPF();
// If the magnitude is a positive zero, the sign bit alone is enough.
if (APF.isPosZero())
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
DAG.getIntPtrConstant(0, dl));
return IsF128 ? SignBit :
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
DAG.getIntPtrConstant(0, dl));
APF.clearSign();
CV[0] = ConstantFP::get(*Context, APF);
} else {
@@ -13536,13 +13563,15 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
false, false, false, 16);
// If the magnitude operand wasn't a constant, we need to AND out the sign.
if (!isa<ConstantFPSDNode>(Op0)) {
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
if (!IsF128)
Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
}
// OR the magnitude value with the sign bit.
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
DAG.getIntPtrConstant(0, dl));
return IsF128 ? Val :
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
DAG.getIntPtrConstant(0, dl));
}
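
A matching sketch for the copysign path (hypothetical input, not from the commit): because IsF128 suppresses the SCALAR_TO_VECTOR and EXTRACT_VECTOR_ELT wrapping, the whole sequence stays on f128 values.

define fp128 @copysign_f128(fp128 %mag, fp128 %sgn) {
entry:
  ; Expected lowering (assumption): andps to isolate the sign bit of
  ; %sgn, andps to clear the sign bit of %mag, then orps to combine.
  %r = call fp128 @llvm.copysign.f128(fp128 %mag, fp128 %sgn)
  ret fp128 %r
}
declare fp128 @llvm.copysign.f128(fp128, fp128)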

static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -22158,6 +22187,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_FR32:
case X86::CMOV_FR64:
case X86::CMOV_FR128:
case X86::CMOV_GR8:
case X86::CMOV_GR16:
case X86::CMOV_GR32:
@@ -23821,7 +23851,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// ignored in unsafe-math mode).
// We also try to create v2f32 min/max nodes, which we later widen to v4f32.
if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
VT != MVT::f80 && VT != MVT::f128 &&
(TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
(Subtarget->hasSSE2() ||
(Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) {
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -27946,6 +27977,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::f64:
case MVT::i64:
return std::make_pair(0U, &X86::FR64RegClass);
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
// Vector types.
case MVT::v16i8:
case MVT::v8i16:
@@ -28058,6 +28090,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// target independent register mapper will just pick the first match it can
// find, ignoring the required type.

// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
if (VT == MVT::f32 || VT == MVT::i32)
Res.second = &X86::FR32RegClass;
else if (VT == MVT::f64 || VT == MVT::i64)
1 change: 1 addition & 0 deletions llvm/lib/Target/X86/X86InstrCompiler.td
@@ -512,6 +512,7 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in {

defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
defm _FR128 : CMOVrr_PSEUDO<FR128, f128>;
defm _V4F32 : CMOVrr_PSEUDO<VR128, v4f32>;
defm _V2F64 : CMOVrr_PSEUDO<VR128, v2f64>;
defm _V2I64 : CMOVrr_PSEUDO<VR128, v2i64>;
11 changes: 6 additions & 5 deletions llvm/lib/Target/X86/X86InstrInfo.td
@@ -955,11 +955,12 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
return false;
}]>;

def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr))>;
def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;

def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
58 changes: 58 additions & 0 deletions llvm/lib/Target/X86/X86InstrSSE.td
@@ -413,6 +413,8 @@ let Predicates = [HasSSE2] in {
def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>;
def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>;
}

// Bitcasts between 256-bit vector types. Return the original type since
@@ -8851,3 +8853,59 @@ let mayLoad = 1, Constraints
defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
}
}

//===----------------------------------------------------------------------===//
// Extra selection patterns for FR128, f128, f128mem

// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2.
def : Pat<(store (f128 FR128:$src), addr:$dst),
(MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>;

def : Pat<(loadf128 addr:$src),
(COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>;

// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2
def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)),
(COPY_TO_REGCLASS
(ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;

def : Pat<(X86fand FR128:$src1, FR128:$src2),
(COPY_TO_REGCLASS
(ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;

def : Pat<(and FR128:$src1, FR128:$src2),
(COPY_TO_REGCLASS
(ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;

def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)),
(COPY_TO_REGCLASS
(ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;

def : Pat<(X86for FR128:$src1, FR128:$src2),
(COPY_TO_REGCLASS
(ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;

def : Pat<(or FR128:$src1, FR128:$src2),
(COPY_TO_REGCLASS
(ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;

def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)),
(COPY_TO_REGCLASS
(XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2),
FR128)>;

def : Pat<(X86fxor FR128:$src1, FR128:$src2),
(COPY_TO_REGCLASS
(XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;

def : Pat<(xor FR128:$src1, FR128:$src2),
(COPY_TO_REGCLASS
(XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128),
(COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>;
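
The FR128 xor patterns above can be exercised with an IR-level negation, which in this era of IR is written as a subtraction from -0.0 (there is no dedicated fneg instruction yet). A hypothetical example; the fp128 hex literal puts the low 64 bits first and the sign bit in the high 64 bits, matching @myFP128 in the test below:

define fp128 @neg_f128(fp128 %x) {
entry:
  ; Expected selection (assumption): a single xorps against a
  ; sign-bit mask, via the X86fxor FR128 pattern.
  %neg = fsub fp128 0xL00000000000000008000000000000000, %x
  ret fp128 %neg
}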
2 changes: 2 additions & 0 deletions llvm/lib/Target/X86/X86RegisterInfo.td
@@ -423,6 +423,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;

def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;

def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>;


// FIXME: This sets up the floating point register files as though they are f64
// values, though they really are f80 values. This will cause us to spill
47 changes: 47 additions & 0 deletions llvm/test/CodeGen/X86/fp128-calling-conv.ll
@@ -0,0 +1,47 @@
; RUN: llc < %s -O2 -mtriple=x86_64-linux-android -mattr=+mmx | FileCheck %s
; RUN: llc < %s -O2 -mtriple=x86_64-linux-gnu -mattr=+mmx | FileCheck %s

; __float128 myFP128 = 1.0L; // x86_64-linux-android
@myFP128 = global fp128 0xL00000000000000003FFF000000000000, align 16

; The first few parameters are passed in registers and the others are on the stack.

define fp128 @TestParam_FP128_0(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
entry:
ret fp128 %d0
; CHECK-LABEL: TestParam_FP128_0:
; CHECK-NOT: mov
; CHECK: retq
}

define fp128 @TestParam_FP128_1(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
entry:
ret fp128 %d1
; CHECK-LABEL: TestParam_FP128_1:
; CHECK: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
}

define fp128 @TestParam_FP128_7(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
entry:
ret fp128 %d7
; CHECK-LABEL: TestParam_FP128_7:
; CHECK: movaps %xmm7, %xmm0
; CHECK-NEXT: retq
}

define fp128 @TestParam_FP128_8(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
entry:
ret fp128 %d8
; CHECK-LABEL: TestParam_FP128_8:
; CHECK: movaps 8(%rsp), %xmm0
; CHECK-NEXT: retq
}

define fp128 @TestParam_FP128_9(fp128 %d0, fp128 %d1, fp128 %d2, fp128 %d3, fp128 %d4, fp128 %d5, fp128 %d6, fp128 %d7, fp128 %d8, fp128 %d9, fp128 %d10, fp128 %d11, fp128 %d12, fp128 %d13, fp128 %d14, fp128 %d15, fp128 %d16, fp128 %d17, fp128 %d18, fp128 %d19) {
entry:
ret fp128 %d9
; CHECK-LABEL: TestParam_FP128_9:
; CHECK: movaps 24(%rsp), %xmm0
; CHECK-NEXT: retq
}
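
To also exercise the new loadf128/MOVAPSrm pattern, a hypothetical extra case for this test (not in the commit; the CHECK lines are my expectation for the default small code model):

define fp128 @TestLoadGlobal() {
entry:
  %v = load fp128, fp128* @myFP128, align 16
  ret fp128 %v
; CHECK-LABEL: TestLoadGlobal:
; CHECK: movaps myFP128(%rip), %xmm0
; CHECK-NEXT: retq
}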
