Skip to content

Commit

Permalink
AMDGPU/SI: Select mad patterns to v_mac_f32
Browse files Browse the repository at this point in the history
The two-address instruction pass will convert these back to v_mad_f32
if necessary.

Differential Revision: http://reviews.llvm.org/D11060

llvm-svn: 242038
  • Loading branch information
tstellarAMD committed Jul 13, 2015
1 parent 0a43abc commit db5a11f
Show file tree
Hide file tree
Showing 14 changed files with 341 additions and 48 deletions.
19 changes: 19 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,11 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &Offset, SDValue &GLC) const;
SDNode *SelectAddrSpaceCast(SDNode *N);
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;
bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp, SDValue &Omod) const;

bool SelectVOP3Mods0Clamp(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Omod) const;
Expand Down Expand Up @@ -1317,6 +1320,12 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
bool Res = SelectVOP3Mods(In, Src, SrcMods);
return Res && cast<ConstantSDNode>(SrcMods)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
Expand All @@ -1328,6 +1337,16 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods0(SDValue In, SDValue &Src,
SDValue &SrcMods, SDValue &Clamp,
SDValue &Omod) const {
bool Res = SelectVOP3Mods0(In, Src, SrcMods, Clamp, Omod);

return Res && cast<ConstantSDNode>(SrcMods)->isNullValue() &&
cast<ConstantSDNode>(Clamp)->isNullValue() &&
cast<ConstantSDNode>(Omod)->isNullValue();
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp(SDValue In, SDValue &Src,
SDValue &SrcMods,
SDValue &Omod) const {
Expand Down
31 changes: 31 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,42 @@ static bool updateOperand(FoldCandidate &Fold,
return false;
}

static bool isUseMIInFoldList(const std::vector<FoldCandidate> &FoldList,
const MachineInstr *MI) {
for (auto Candidate : FoldList) {
if (Candidate.UseMI == MI)
return true;
}
return false;
}

static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
MachineOperand *OpToFold,
const SIInstrInfo *TII) {
if (!TII->isOperandLegal(MI, OpNo, OpToFold)) {

// Special case for v_mac_f32_e64 if we are trying to fold into src2
unsigned Opc = MI->getOpcode();
if (Opc == AMDGPU::V_MAC_F32_e64 &&
(int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
// Check if changing this to a v_mad_f32 instruction will allow us to
// fold the operand.
MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
if (FoldAsMAD) {
MI->untieRegOperand(OpNo);
return true;
}
MI->setDesc(TII->get(Opc));
}

// If we are already folding into another operand of MI, then
// we can't commute the instruction, otherwise we risk making the
// other fold illegal.
if (isUseMIInFoldList(FoldList, MI))
return false;

// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
unsigned CommuteIdx0;
Expand Down
56 changes: 51 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
return false;

unsigned Opc = UseMI->getOpcode();
if (Opc == AMDGPU::V_MAD_F32) {
if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.
if (hasModifiersSet(*UseMI, AMDGPU::OpName::src0_modifiers) ||
Expand Down Expand Up @@ -963,9 +963,9 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
// instead of having to modify in place.

// Remove these first since they are at the end.
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));

unsigned Src1Reg = Src1->getReg();
Expand All @@ -980,6 +980,14 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
Src1->setSubReg(Src2SubReg);
Src1->setIsKill(Src2->isKill());

if (Opc == AMDGPU::V_MAC_F32_e64) {
UseMI->untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
}

UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::src2));
// ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);

removeModOperands(*UseMI);
Expand Down Expand Up @@ -1010,11 +1018,17 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
// instead of having to modify in place.

// Remove these first since they are at the end.
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::omod));
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(AMDGPU::V_MAD_F32,
UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc,
AMDGPU::OpName::clamp));

if (Opc == AMDGPU::V_MAC_F32_e64) {
UseMI->untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
}

// ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);

// These come before src2.
Expand Down Expand Up @@ -1126,6 +1140,38 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
return false;
}

MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
MachineBasicBlock::iterator &MI,
LiveVariables *LV) const {

switch (MI->getOpcode()) {
default: return nullptr;
case AMDGPU::V_MAC_F32_e64: break;
case AMDGPU::V_MAC_F32_e32: {
const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
if (Src0->isImm() && !isInlineConstant(*Src0, 4))
return nullptr;
break;
}
}

const MachineOperand *Dst = getNamedOperand(*MI, AMDGPU::OpName::dst);
const MachineOperand *Src0 = getNamedOperand(*MI, AMDGPU::OpName::src0);
const MachineOperand *Src1 = getNamedOperand(*MI, AMDGPU::OpName::src1);
const MachineOperand *Src2 = getNamedOperand(*MI, AMDGPU::OpName::src2);

return BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_MAD_F32))
.addOperand(*Dst)
.addImm(0) // Src0 mods
.addOperand(*Src0)
.addImm(0) // Src1 mods
.addOperand(*Src1)
.addImm(0) // Src mods
.addOperand(*Src2)
.addImm(0) // clamp
.addImm(0); // omod
}

bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
int64_t SVal = Imm.getSExtValue();
if (SVal >= -16 && SVal <= 64)
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,10 @@ class SIInstrInfo : public AMDGPUInstrInfo {

unsigned getMachineCSELookAheadLimit() const override { return 500; }

MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
MachineBasicBlock::iterator &MI,
LiveVariables *LV) const override;

bool isSALU(uint16_t Opcode) const {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -529,9 +529,11 @@ def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;

def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
def VOP3NoMods0 : ComplexPattern<untyped, 4, "SelectVOP3NoMods0">;
def VOP3Mods0Clamp : ComplexPattern<untyped, 3, "SelectVOP3Mods0Clamp">;
def VOP3Mods0Clamp0OMod : ComplexPattern<untyped, 4, "SelectVOP3Mods0Clamp0OMod">;
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;

//===----------------------------------------------------------------------===//
// SI assembler operands
Expand Down Expand Up @@ -1113,6 +1115,13 @@ def VOP_MADK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins = (ins VCSrc_32:$src0, VGPR_32:$vsrc1, u32imm:$src2);
field string Asm = "$dst, $src0, $vsrc1, $src2";
}
def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
HasModifiers>.ret;
let Asm32 = getAsm32<2>.ret;
let Asm64 = getAsm64<2, HasModifiers>.ret;
}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
Expand Down
14 changes: 13 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1488,7 +1488,10 @@ defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;

defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_F32_F32_F32>;
let Constraints = "$dst = $src2", DisableEncoding="$src2",
isConvertibleToThreeAddress = 1 in {
defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>;
}
} // End isCommutable = 1

defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32">;
Expand Down Expand Up @@ -2206,6 +2209,15 @@ def : Pat <
(V_CNDMASK_B32_e64 $src2, $src1, $src0)
>;

// Pattern for V_MAC_F32
def : Pat <
(fmad (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
(VOP3NoMods f32:$src1, i32:$src1_modifiers),
(VOP3NoMods f32:$src2, i32:$src2_modifiers)),
(V_MAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
$src2_modifiers, $src2, $clamp, $omod)
>;

/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
Expand Down
16 changes: 14 additions & 2 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,15 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
// is vcc. We should handle this the same way we handle vopc, by addding
// a register allocation hint pre-regalloc and then do the shrining
// post-regalloc.
if (Src2)
return false;
if (Src2) {
if (MI.getOpcode() != AMDGPU::V_MAC_F32_e64)
return false;

const MachineOperand *Src2Mod =
TII->getNamedOperand(MI, AMDGPU::OpName::src2_modifiers);
if (!isVGPR(Src2, TRI, MRI) || (Src2Mod && Src2Mod->getImm() != 0))
return false;
}

const MachineOperand *Src1 = TII->getNamedOperand(MI, AMDGPU::OpName::src1);
const MachineOperand *Src1Mod =
Expand Down Expand Up @@ -259,6 +266,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (Src1)
Inst32.addOperand(*Src1);

const MachineOperand *Src2 =
TII->getNamedOperand(MI, AMDGPU::OpName::src2);
if (Src2)
Inst32.addOperand(*Src2);

++NumInstructionsShrunk;
MI.eraseFromParent();

Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/fmuladd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; CHECK-LABEL: {{^}}fmuladd_f32:
; CHECK: v_mad_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
; CHECK: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
float addrspace(1)* %in2, float addrspace(1)* %in3) {
Expand Down Expand Up @@ -34,8 +34,8 @@ define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
Expand All @@ -53,8 +53,8 @@ define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
Expand All @@ -72,8 +72,8 @@ define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
Expand All @@ -94,8 +94,8 @@ define void @fadd_a_a_b_f32(float addrspace(1)* %out,
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
float addrspace(1)* %in1,
float addrspace(1)* %in2) {
Expand All @@ -116,8 +116,8 @@ define void @fadd_b_a_a_f32(float addrspace(1)* %out,
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
Expand All @@ -136,8 +136,8 @@ define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
Expand All @@ -158,8 +158,8 @@ define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspa
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], -2.0, [[R1]], [[R2]]
; CHECK: buffer_store_dword [[RESULT]]
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
%tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
%gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone

; FUNC-LABEL: {{^}}test_lrp:
; SI: v_sub_f32
; SI: v_mad_f32
; SI: v_mac_f32_e32
define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind {
%mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone
store float %mad, float addrspace(1)* %out, align 4
Expand Down
Loading

0 comments on commit db5a11f

Please sign in to comment.