Skip to content

Commit

Permalink
AMDGPU: Fix folding SGPRs into madak/madmk src0
Browse files Browse the repository at this point in the history
Because of the special immediate operand, the constant
bus is already used so SGPRs are never useful.

r263212 changed the name of the immediate operand, which
broke the verifier check for the restriction.

llvm-svn: 274564
  • Loading branch information
arsenm committed Jul 5, 2016
1 parent a8d89f3 commit ffc8275
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 6 deletions.
7 changes: 7 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

// 32-bit VALU immediate operand that uses the constant bus.
def u32kimm : Operand<i32> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_KIMM32";
let PrintMethod = "printU32ImmOperand";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ namespace AMDGPU {
/// Operand with register or 32-bit immediate
OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET,
/// Operand with register or inline constant
OPERAND_REG_INLINE_C
OPERAND_REG_INLINE_C,

/// Operand with 32-bit immediate that uses the constant bus. The standard
/// OPERAND_IMMEDIATE should be used for special immediates such as source
/// modifiers.
OPERAND_KIMM32
};
}
}
Expand Down
14 changes: 11 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1695,6 +1695,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
break;
case MCOI::OPERAND_IMMEDIATE:
case AMDGPU::OPERAND_KIMM32:
// Check if this operand is an immediate.
// FrameIndex operands will be replaced by immediates, so they are
// allowed.
Expand Down Expand Up @@ -1731,6 +1732,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

unsigned ConstantBusCount = 0;

if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
++ConstantBusCount;

unsigned SGPRUsed = findImplicitSGPRRead(MI);
if (SGPRUsed != AMDGPU::NoRegister)
++ConstantBusCount;
Expand Down Expand Up @@ -2020,9 +2025,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (i == OpIdx)
continue;
const MachineOperand &Op = MI.getOperand(i);
if (Op.isReg() &&
(Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
usesConstantBus(MRI, Op, getOpSize(MI, i))) {
if (Op.isReg()) {
if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
usesConstantBus(MRI, Op, getOpSize(MI, i))) {
return false;
}
} else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
return false;
}
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1582,12 +1582,12 @@ def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;

def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32imm:$imm);
field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32kimm:$imm);
field string Asm32 = "$vdst, $src0, $src1, $imm";
field bit HasExt = 0;
}
def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
field dag Ins32 = (ins VCSrc_32:$src0, u32imm:$imm, VGPR_32:$src1);
field dag Ins32 = (ins VCSrc_32:$src0, u32kimm:$imm, VGPR_32:$src1);
field string Asm32 = "$vdst, $src0, $imm, $src1";
field bit HasExt = 0;
}
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AMDGPU/madak.ll
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,32 @@ define void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float
store float %madak, float addrspace(1)* %out.gep, align 4
ret void
}

; SIFoldOperands should not fold the SGPR copy into the instruction
; because the implicit immediate already uses the constant bus.
; GCN-LABEL: {{^}}madak_constant_bus_violation:
; GCN: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xa|0x28}}
; GCN: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
; GCN: buffer_load_dword [[VGPR:v[0-9]+]]
; GCN: v_madak_f32_e32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[VGPR]], [[MADAK]]
; GCN: buffer_store_dword [[MUL]]
define void @madak_constant_bus_violation(i32 %arg1, float %sgpr0, float %sgpr1) #0 {
bb:
%tmp = icmp eq i32 %arg1, 0
br i1 %tmp, label %bb3, label %bb4

bb3:
store volatile float 0.0, float addrspace(1)* undef
br label %bb4

bb4:
%vgpr = load volatile float, float addrspace(1)* undef
%tmp0 = fmul float %sgpr0, 0.5
%tmp1 = fadd float %tmp0, 42.0
%tmp2 = fmul float %tmp1, %vgpr
store volatile float %tmp2, float addrspace(1)* undef, align 4
ret void
}

attributes #0 = { nounwind}

0 comments on commit ffc8275

Please sign in to comment.