Skip to content

Commit

Permalink
[AMDGPU][MC] Fix for Bug 28207 + LIT tests
Browse files Browse the repository at this point in the history
Enabled clamp and omod for v_cvt_* opcodes which have src0 of an integer type

Reviewers: vpykhtin, arsenm

Differential Revision: https://reviews.llvm.org/D31327

llvm-svn: 298852
  • Loading branch information
dpreobra committed Mar 27, 2017
1 parent 862a412 commit c512d44
Show file tree
Hide file tree
Showing 6 changed files with 226 additions and 18 deletions.
15 changes: 15 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Expand Up @@ -162,6 +162,9 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
SDValue &Clamp,
SDValue &Omod) const;

bool SelectVOP3OMods(SDValue In, SDValue &Src,
SDValue &Clamp, SDValue &Omod) const;

bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
SDValue &Clamp) const;
Expand Down Expand Up @@ -1669,6 +1672,18 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}

/// Complex-pattern selector for a VOP3 source operand that carries clamp and
/// omod operands but no source modifiers.
///
/// \param In    The incoming source value.
/// \param Src   Receives \p In unchanged.
/// \param Clamp Receives a zero i32 target constant.
/// \param Omod  Receives a zero i32 target constant.
/// \returns true unconditionally; this selector never rejects its input.
bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  // FIXME: Handle Clamp and Omod
  // Neither modifier is folded from the DAG yet, so both are emitted as the
  // same "disabled" zero immediate.
  const SDLoc DL(In);
  const SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

  Src = In;
  Clamp = Zero;
  Omod = Zero;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
Expand Down
33 changes: 33 additions & 0 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Expand Up @@ -1018,11 +1018,13 @@ class AMDGPUAsmParser : public MCTargetAsmParser {

void cvtId(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands);

void cvtVOP3Impl(MCInst &Inst,
const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands);
void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

void cvtMIMG(MCInst &Inst, const OperandVector &Operands);
Expand Down Expand Up @@ -3678,6 +3680,15 @@ void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands)
}
}

/// Convert parsed operands into \p Inst, choosing the converter by encoding.
///
/// Opcodes whose TSFlags have the VOP3 bit set are handled by cvtVOP3OMod;
/// every other opcode is passed to cvtId.
void AMDGPUAsmParser::cvtVOP3_omod(MCInst &Inst, const OperandVector &Operands) {
  const uint64_t Flags = MII.get(Inst.getOpcode()).TSFlags;
  const bool IsVOP3 = (Flags & SIInstrFlags::VOP3) != 0;

  if (!IsVOP3) {
    cvtId(Inst, Operands);
    return;
  }

  cvtVOP3OMod(Inst, Operands);
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
// 1. This operand is input modifiers
return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
Expand Down Expand Up @@ -3737,6 +3748,28 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
}
}

/// Build the MCInst operand list for a VOP3 instruction that takes clamp and
/// omod operands but no source modifiers.
///
/// Parsed operands are consumed starting at index 1. The first
/// Desc.getNumDefs() of them are added as register operands; every remaining
/// operand is either recorded as an optional modifier (when isMod() is true)
/// or added as a register/immediate operand. Finally the clamp and omod
/// immediates are appended, in that order, through addOptionalImmOperand.
void AMDGPUAsmParser::cvtVOP3OMod(MCInst &Inst, const OperandVector &Operands) {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // Index 0 is skipped. NOTE(review): presumably the mnemonic token — confirm
  // against the parser's operand layout.
  unsigned Idx = 1;

  // Destination registers.
  for (unsigned DefsLeft = Desc.getNumDefs(); DefsLeft != 0; --DefsLeft) {
    AMDGPUOperand &Def = (AMDGPUOperand &)*Operands[Idx++];
    Def.addRegOperands(Inst, 1);
  }

  // Sources and trailing modifiers.
  OptionalImmIndexMap OptionalIdx;
  const unsigned NumParsed = Operands.size();
  for (; Idx != NumParsed; ++Idx) {
    AMDGPUOperand &Parsed = (AMDGPUOperand &)*Operands[Idx];
    if (!Parsed.isMod()) {
      Parsed.addRegOrImmOperands(Inst, 1);
      continue;
    }
    // Remember where each modifier was parsed; it is emitted below so that
    // the MCInst operand order does not depend on the source order.
    OptionalIdx[Parsed.getImmTy()] = Idx;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptIdx;

Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Expand Up @@ -659,6 +659,8 @@ def VOP3NoMods : ComplexPattern<untyped, 2, "SelectVOP3NoMods">;
// VOP3Mods, but the input source is known to never be NaN.
def VOP3Mods_nnan : ComplexPattern<fAny, 2, "SelectVOP3Mods_NNaN">;

def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;

def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
def VOP3PMods0 : ComplexPattern<untyped, 3, "SelectVOP3PMods0">;

Expand Down
54 changes: 39 additions & 15 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Expand Up @@ -85,10 +85,17 @@ class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
}

class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
list<dag> ret = !if(P.HasModifiers,
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
[(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
list<dag> ret =
!if(P.HasModifiers,
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
i32:$src0_modifiers,
i1:$clamp, i32:$omod))))],
!if(P.HasOMod,
[(set P.DstVT:$vdst, (node (P.Src0VT (VOP3OMods P.Src0VT:$src0,
i1:$clamp, i32:$omod))))],
[(set P.DstVT:$vdst, (node P.Src0VT:$src0))]
)
);
}

multiclass VOP1Inst <string opName, VOPProfile P,
Expand All @@ -98,6 +105,23 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
}

// Special profile for instructions which have clamp
// and output modifiers (but have no input modifiers)
//
// dstVt is the destination value type and srcVt the type of src0; the two
// remaining source slots of the underlying VOPProfile are left untyped.
class VOPProfileI2F<ValueType dstVt, ValueType srcVt> :
VOPProfile<[dstVt, srcVt, untyped, untyped]> {

// 64-bit form operands: src0 followed by clamp and omod. There is no
// src0_modifiers operand in this list.
let Ins64 = (ins Src0RC64:$src0, clampmod:$clamp, omod:$omod);
// The format string places $clamp and $omod directly after $src0 with no
// separator of its own.
let Asm64 = "$vdst, $src0$clamp$omod";

// With HasModifiers cleared and HasOMod set, getVOP1Pat64 takes its
// VOP3OMods branch instead of the VOP3Mods0 one.
let HasModifiers = 0;
let HasClamp = 1;
let HasOMod = 1;
}

def VOP1_F64_I32 : VOPProfileI2F <f64, i32>;
def VOP1_F32_I32 : VOPProfileI2F <f32, i32>;
def VOP1_F16_I16 : VOPProfileI2F <f16, i16>;

//===----------------------------------------------------------------------===//
// VOP1 Instructions
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -144,24 +168,24 @@ def V_READFIRSTLANE_B32 :

let SchedRW = [WriteQuarterRate32] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
} // End SchedRW = [WriteQuarterRate32]

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
Expand Down Expand Up @@ -299,8 +323,8 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;

let SubtargetPredicate = isVI in {

defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>;
defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Target/AMDGPU/VOPInstructions.td
Expand Up @@ -107,8 +107,12 @@ class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern = [],
let AsmVariantName = AMDGPUAsmVariants.VOP3;
let AsmMatchConverter =
!if(!eq(VOP3Only,1),
!if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
!if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
!if(!and(P.IsPacked, isVOP3P), "cvtVOP3P", "cvtVOP3"),
!if(!eq(P.HasModifiers, 1),
"cvtVOP3_2_mod",
!if(!eq(P.HasOMod, 1), "cvtVOP3OMod", "")
)
);

VOPProfile Pfl = P;
}
Expand Down
132 changes: 131 additions & 1 deletion llvm/test/MC/AMDGPU/vop3-modifiers.s
Expand Up @@ -255,4 +255,134 @@ v_cubeid_f32 v0, s0, s0, neg(0x3e22f983)
// CHECK: [0x00,0x00,0xc4,0xd1,0x00,0x00,0xe0,0x83]

v_cubeid_f32 v0, s0, s0, abs(0x3e22f983)
// CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]
// CHECK: [0x00,0x04,0xc4,0xd1,0x00,0x00,0xe0,0x03]


//---------------------------------------------------------------------------//
// VOP3 Instructions without Input Modifiers but with Output Modifiers
//---------------------------------------------------------------------------//

v_cvt_f64_i32_e64 v[5:6], s1 clamp
// CHECK: [0x05,0x80,0x44,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f64_i32_e64 v[5:6], s1 mul:2
// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f64_i32_e64 v[5:6], s1 mul:4
// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f64_i32_e64 v[5:6], s1 div:2
// CHECK: [0x05,0x00,0x44,0xd1,0x01,0x00,0x00,0x18]


v_cvt_f64_u32_e64 v[5:6], s1 clamp
// CHECK: [0x05,0x80,0x56,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f64_u32_e64 v[5:6], s1 mul:2
// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f64_u32_e64 v[5:6], s1 mul:4
// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f64_u32_e64 v[5:6], s1 div:2
// CHECK: [0x05,0x00,0x56,0xd1,0x01,0x00,0x00,0x18]


v_cvt_f32_i32_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x45,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f32_i32_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f32_i32_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f32_i32_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x45,0xd1,0x01,0x00,0x00,0x18]


v_cvt_f32_u32_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x46,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f32_u32_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f32_u32_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f32_u32_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x46,0xd1,0x01,0x00,0x00,0x18]


v_cvt_off_f32_i4_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x4e,0xd1,0x01,0x00,0x00,0x00]

v_cvt_off_f32_i4_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x08]

v_cvt_off_f32_i4_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x10]

v_cvt_off_f32_i4_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x4e,0xd1,0x01,0x00,0x00,0x18]


v_cvt_f32_ubyte0_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x51,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f32_ubyte0_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f32_ubyte0_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f32_ubyte0_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x51,0xd1,0x01,0x00,0x00,0x18]


v_cvt_f32_ubyte1_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x52,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f32_ubyte1_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f32_ubyte1_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f32_ubyte1_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x52,0xd1,0x01,0x00,0x00,0x18]


v_cvt_f32_ubyte2_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x53,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f32_ubyte2_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f32_ubyte2_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f32_ubyte2_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x53,0xd1,0x01,0x00,0x00,0x18]


v_cvt_f32_ubyte3_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x54,0xd1,0x01,0x00,0x00,0x00]

v_cvt_f32_ubyte3_e64 v5, s1 mul:2
// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x08]

v_cvt_f32_ubyte3_e64 v5, s1 mul:4
// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x10]

v_cvt_f32_ubyte3_e64 v5, s1 div:2
// CHECK: [0x05,0x00,0x54,0xd1,0x01,0x00,0x00,0x18]


// NB: output modifiers are not supported for f16
v_cvt_f16_i16_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x7a,0xd1,0x01,0x00,0x00,0x00]

// NB: output modifiers are not supported for f16
v_cvt_f16_u16_e64 v5, s1 clamp
// CHECK: [0x05,0x80,0x79,0xd1,0x01,0x00,0x00,0x00]

0 comments on commit c512d44

Please sign in to comment.