Skip to content

Commit

Permalink
[AMDGPU] Introduce new sched classes for transcendental instructions
Browse files Browse the repository at this point in the history
This is in preparation for scheduling transcendental instructions slightly
differently on gfx10. NFC (no functional change).

Differential Revision: https://reviews.llvm.org/D81011
  • Loading branch information
jayfoad committed Jun 4, 2020
1 parent 52ed34d commit 9ce0f7e
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 16 deletions.
15 changes: 14 additions & 1 deletion llvm/lib/Target/AMDGPU/SISchedule.td
Expand Up @@ -27,9 +27,13 @@ def WriteBarrier : SchedWrite;
def MIVGPRRead : SchedRead;
def MIMFMARead : SchedRead;

// Vector ALU instructions
// Normal 16 or 32 bit VALU instructions
def Write32Bit : SchedWrite;
// Conversion to or from F32 (but not converting F64 to or from F32)
def WriteFloatCvt : SchedWrite;
// F16 or F32 transcendental instructions (these are quarter rate)
def WriteTrans32 : SchedWrite;
// Other quarter rate VALU instructions
def WriteQuarterRate32 : SchedWrite;

def WriteFloatFMA : SchedWrite;
Expand All @@ -43,6 +47,10 @@ def WriteDoubleAdd : SchedWrite;
// Conversion to or from f64 instruction
def WriteDoubleCvt : SchedWrite;

// F64 "transcendental" (actually only reciprocal and/or square root)
// instructions
def WriteTrans64 : SchedWrite;

// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;

Expand Down Expand Up @@ -128,6 +136,7 @@ multiclass SICommonWriteRes {
def : HWVALUWriteRes<Write32Bit, 1>;
def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteFloatCvt, 4>;
def : HWVALUWriteRes<WriteTrans32, 4>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
def : HWVALUWriteRes<Write2PassMAI, 2>;
def : HWVALUWriteRes<Write8PassMAI, 8>;
Expand Down Expand Up @@ -164,6 +173,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 4>;

def : InstRW<[WriteCopy], (instrs COPY)>;

Expand All @@ -177,6 +187,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : HWVALUWriteRes<WriteTrans64, 16>;

def : InstRW<[WriteCopy], (instrs COPY)>;

Expand All @@ -189,11 +200,13 @@ let SchedModel = GFX10SpeedModel in {
def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteFloatCvt, [HWVALU, HWRC], 5>;
def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
def : HWWriteRes<WriteTrans32, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>;
def : HWWriteRes<WriteTrans64, [HWVALU, HWRC], 17>;

def : HWWriteRes<WriteBranch, [HWBranch], 32>;
def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
Expand Down
27 changes: 12 additions & 15 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Expand Up @@ -243,28 +243,25 @@ defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;

let SchedRW = [WriteQuarterRate32] in {
let SchedRW = [WriteTrans32] in {
defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
} // End SchedRW = [WriteQuarterRate32]
} // End SchedRW = [WriteTrans32]

let SchedRW = [WriteDouble] in {
let SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
} // End SchedRW = [WriteDouble];

let SchedRW = [WriteDouble] in {
defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
} // End SchedRW = [WriteDouble]
} // End SchedRW = [WriteTrans64]

let SchedRW = [WriteQuarterRate32] in {
let SchedRW = [WriteTrans32] in {
defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]
} // End SchedRW = [WriteTrans32]

defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>;
Expand Down Expand Up @@ -345,7 +342,7 @@ defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_MOVRELSD>;
defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;

let SubtargetPredicate = isGFX6GFX7 in {
let SchedRW = [WriteQuarterRate32] in {
let SchedRW = [WriteTrans32] in {
defm V_LOG_CLAMP_F32 :
VOP1Inst<"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
defm V_RCP_CLAMP_F32 :
Expand All @@ -356,7 +353,7 @@ let SubtargetPredicate = isGFX6GFX7 in {
VOP1Inst<"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
defm V_RSQ_LEGACY_F32 :
VOP1Inst<"v_rsq_legacy_f32", VOP_F32_F32, int_amdgcn_rsq_legacy>;
} // End SchedRW = [WriteQuarterRate32]
} // End SchedRW = [WriteTrans32]

let SchedRW = [WriteDouble] in {
defm V_RCP_CLAMP_F64 :
Expand All @@ -367,10 +364,10 @@ let SubtargetPredicate = isGFX6GFX7 in {
} // End SubtargetPredicate = isGFX6GFX7

let SubtargetPredicate = isGFX7GFX8GFX9 in {
let SchedRW = [WriteQuarterRate32] in {
let SchedRW = [WriteTrans32] in {
defm V_LOG_LEGACY_F32 : VOP1Inst<"v_log_legacy_f32", VOP_F32_F32>;
defm V_EXP_LEGACY_F32 : VOP1Inst<"v_exp_legacy_f32", VOP_F32_F32>;
} // End SchedRW = [WriteQuarterRate32]
} // End SchedRW = [WriteTrans32]
} // End SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX7Plus in {
Expand All @@ -390,15 +387,15 @@ defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>;
} // End FPDPRounding = 1
defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
let SchedRW = [WriteQuarterRate32] in {
let SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End SchedRW = [WriteQuarterRate32]
} // End SchedRW = [WriteTrans32]
defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
Expand Down

0 comments on commit 9ce0f7e

Please sign in to comment.