Skip to content

Commit

Permalink
AMDGPU: Start adding MODE register uses to instructions
Browse files Browse the repository at this point in the history
This is the groundwork required to implement strictfp. For now, this
should be NFC for regular instructoins (many instructions just gain an
extra use of a reserved register). Regalloc won't rematerialize
instructions with reads of physical registers, but we were suffering
from that anyway with the exec reads.

Should add it for all the related FP uses (possibly with some
extras). I did not add it to either the gpr index mode instructions
(or every single VALU instruction) since it's a ridiculous feature
already modeled as an arbitrary side effect.

Also work towards marking instructions with FP exceptions. This
doesn't actually set the bit yet since this would start to change
codegen. It seems nofpexcept is currently not implied from the regular
IR FP operations. Add it to some MIR tests where I think it might
matter.
  • Loading branch information
arsenm committed May 27, 2020
1 parent 13f6c81 commit 4b44963
Show file tree
Hide file tree
Showing 120 changed files with 1,318 additions and 1,224 deletions.
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrFormats.td
Expand Up @@ -114,6 +114,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// FLAT_SCRATCH segment. Must be 0 for non-FLAT instructions.
field bit IsNonFlatSeg = 0;

// Reads the mode register, usually for FP environment.
field bit ReadsModeReg = 0;

// This bit indicates that this uses the floating point double precision
// rounding mode flags
field bit FPDPRounding = 0;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Expand Up @@ -64,7 +64,7 @@ def : ExpComprPattern<v2f16, EXP_DONE, -1>;
// Used to inject printing of "_e32" suffix for VI (there are "_e64" variants for VI)
def VINTRPDst : VINTRPDstOperand <VGPR_32>;

let Uses = [M0, EXEC] in {
let Uses = [MODE, M0, EXEC] in {

// FIXME: Specify SchedRW for VINTRP instructions.

Expand Down Expand Up @@ -109,7 +109,7 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
[(set f32:$vdst, (int_amdgcn_interp_mov (i32 timm:$vsrc),
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;

} // End Uses = [M0, EXEC]
} // End Uses = [MODE, M0, EXEC]

//===----------------------------------------------------------------------===//
// Pseudo Instructions
Expand Down
29 changes: 20 additions & 9 deletions llvm/lib/Target/AMDGPU/SOPInstructions.td
Expand Up @@ -807,8 +807,10 @@ def S_SETREG_B32 : SOPK_Pseudo <
"s_setreg_b32",
(outs), (ins SReg_32:$sdst, hwreg:$simm16),
"$simm16, $sdst",
[(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]
>;
[(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]> {
let Defs = [MODE];
let Uses = [MODE];
}

// FIXME: Not on SI?
//def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;
Expand All @@ -819,6 +821,8 @@ def S_SETREG_IMM32_B32 : SOPK_Pseudo <
"$simm16, $imm"> {
let Size = 8; // Unlike every other SOPK instruction.
let has_sdst = 0;
let Defs = [MODE];
let Uses = [MODE];
}

} // End hasSideEffects = 1
Expand Down Expand Up @@ -953,6 +957,10 @@ def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
} // End SubtargetPredicate = isGFX8Plus

let SubtargetPredicate = HasVGPRIndexMode in {
// Setting the GPR index mode is really writing the fields in the mode
// register. We don't want to add mode register uses to every
// instruction, and it's too complicated to deal with anyway. This is
// modeled just as a side effect.
def S_SET_GPR_IDX_ON : SOPC <0x11,
(outs),
(ins SSrc_b32:$src0, GPRIdxMode:$src1),
Expand Down Expand Up @@ -1209,13 +1217,16 @@ let SubtargetPredicate = isGFX10Plus in {
}
def S_WAITCNT_DEPCTR :
SOPP <0x023, (ins s16imm:$simm16), "s_waitcnt_depctr $simm16">;
def S_ROUND_MODE :
SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
def S_DENORM_MODE :
SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
[(SIdenorm_mode (i32 timm:$simm16))]> {
let hasSideEffects = 1;
}

let hasSideEffects = 1, Uses = [MODE], Defs = [MODE] in {
// FIXME: Should remove hasSideEffects
def S_ROUND_MODE :
SOPP<0x024, (ins s16imm:$simm16), "s_round_mode $simm16">;
def S_DENORM_MODE :
SOPP<0x025, (ins i32imm:$simm16), "s_denorm_mode $simm16",
[(SIdenorm_mode (i32 timm:$simm16))]>;
}

def S_TTRACEDATA_IMM :
SOPP<0x028, (ins s16imm:$simm16), "s_ttracedata_imm $simm16">;
} // End SubtargetPredicate = isGFX10Plus
Expand Down
34 changes: 31 additions & 3 deletions llvm/lib/Target/AMDGPU/VOP1Instructions.td
Expand Up @@ -48,9 +48,14 @@ class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP1On
let mayStore = 0;
let hasSideEffects = 0;

let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

// FIXME
// let mayRaiseFPException = ReadsModeReg;

let VOP1 = 1;
let VALU = 1;
let Uses = [EXEC];
let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

let AsmVariantName = AMDGPUAsmVariants.Default;
}
Expand Down Expand Up @@ -186,31 +191,51 @@ def V_READFIRSTLANE_B32 :

let SchedRW = [WriteDoubleCvt] in {
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}

defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;

let mayRaiseFPException = 0 in {
defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP1_F64_I32, uint_to_fp>;
}

} // End SchedRW = [WriteDoubleCvt]

let SchedRW = [WriteFloatCvt] in {

// XXX: Does this really not raise exceptions? The manual claims the
// 16-bit ones can.
let mayRaiseFPException = 0 in {
defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP1_F32_I32, sint_to_fp>;
defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP1_F32_I32, uint_to_fp>;
}

defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
let FPDPRounding = 1 in {
defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
} // End FPDPRounding = 1

defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
} // End ReadsModeReg = 0, mayRaiseFPException = 0
} // End SchedRW = [WriteFloatCvt]

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;
defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP1_F32_I32, AMDGPUcvt_f32_ubyte1>;
defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP1_F32_I32, AMDGPUcvt_f32_ubyte2>;
defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP1_F32_I32, AMDGPUcvt_f32_ubyte3>;
} // ReadsModeReg = 0, mayRaiseFPException = 0

defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
Expand Down Expand Up @@ -417,8 +442,11 @@ let SubtargetPredicate = isGFX9Plus in {
}

defm V_SAT_PK_U8_I16 : VOP1Inst<"v_sat_pk_u8_i16", VOP_I32_I32>;
defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;

let mayRaiseFPException = 0 in {
defm V_CVT_NORM_I16_F16 : VOP1Inst<"v_cvt_norm_i16_f16", VOP_I16_F16>;
defm V_CVT_NORM_U16_F16 : VOP1Inst<"v_cvt_norm_u16_f16", VOP_I16_F16>;
} // End mayRaiseFPException = 0
} // End SubtargetPredicate = isGFX9Plus

let SubtargetPredicate = isGFX9Only in {
Expand Down
11 changes: 10 additions & 1 deletion llvm/lib/Target/AMDGPU/VOP2Instructions.td
Expand Up @@ -69,9 +69,14 @@ class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suf
let mayStore = 0;
let hasSideEffects = 0;

let ReadsModeReg = !or(isFloatType<P.DstVT>.ret, isFloatType<P.Src0VT>.ret);

// FIXME: Set this
// let mayRaiseFPException = ReadsModeReg;

let VOP2 = 1;
let VALU = 1;
let Uses = [EXEC];
let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);

let AsmVariantName = AMDGPUAsmVariants.Default;
}
Expand Down Expand Up @@ -529,8 +534,12 @@ defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_NO_EXT<VOP_I32_I32
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_NO_EXT<VOP_I32_I32_I32>, int_amdgcn_mbcnt_hi>;
defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_NO_EXT<VOP_F32_F32_I32>, AMDGPUldexp>;
defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_NO_EXT<VOP_I32_F32_I32>>; // TODO: set "Uses = dst"

let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_i16_f32>;
defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_NO_EXT<VOP_V2I16_F32_F32>, AMDGPUpknorm_u16_f32>;
}

defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_NO_EXT<VOP_V2F16_F32_F32>, AMDGPUpkrtz_f16_f32>;
defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_u16_u32>;
defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_NO_EXT<VOP_V2I16_I32_I32>, AMDGPUpk_i16_i32>;
Expand Down
27 changes: 20 additions & 7 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Expand Up @@ -290,8 +290,11 @@ class VOP3_INTERP16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {

let isCommutable = 1 in {

let mayRaiseFPException = 0 in {
def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
}

def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>;
Expand All @@ -314,7 +317,7 @@ def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
} // End SchedRW = [WriteQuarterRate32]

let Uses = [VCC, EXEC] in {
let Uses = [MODE, VCC, EXEC] in {
// v_div_fmas_f32:
// result = src0 * src1 + src2
// if (vcc)
Expand All @@ -336,15 +339,20 @@ def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC, []>

} // End isCommutable = 1

let mayRaiseFPException = 0 in {
def V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
def V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
def V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
} // End mayRaiseFPException

def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;

let mayRaiseFPException = 0 in { // XXX - Seems suspect but manual doesn't say it does
def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
Expand All @@ -354,6 +362,8 @@ def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDG
def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
} // End mayRaiseFPException = 0

def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
Expand All @@ -366,6 +376,8 @@ def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_
def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1


let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it does.
def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
let SchedRW = [WriteFloatFMA, WriteSALU];
let AsmMatchConverter = "";
Expand All @@ -377,6 +389,7 @@ def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64,
let AsmMatchConverter = "";
let FPDPRounding = 1;
}
} // End mayRaiseFPException = 0

def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;

Expand Down Expand Up @@ -471,7 +484,7 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CL
def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_CLAMP>>;
let FPDPRounding = 1 in {
def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
let Uses = [M0, EXEC] in {
let Uses = [MODE, M0, EXEC] in {
// For some reason the intrinsic operands are in a different order
// from the instruction operands.
def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i32, f32]>,
Expand All @@ -482,7 +495,7 @@ def V_INTERP_P2_F16 : VOP3Interp <"v_interp_p2_f16", VOP3_INTERP16<[f16, f32, i3
(i32 timm:$attr),
(i1 timm:$high),
M0))]>;
} // End Uses = [M0, EXEC]
} // End Uses = [M0, MODE, EXEC]
} // End FPDPRounding = 1
} // End renamedInGFX9 = 1

Expand All @@ -498,7 +511,7 @@ def V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
} // End SubtargetPredicate = isGFX9Plus

let Uses = [M0, EXEC], FPDPRounding = 1 in {
let Uses = [MODE, M0, EXEC], FPDPRounding = 1 in {
def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32, i32, untyped]>,
[(set f32:$vdst, (int_amdgcn_interp_p1_f16 (VOP3Mods f32:$src0, i32:$src0_modifiers),
(i32 timm:$attrchan),
Expand All @@ -512,15 +525,15 @@ def V_INTERP_P1LL_F16 : VOP3Interp <"v_interp_p1ll_f16", VOP3_INTERP16<[f32, f32


def V_INTERP_P1LV_F16 : VOP3Interp <"v_interp_p1lv_f16", VOP3_INTERP16<[f32, f32, i32, f16]>>;
} // End Uses = [M0, EXEC], FPDPRounding = 1
} // End Uses = [MODE, M0, EXEC], FPDPRounding = 1

} // End SubtargetPredicate = Has16BitInsts, isCommutable = 1

let SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC] in {
let SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC] in {
def V_INTERP_P1_F32_e64 : VOP3Interp <"v_interp_p1_f32", VOP3_INTERP>;
def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
} // End SubtargetPredicate = isGFX8Plus, Uses = [M0, EXEC]
} // End SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC]

let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {

Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Expand Up @@ -149,10 +149,11 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
}

let SubtargetPredicate = HasMadMixInsts in {

// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel.
// For 16-bit src arguments their location (hi/lo) are controlled by op_sel_hi.
let isCommutable = 1 in {
let isCommutable = 1, mayRaiseFPException = 0 in {
def V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
Expand Down Expand Up @@ -370,7 +371,8 @@ def V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite> {
let isMoveImm = 1;
}

let isConvergent = 1 in {
// FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
def V_MFMA_F32_4X4X1F32 : VOP3Inst<"v_mfma_f32_4x4x1f32", VOPProfileMAI_F32_F32_X4, int_amdgcn_mfma_f32_4x4x1f32>;
def V_MFMA_F32_4X4X4F16 : VOP3Inst<"v_mfma_f32_4x4x4f16", VOPProfileMAI_F32_V4F16_X4, int_amdgcn_mfma_f32_4x4x4f16>;
def V_MFMA_I32_4X4X4I8 : VOP3Inst<"v_mfma_i32_4x4x4i8", VOPProfileMAI_I32_I32_X4, int_amdgcn_mfma_i32_4x4x4i8>;
Expand All @@ -391,7 +393,7 @@ def V_MFMA_I32_32X32X4I8 : VOP3Inst<"v_mfma_i32_32x32x4i8", VOPProfileMAI_I3
def V_MFMA_I32_32X32X8I8 : VOP3Inst<"v_mfma_i32_32x32x8i8", VOPProfileMAI_I32_I32_X16, int_amdgcn_mfma_i32_32x32x8i8>;
def V_MFMA_F32_32X32X2BF16 : VOP3Inst<"v_mfma_f32_32x32x2bf16", VOPProfileMAI_F32_V2I16_X32, int_amdgcn_mfma_f32_32x32x2bf16>;
def V_MFMA_F32_32X32X4BF16 : VOP3Inst<"v_mfma_f32_32x32x4bf16", VOPProfileMAI_F32_V2I16_X16, int_amdgcn_mfma_f32_32x32x4bf16>;
} // End isConvergent = 1
} // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1

} // End SubtargetPredicate = HasMAIInsts

Expand Down
8 changes: 7 additions & 1 deletion llvm/lib/Target/AMDGPU/VOPCInstructions.td
Expand Up @@ -92,9 +92,11 @@ class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[],
let mayStore = 0;
let hasSideEffects = 0;

let ReadsModeReg = isFloatType<P.Src0VT>.ret;

let VALU = 1;
let VOPC = 1;
let Uses = [EXEC];
let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]);
let Defs = !if(DefVcc, [VCC], []);

VOPProfile Pfl = P;
Expand Down Expand Up @@ -738,6 +740,9 @@ multiclass VOPC_CLASS_F64 <string opName> :
multiclass VOPCX_CLASS_F64 <string opName> :
VOPCX_Class_Pseudos <opName, VOPC_I1_F64_I32, VOPC_F64_I32>;

// cmp_class ignores the FP mode and faithfully reports the unmodified
// source value.
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
Expand All @@ -747,6 +752,7 @@ let SubtargetPredicate = Has16BitInsts in {
defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
}
} // End ReadsModeReg = 0, mayRaiseFPException = 0

//===----------------------------------------------------------------------===//
// V_ICMPIntrinsic Pattern.
Expand Down

0 comments on commit 4b44963

Please sign in to comment.