Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 30 additions & 28 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -2008,34 +2008,36 @@ def : Pat<(int_nvvm_ull2d_rp i64:$a), (CVT_f64_u64 $a, CvtRP)>;
def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), (CVT_f16_f32 $a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn f32:$a), (CVT_f16_f32 $a, CvtRN)>;

// Selection patterns for the packed e4m3x2 / e5m2x2 conversion intrinsics.
// Every pattern forwards the intrinsic operands unchanged to a CVT_*
// instruction and appends a mode operand: the `_rn` intrinsics pass CvtRN and
// the `_rn_relu` intrinsics pass CvtRN_RELU.
// NOTE(review): no `let Predicates` wrapper is visible around these records in
// this hunk — confirm whether an enclosing one exists further up the file.

// Two f32 operands -> e4m3x2 / e5m2x2.
def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
(CVT_e4m3x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
(CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
(CVT_e5m2x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
(CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;

// One v2f16 operand -> e4m3x2 / e5m2x2.
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
(CVT_e4m3x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
(CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
(CVT_e5m2x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
(CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;

// One i16 operand (the packed e4m3x2 / e5m2x2 value) -> f16x2.
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
(CVT_f16x2_e4m3x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
(CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
(CVT_f16x2_e5m2x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
(CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;

let Predicates = [hasPTX<86>, hasSM<100>, hasArchAccelFeatures] in {
// Selection patterns for the packed e4m3x2 / e5m2x2 conversion intrinsics,
// all placed under a single predicate list.
// NOTE(review): callSubtarget<"hasFP8ConversionSupport"> presumably gates
// these patterns on NVPTXSubtarget::hasFP8ConversionSupport() — confirm
// against the definition of callSubtarget.
// Every pattern forwards the intrinsic operands unchanged to a CVT_*
// instruction and appends a mode operand: the `_rn` intrinsics pass CvtRN and
// the `_rn_relu` intrinsics pass CvtRN_RELU.
let Predicates = [callSubtarget<"hasFP8ConversionSupport">] in {
// Two f32 operands -> e4m3x2 / e5m2x2.
def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
(CVT_e4m3x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
(CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
(CVT_e5m2x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
(CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;

// One v2f16 operand -> e4m3x2 / e5m2x2.
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn v2f16:$a),
(CVT_e4m3x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu v2f16:$a),
(CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn v2f16:$a),
(CVT_e5m2x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu v2f16:$a),
(CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;

// One i16 operand (the packed e4m3x2 / e5m2x2 value) -> f16x2.
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn i16:$a),
(CVT_f16x2_e4m3x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu i16:$a),
(CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn i16:$a),
(CVT_f16x2_e5m2x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu i16:$a),
(CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;
}

let Predicates = [callSubtarget<"hasNarrowFPConversionSupport">] in {
def : Pat<(int_nvvm_ff_to_e2m3x2_rn_satfinite f32:$a, f32:$b),
(CVT_e2m3x2_f32_sf $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e2m3x2_rn_relu_satfinite f32:$a, f32:$b),
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,27 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
hasPTXWithAccelSMs(86, {100, 101});
}

// Reports whether conversions involving e4m3x2 and e5m2x2 are supported for
// the current PTXVersion / SmVersion pair:
//   - PTXVersion >= 81 : requires SmVersion >= 89
//   - PTXVersion >= 78 : requires SmVersion >= 90
//   - anything older   : never supported
bool hasFP8ConversionSupport() const {
  // Below PTX version 78 there is no support at all.
  if (PTXVersion < 78)
    return false;

  // From PTX version 81 onwards the minimum SM drops from 90 to 89.
  return SmVersion >= (PTXVersion >= 81 ? 89 : 90);
}

// Reports whether conversions involving any of these types are supported:
//   - e2m3x2/e3m2x2
//   - e2m1x2
//   - ue8m0x2
// The three PTX-version/SM-list combinations are tried in order; the first
// one that holds is sufficient.
bool hasNarrowFPConversionSupport() const {
  if (hasPTXWithFamilySMs(90, {100, 110, 120}))
    return true;

  if (hasPTXWithFamilySMs(88, {100, 101, 120}))
    return true;

  return hasPTXWithAccelSMs(86, {100, 101, 120});
}

// Prior to CUDA 12.3 ptxas did not recognize that the trap instruction
// terminates a basic block. Instead, it would assume that control flow
// continued to the next instruction. The next instruction could be in the
Expand Down