Skip to content

Commit

Permalink
[NVPTX] Remove ftz variants of cvt with rounding mode
Browse files Browse the repository at this point in the history
These do not exist in ptxas; it refuses to compile them.

Differential Revision: https://reviews.llvm.org/D51042

llvm-svn: 340317
  • Loading branch information
d0k committed Aug 21, 2018
1 parent 8e3f093 commit d66dde5
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 94 deletions.
42 changes: 6 additions & 36 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -2624,32 +2624,20 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
// f16 -> sint
def : Pat<(i1 (fp_to_sint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
(CVT_s16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_sint Float16Regs:$a)),
(CVT_s16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
(CVT_s32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_sint Float16Regs:$a)),
(CVT_s32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
(CVT_s64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_sint Float16Regs:$a)),
(CVT_s64_f16 Float16Regs:$a, CvtRZI)>;

// f16 -> uint
def : Pat<(i1 (fp_to_uint Float16Regs:$a)),
(SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
(CVT_u16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i16 (fp_to_uint Float16Regs:$a)),
(CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
(CVT_u32_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i32 (fp_to_uint Float16Regs:$a)),
(CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
(CVT_u64_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(i64 (fp_to_uint Float16Regs:$a)),
(CVT_u64_f16 Float16Regs:$a, CvtRZI)>;

Expand Down Expand Up @@ -2947,14 +2935,10 @@ def : Pat<(i32 (zext (ctpop Int16Regs:$a))),
(POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE))>;

// fpround f32 -> f16
def : Pat<(f16 (fpround Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f16 (fpround Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;

// fpround f64 -> f16
def : Pat<(f16 (fpround Float64Regs:$a)),
(CVT_f16_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f16 (fpround Float64Regs:$a)),
(CVT_f16_f64 Float64Regs:$a, CvtRN)>;

Expand All @@ -2971,8 +2955,6 @@ def : Pat<(f32 (fpextend Float16Regs:$a)),
(CVT_f32_f16 Float16Regs:$a, CvtNONE)>;

// fpextend f16 -> f64
def : Pat<(f64 (fpextend Float16Regs:$a)),
(CVT_f64_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f64 (fpextend Float16Regs:$a)),
(CVT_f64_f16 Float16Regs:$a, CvtNONE)>;

Expand All @@ -2988,9 +2970,7 @@ def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
// fceil, ffloor, fround, ftrunc.

def : Pat<(fceil Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fceil Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>;
(CVT_f16_f16 Float16Regs:$a, CvtRPI)>;
def : Pat<(fceil Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fceil Float32Regs:$a),
Expand All @@ -2999,20 +2979,16 @@ def : Pat<(fceil Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRPI)>;

def : Pat<(ffloor Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
(CVT_f16_f16 Float16Regs:$a, CvtRMI)>;
def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ffloor Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
def : Pat<(ffloor Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRMI)>;

def : Pat<(fround Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f16 (fround Float16Regs:$a)),
(CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
(CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fround Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(f32 (fround Float32Regs:$a)),
Expand All @@ -3021,9 +2997,7 @@ def : Pat<(f64 (fround Float64Regs:$a)),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

def : Pat<(ftrunc Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
(CVT_f16_f16 Float16Regs:$a, CvtRZI)>;
def : Pat<(ftrunc Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(ftrunc Float32Regs:$a),
Expand All @@ -3036,9 +3010,7 @@ def : Pat<(ftrunc Float64Regs:$a),
// matches what CUDA's "libm" does.

def : Pat<(fnearbyint Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fnearbyint Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
(CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(fnearbyint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(fnearbyint Float32Regs:$a),
Expand All @@ -3047,9 +3019,7 @@ def : Pat<(fnearbyint Float64Regs:$a),
(CVT_f64_f64 Float64Regs:$a, CvtRNI)>;

def : Pat<(frint Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(frint Float16Regs:$a),
(CVT_f16_f16 Float16Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
(CVT_f16_f16 Float16Regs:$a, CvtRNI)>;
def : Pat<(frint Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
def : Pat<(frint Float32Regs:$a),
Expand Down

0 comments on commit d66dde5

Please sign in to comment.