Skip to content

Commit

Permalink
[AArch64] Combine fptoi.sat(fmul) to fixed point cvtf
Browse files Browse the repository at this point in the history
We already have patterns that combine fptosi/fptoui with an fmul into a
fixed-point convert. This adds equivalent patterns for fptosi.sat and
fptoui.sat, which should apply equally well to the legal saturating variants.

Differential Revision: https://reviews.llvm.org/D113199
  • Loading branch information
davemgreen committed Nov 8, 2021
1 parent 4375430 commit a982940
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 92 deletions.
31 changes: 27 additions & 4 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Expand Up @@ -3754,35 +3754,56 @@ defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;

// AArch64's FCVT instructions saturate when out of range.
// Selection patterns for a saturating FP-to-integer node.
//   to_int_sat - the saturating conversion SDNode being matched.
//   INST       - instruction name prefix; a suffix is appended with `#` and
//                the result is looked up via !cast<Instruction>.
// In the patterns below the suffix tracks the types: W/X is used with an
// i32/i64 result, and H/S/D with an f16/f32/f64 source. The `U..r` forms take
// only $Rn; the `S..ri` forms additionally take $scale.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
// f16 sources are only matched when HasFullFP16 holds.
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
// NOTE(review): the next two patterns (f32/f64 -> i32) also appear
// unpredicated further down. This looks like a diff rendering with the +/-
// markers dropped (i.e. these may be the removed lines) - confirm against
// the real file.
def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
}
// f32/f64 sources: no predicate is applied.
def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
(!cast<Instruction>(INST # UXSr) f32:$Rn)>;
def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
(!cast<Instruction>(INST # UWDr) f64:$Rn)>;
def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;

// Fold a preceding fmul into the conversion: when the input is an fmul whose
// second operand matches the corresponding fixedpoint_fNN_iMM operand, select
// the `S..ri` form and pass $scale to it instead of emitting the multiply.
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
}
def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
(!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
(!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
(!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
}

// Instantiate the saturating patterns: fp_to_sint_sat selects FCVTZS*
// instructions and fp_to_uint_sat selects FCVTZU* instructions.
defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;

multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
}
def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;

let Predicates = [HasFullFP16] in {
def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
(!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
(!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
}
def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
(!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
Expand All @@ -3807,10 +3828,12 @@ multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, strin
(!cast<Instruction>(INST # UXDr) f64:$Rn)>;

// These instructions saturate like fp_to_[su]int_sat.
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
(!cast<Instruction>(INST # UWHr) f16:$Rn)>;
def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
(!cast<Instruction>(INST # UXHr) f16:$Rn)>;
}
def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
(!cast<Instruction>(INST # UWSr) f32:$Rn)>;
def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
Expand Down
110 changes: 22 additions & 88 deletions llvm/test/CodeGen/AArch64/fcvt-fixed.ll
Expand Up @@ -679,10 +679,7 @@ declare i64 @llvm.fptosi.sat.i64.f16(half)
define i32 @fcvtzs_sat_f32_i32_7(float %flt) {
; CHECK-LABEL: fcvtzs_sat_f32_i32_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1124073472
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fcvtzs w0, s0
; CHECK-NEXT: fcvtzs w0, s0, #7
; CHECK-NEXT: ret
%fix = fmul float %flt, 128.0
%cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix)
Expand All @@ -692,10 +689,7 @@ define i32 @fcvtzs_sat_f32_i32_7(float %flt) {
define i32 @fcvtzs_sat_f32_i32_32(float %flt) {
; CHECK-LABEL: fcvtzs_sat_f32_i32_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1333788672
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fcvtzs w0, s0
; CHECK-NEXT: fcvtzs w0, s0, #32
; CHECK-NEXT: ret
%fix = fmul float %flt, 4294967296.0
%cvt = call i32 @llvm.fptosi.sat.i32.f32(float %fix)
Expand All @@ -705,10 +699,7 @@ define i32 @fcvtzs_sat_f32_i32_32(float %flt) {
define i64 @fcvtzs_sat_f32_i64_64(float %flt) {
; CHECK-LABEL: fcvtzs_sat_f32_i64_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1602224128
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fcvtzs x0, s0
; CHECK-NEXT: fcvtzs x0, s0, #64
; CHECK-NEXT: ret
%fix = fmul float %flt, 18446744073709551616.0
%cvt = call i64 @llvm.fptosi.sat.i64.f32(float %fix)
Expand All @@ -718,10 +709,7 @@ define i64 @fcvtzs_sat_f32_i64_64(float %flt) {
define i32 @fcvtzs_sat_f64_i32_7(double %dbl) {
; CHECK-LABEL: fcvtzs_sat_f64_i32_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4638707616191610880
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzs w0, d0
; CHECK-NEXT: fcvtzs w0, d0, #7
; CHECK-NEXT: ret
%fix = fmul double %dbl, 128.0
%cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix)
Expand All @@ -731,10 +719,7 @@ define i32 @fcvtzs_sat_f64_i32_7(double %dbl) {
define i32 @fcvtzs_sat_f64_i32_32(double %dbl) {
; CHECK-LABEL: fcvtzs_sat_f64_i32_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4751297606875873280
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzs w0, d0
; CHECK-NEXT: fcvtzs w0, d0, #32
; CHECK-NEXT: ret
%fix = fmul double %dbl, 4294967296.0
%cvt = call i32 @llvm.fptosi.sat.i32.f64(double %fix)
Expand All @@ -744,10 +729,7 @@ define i32 @fcvtzs_sat_f64_i32_32(double %dbl) {
define i64 @fcvtzs_sat_f64_i64_7(double %dbl) {
; CHECK-LABEL: fcvtzs_sat_f64_i64_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4638707616191610880
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzs x0, d0
; CHECK-NEXT: fcvtzs x0, d0, #7
; CHECK-NEXT: ret
%fix = fmul double %dbl, 128.0
%cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix)
Expand All @@ -757,10 +739,7 @@ define i64 @fcvtzs_sat_f64_i64_7(double %dbl) {
define i64 @fcvtzs_sat_f64_i64_64(double %dbl) {
; CHECK-LABEL: fcvtzs_sat_f64_i64_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4895412794951729152
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzs x0, d0
; CHECK-NEXT: fcvtzs x0, d0, #64
; CHECK-NEXT: ret
%fix = fmul double %dbl, 18446744073709551616.0
%cvt = call i64 @llvm.fptosi.sat.i64.f64(double %fix)
Expand All @@ -781,10 +760,7 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_7:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI55_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzs w0, h0
; CHECK-FP16-NEXT: fcvtzs w0, h0, #7
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 128.0
%cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix)
Expand All @@ -805,10 +781,7 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzs_sat_f16_i32_15:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI56_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzs w0, h0
; CHECK-FP16-NEXT: fcvtzs w0, h0, #15
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 32768.0
%cvt = call i32 @llvm.fptosi.sat.i32.f16(half %fix)
Expand All @@ -829,10 +802,7 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_7:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI57_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzs x0, h0
; CHECK-FP16-NEXT: fcvtzs x0, h0, #7
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 128.0
%cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix)
Expand All @@ -853,10 +823,7 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzs_sat_f16_i64_15:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI58_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzs x0, h0
; CHECK-FP16-NEXT: fcvtzs x0, h0, #15
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 32768.0
%cvt = call i64 @llvm.fptosi.sat.i64.f16(half %fix)
Expand All @@ -875,10 +842,7 @@ declare i64 @llvm.fptoui.sat.i64.f16(half)
define i32 @fcvtzu_sat_f32_i32_7(float %flt) {
; CHECK-LABEL: fcvtzu_sat_f32_i32_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1124073472
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fcvtzu w0, s0
; CHECK-NEXT: fcvtzu w0, s0, #7
; CHECK-NEXT: ret
%fix = fmul float %flt, 128.0
%cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix)
Expand All @@ -888,10 +852,7 @@ define i32 @fcvtzu_sat_f32_i32_7(float %flt) {
define i32 @fcvtzu_sat_f32_i32_32(float %flt) {
; CHECK-LABEL: fcvtzu_sat_f32_i32_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1333788672
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fcvtzu w0, s0
; CHECK-NEXT: fcvtzu w0, s0, #32
; CHECK-NEXT: ret
%fix = fmul float %flt, 4294967296.0
%cvt = call i32 @llvm.fptoui.sat.i32.f32(float %fix)
Expand All @@ -901,10 +862,7 @@ define i32 @fcvtzu_sat_f32_i32_32(float %flt) {
define i64 @fcvtzu_sat_f32_i64_64(float %flt) {
; CHECK-LABEL: fcvtzu_sat_f32_i64_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #1602224128
; CHECK-NEXT: fmov s1, w8
; CHECK-NEXT: fmul s0, s0, s1
; CHECK-NEXT: fcvtzu x0, s0
; CHECK-NEXT: fcvtzu x0, s0, #64
; CHECK-NEXT: ret
%fix = fmul float %flt, 18446744073709551616.0
%cvt = call i64 @llvm.fptoui.sat.i64.f32(float %fix)
Expand All @@ -914,10 +872,7 @@ define i64 @fcvtzu_sat_f32_i64_64(float %flt) {
define i32 @fcvtzu_sat_f64_i32_7(double %dbl) {
; CHECK-LABEL: fcvtzu_sat_f64_i32_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4638707616191610880
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: fcvtzu w0, d0, #7
; CHECK-NEXT: ret
%fix = fmul double %dbl, 128.0
%cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix)
Expand All @@ -927,10 +882,7 @@ define i32 @fcvtzu_sat_f64_i32_7(double %dbl) {
define i32 @fcvtzu_sat_f64_i32_32(double %dbl) {
; CHECK-LABEL: fcvtzu_sat_f64_i32_32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4751297606875873280
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzu w0, d0
; CHECK-NEXT: fcvtzu w0, d0, #32
; CHECK-NEXT: ret
%fix = fmul double %dbl, 4294967296.0
%cvt = call i32 @llvm.fptoui.sat.i32.f64(double %fix)
Expand All @@ -940,10 +892,7 @@ define i32 @fcvtzu_sat_f64_i32_32(double %dbl) {
define i64 @fcvtzu_sat_f64_i64_7(double %dbl) {
; CHECK-LABEL: fcvtzu_sat_f64_i64_7:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4638707616191610880
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzu x0, d0
; CHECK-NEXT: fcvtzu x0, d0, #7
; CHECK-NEXT: ret
%fix = fmul double %dbl, 128.0
%cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix)
Expand All @@ -953,10 +902,7 @@ define i64 @fcvtzu_sat_f64_i64_7(double %dbl) {
define i64 @fcvtzu_sat_f64_i64_64(double %dbl) {
; CHECK-LABEL: fcvtzu_sat_f64_i64_64:
; CHECK: // %bb.0:
; CHECK-NEXT: mov x8, #4895412794951729152
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: fmul d0, d0, d1
; CHECK-NEXT: fcvtzu x0, d0
; CHECK-NEXT: fcvtzu x0, d0, #64
; CHECK-NEXT: ret
%fix = fmul double %dbl, 18446744073709551616.0
%cvt = call i64 @llvm.fptoui.sat.i64.f64(double %fix)
Expand All @@ -977,10 +923,7 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_7:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI66_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: fcvtzu w0, h0, #7
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 128.0
%cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix)
Expand All @@ -1001,10 +944,7 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzu_sat_f16_i32_15:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI67_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzu w0, h0
; CHECK-FP16-NEXT: fcvtzu w0, h0, #15
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 32768.0
%cvt = call i32 @llvm.fptoui.sat.i32.f16(half %fix)
Expand All @@ -1025,10 +965,7 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_7:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI68_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzu x0, h0
; CHECK-FP16-NEXT: fcvtzu x0, h0, #7
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 128.0
%cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix)
Expand All @@ -1049,10 +986,7 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
;
; CHECK-FP16-LABEL: fcvtzu_sat_f16_i64_15:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: adrp x8, .LCPI69_0
; CHECK-FP16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
; CHECK-FP16-NEXT: fmul h0, h0, h1
; CHECK-FP16-NEXT: fcvtzu x0, h0
; CHECK-FP16-NEXT: fcvtzu x0, h0, #15
; CHECK-FP16-NEXT: ret
%fix = fmul half %dbl, 32768.0
%cvt = call i64 @llvm.fptoui.sat.i64.f16(half %fix)
Expand Down

0 comments on commit a982940

Please sign in to comment.