-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[WebAssembly] Ofast pmin/pmax pattern matchers #100107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-webassembly Author: Sam Parker (sparker-arm) Changes: With fast-math, the ordered setcc nodes are converted to setcc nodes which do not care about NaNs, so add patterns that use setlt, setle, setgt and setge. Patch is 25.27 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/100107.diff 3 Files Affected:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 2ee430c88169d..d6c6425b10041 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1270,7 +1270,11 @@ def pmin : PatFrags<(ops node:$lhs, node:$rhs), [
(vselect (setolt $rhs, $lhs), $rhs, $lhs),
(vselect (setole $rhs, $lhs), $rhs, $lhs),
(vselect (setogt $lhs, $rhs), $rhs, $lhs),
- (vselect (setoge $lhs, $rhs), $rhs, $lhs)
+ (vselect (setoge $lhs, $rhs), $rhs, $lhs),
+ (vselect (setlt $lhs, $rhs), $lhs, $rhs),
+ (vselect (setle $lhs, $rhs), $lhs, $rhs),
+ (vselect (setgt $lhs, $rhs), $rhs, $lhs),
+ (vselect (setge $lhs, $rhs), $rhs, $lhs)
]>;
defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
@@ -1279,7 +1283,11 @@ def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
(vselect (setogt $rhs, $lhs), $rhs, $lhs),
(vselect (setoge $rhs, $lhs), $rhs, $lhs),
(vselect (setolt $lhs, $rhs), $rhs, $lhs),
- (vselect (setole $lhs, $rhs), $rhs, $lhs)
+ (vselect (setole $lhs, $rhs), $rhs, $lhs),
+ (vselect (setgt $lhs, $rhs), $lhs, $rhs),
+ (vselect (setge $lhs, $rhs), $lhs, $rhs),
+ (vselect (setlt $lhs, $rhs), $rhs, $lhs),
+ (vselect (setle $lhs, $rhs), $rhs, $lhs)
]>;
defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index 67388b688e3bb..c47762e31ec23 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -12499,6 +12499,210 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %a
}
+define <4 x float> @pmin_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_olt:
+; SIMD128: .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_olt:
+; SIMD128-FAST: .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_olt:
+; NO-SIMD128: .functype pmin_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.lt $push0=, $8, $4
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $7, $3
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $6, $2
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $5, $1
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_olt:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $5, $1
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $6, $2
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $7, $3
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $8, $4
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast olt <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_ogt:
+; SIMD128: .functype pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_ogt:
+; SIMD128-FAST: .functype pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_ogt:
+; NO-SIMD128: .functype pmin_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_ogt:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast ogt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_ole:
+; SIMD128: .functype pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_ole:
+; SIMD128-FAST: .functype pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $1, $0
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_ole:
+; NO-SIMD128: .functype pmin_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.le $push0=, $8, $4
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.le $push2=, $7, $3
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.le $push4=, $6, $2
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.le $push6=, $5, $1
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_ole:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.le $push0=, $5, $1
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.le $push2=, $6, $2
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.le $push4=, $7, $3
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.le $push6=, $8, $4
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast ole <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmin_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmin_v4f32_fast_oge:
+; SIMD128: .functype pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmin_v4f32_fast_oge:
+; SIMD128-FAST: .functype pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmin $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmin_v4f32_fast_oge:
+; NO-SIMD128: .functype pmin_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.ge $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.ge $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.ge $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.ge $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmin_v4f32_fast_oge:
+; NO-SIMD128-FAST: .functype pmin_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.ge $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.ge $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.ge $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.ge $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast oge <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
; SIMD128-LABEL: pmin_int_v4f32:
; SIMD128: .functype pmin_int_v4f32 (v128, v128) -> (v128)
@@ -12619,6 +12823,210 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) {
ret <4 x float> %a
}
+define <4 x float> @pmax_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_ogt:
+; SIMD128: .functype pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_ogt:
+; SIMD128-FAST: .functype pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_ogt:
+; NO-SIMD128: .functype pmax_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.gt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.gt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.gt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.gt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_ogt:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_ogt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.gt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.gt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.gt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.gt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast ogt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_olt:
+; SIMD128: .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_olt:
+; SIMD128-FAST: .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_olt:
+; NO-SIMD128: .functype pmax_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_olt:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_olt (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.lt $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $5, $1, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.lt $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $6, $2, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast olt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_oge:
+; SIMD128: .functype pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_oge:
+; SIMD128-FAST: .functype pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_oge:
+; NO-SIMD128: .functype pmax_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.ge $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $4, $8, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.ge $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $3, $7, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.ge $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $2, $6, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.ge $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $1, $5, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_oge:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_oge (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.ge $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.select $push1=, $1, $5, $pop0
+; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1
+; NO-SIMD128-FAST-NEXT: f32.ge $push2=, $2, $6
+; NO-SIMD128-FAST-NEXT: f32.select $push3=, $2, $6, $pop2
+; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3
+; NO-SIMD128-FAST-NEXT: f32.ge $push4=, $3, $7
+; NO-SIMD128-FAST-NEXT: f32.select $push5=, $3, $7, $pop4
+; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5
+; NO-SIMD128-FAST-NEXT: f32.ge $push6=, $4, $8
+; NO-SIMD128-FAST-NEXT: f32.select $push7=, $4, $8, $pop6
+; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7
+; NO-SIMD128-FAST-NEXT: return
+ %c = fcmp fast oge <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %a
+}
+
+define <4 x float> @pmax_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; SIMD128-LABEL: pmax_v4f32_fast_ole:
+; SIMD128: .functype pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-NEXT: # %bb.0:
+; SIMD128-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-NEXT: return $pop0
+;
+; SIMD128-FAST-LABEL: pmax_v4f32_fast_ole:
+; SIMD128-FAST: .functype pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; SIMD128-FAST-NEXT: # %bb.0:
+; SIMD128-FAST-NEXT: f32x4.pmax $push0=, $0, $1
+; SIMD128-FAST-NEXT: return $pop0
+;
+; NO-SIMD128-LABEL: pmax_v4f32_fast_ole:
+; NO-SIMD128: .functype pmax_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-NEXT: # %bb.0:
+; NO-SIMD128-NEXT: f32.le $push0=, $4, $8
+; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0
+; NO-SIMD128-NEXT: f32.store 12($0), $pop1
+; NO-SIMD128-NEXT: f32.le $push2=, $3, $7
+; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2
+; NO-SIMD128-NEXT: f32.store 8($0), $pop3
+; NO-SIMD128-NEXT: f32.le $push4=, $2, $6
+; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4
+; NO-SIMD128-NEXT: f32.store 4($0), $pop5
+; NO-SIMD128-NEXT: f32.le $push6=, $1, $5
+; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6
+; NO-SIMD128-NEXT: f32.store 0($0), $pop7
+; NO-SIMD128-NEXT: return
+;
+; NO-SIMD128-FAST-LABEL: pmax_v4f32_fast_ole:
+; NO-SIMD128-FAST: .functype pmax_v4f32_fast_ole (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NO-SIMD128-FAST-NEXT: # %bb.0:
+; NO-SIMD128-FAST-NEXT: f32.le $push0=, $1, $5
+; NO-SIMD128-FAST-NEXT: f32.sele...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice, thanks!
With fast-math, the ordered setcc nodes are converted to setcc nodes which do not care about NaNs, so add patterns that use setlt, setle, setgt and setge.
6a7b05c
to
21d2980
Compare
Cheers, I just needed to fix a couple of typos before merging.
Summary: With fast-math, the ordered setcc nodes are converted to setcc nodes which do not care about NaNs, so add patterns that use setlt, setle, setgt and setge. Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60250651
With fast-math, the ordered setcc nodes are converted to setcc nodes which do not care about NaNs, so add patterns that use setlt, setle, setgt and setge.