-
Notifications
You must be signed in to change notification settings - Fork 15k
[WebAssembly] [Codegen] Add pattern for relaxed min max from pmin/pmax-based patterns over v4f32 and v2f64 #164486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-webassembly Author: Jasmine Tang (badumbatish) ChangesRelated to #55932 Patch is 25.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164486.diff 4 Files Affected:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 64723340051b8..70c9e499945fe 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -183,6 +183,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
for (auto T : {MVT::i32, MVT::i64})
setOperationAction(Op, T, Custom);
+ if (Subtarget->hasRelaxedSIMD()) {
+ setOperationAction(
+ {ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM},
+ {MVT::v4f32, MVT::v2f64}, Legal);
+ }
// SIMD-specific configuration
if (Subtarget->hasSIMD128()) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 49af78bce68c3..156ac44f119a6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1692,6 +1692,42 @@ defm SIMD_RELAXED_FMIN :
defm SIMD_RELAXED_FMAX :
RelaxedBinary<F64x2, int_wasm_relaxed_max, "relaxed_max", 0x110>;
+let Predicates = [HasRelaxedSIMD] in {
+ foreach vec = [F32x4, F64x2] in {
+ defvar relaxed_min = !cast<NI>("SIMD_RELAXED_FMIN_"#vec);
+ defvar relaxed_max = !cast<NI>("SIMD_RELAXED_FMAX_"#vec);
+
+ // Transform standard fminimum/fmaximum to relaxed versions
+ def : Pat<(vec.vt (fminnum (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+ (relaxed_min V128:$lhs, V128:$rhs)>;
+ def : Pat<(vec.vt (fminimumnum (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+ (relaxed_min V128:$lhs, V128:$rhs)>;
+ def : Pat<(vec.vt (fmaxnum (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+ (relaxed_max V128:$lhs, V128:$rhs)>;
+ def : Pat<(vec.vt (fmaximumnum (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+ (relaxed_max V128:$lhs, V128:$rhs)>;
+
+ // Transform pmin/max-supposed patterns to relaxed min max
+ let AddedComplexity = 1 in {
+ def : Pat<(vec.vt (pmin (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+ (relaxed_min $lhs, $rhs)>;
+ def : Pat<(vec.vt (pmax (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
+ (relaxed_max $lhs, $rhs)>;
+
+ def : Pat<(vec.int_vt (vselect
+ (setolt (vec.vt (bitconvert V128:$rhs)),
+ (vec.vt (bitconvert V128:$lhs))),
+ V128:$rhs, V128:$lhs)),
+ (relaxed_min $lhs, $rhs)>;
+ def : Pat<(vec.int_vt (vselect
+ (setolt (vec.vt (bitconvert V128:$lhs)),
+ (vec.vt (bitconvert V128:$rhs))),
+ V128:$rhs, V128:$lhs)),
+ (relaxed_max $lhs, $rhs)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Relaxed rounding q15 multiplication
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll
new file mode 100644
index 0000000000000..f224a0dc21136
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll
@@ -0,0 +1,304 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd | FileCheck %s
+
+; Test that fmaxnum and fmaximumnum get transformed to relaxed_max
+
+target triple = "wasm32"
+
+define <4 x float> @test_maxnum_f32x4(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maxnum_f32x4:
+; CHECK: .functype test_maxnum_f32x4 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %result = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %result
+}
+
+define <4 x float> @test_maximumnum_f32x4(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_maximumnum_f32x4:
+; CHECK: .functype test_maximumnum_f32x4 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %result = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %result
+}
+
+define <2 x double> @test_maxnum_f64x2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_maxnum_f64x2:
+; CHECK: .functype test_maxnum_f64x2 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %result = call <2 x double> @llvm.maxnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %result
+}
+
+define <2 x double> @test_minimumnum_f64x2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_minimumnum_f64x2:
+; CHECK: .functype test_minimumnum_f64x2 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %result = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %result
+}
+
+define <4 x float> @test_pmax_v4f32_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_olt:
+; CHECK: .functype test_pmax_v4f32_olt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp olt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @test_pmax_v4f32_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_ole:
+; CHECK: .functype test_pmax_v4f32_ole (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp ole <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @test_pmax_v4f32_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_ogt:
+; CHECK: .functype test_pmax_v4f32_ogt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp ogt <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @test_pmax_v4f32_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_oge:
+; CHECK: .functype test_pmax_v4f32_oge (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp oge <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+; For setlt
+define <4 x float> @pmax_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: pmax_v4f32_fast_olt:
+; CHECK: .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast olt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+; For setle
+define <4 x float> @test_pmax_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_fast_ole:
+; CHECK: .functype test_pmax_v4f32_fast_ole (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast ole <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+; For setgt
+define <4 x float> @test_pmax_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_fast_ogt:
+; CHECK: .functype test_pmax_v4f32_fast_ogt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast ogt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %a
+}
+
+; For setge
+define <4 x float> @test_pmax_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmax_v4f32_fast_oge:
+; CHECK: .functype test_pmax_v4f32_fast_oge (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast oge <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %a
+}
+
+define <4 x i32> @test_pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_pmax_int_v4f32:
+; CHECK: .functype test_pmax_int_v4f32 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %fx = bitcast <4 x i32> %x to <4 x float>
+ %fy = bitcast <4 x i32> %y to <4 x float>
+ %c = fcmp olt <4 x float> %fy, %fx
+ %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
+ ret <4 x i32> %a
+}
+
+define <2 x double> @test_pmax_v2f64_olt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_olt:
+; CHECK: .functype test_pmax_v2f64_olt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp olt <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+ ret <2 x double> %a
+}
+
+define <2 x double> @test_pmax_v2f64_ole(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_ole:
+; CHECK: .functype test_pmax_v2f64_ole (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp ole <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+ ret <2 x double> %a
+}
+
+define <2 x double> @test_pmax_v2f64_ogt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_ogt:
+; CHECK: .functype test_pmax_v2f64_ogt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp ogt <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+ ret <2 x double> %a
+}
+define <2 x double> @test_pmax_v2f64_oge(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_oge:
+; CHECK: .functype test_pmax_v2f64_oge (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp oge <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+ ret <2 x double> %a
+}
+
+; For setlt
+define <2 x double> @pmax_v2f64_fast_olt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: pmax_v2f64_fast_olt:
+; CHECK: .functype pmax_v2f64_fast_olt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast olt <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+ ret <2 x double> %a
+}
+
+; For setle
+define <2 x double> @test_pmax_v2f64_fast_ole(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_fast_ole:
+; CHECK: .functype test_pmax_v2f64_fast_ole (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast ole <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
+ ret <2 x double> %a
+}
+; For setgt
+define <2 x double> @test_pmax_v2f64_fast_ogt(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_fast_ogt:
+; CHECK: .functype test_pmax_v2f64_fast_ogt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast ogt <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+ ret <2 x double> %a
+}
+
+; For setge
+define <2 x double> @test_pmax_v2f64_fast_oge(<2 x double> %x, <2 x double> %y) {
+; CHECK-LABEL: test_pmax_v2f64_fast_oge:
+; CHECK: .functype test_pmax_v2f64_fast_oge (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast oge <2 x double> %x, %y
+ %a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
+ ret <2 x double> %a
+}
+
+define <2 x i64> @test_pmax_int_v2f64(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: test_pmax_int_v2f64:
+; CHECK: .functype test_pmax_int_v2f64 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f64x2.relaxed_max
+; CHECK-NEXT: # fallthrough-return
+ %fx = bitcast <2 x i64> %x to <2 x double>
+ %fy = bitcast <2 x i64> %y to <2 x double>
+ %c = fcmp olt <2 x double> %fy, %fx
+ %a = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
+ ret <2 x i64> %a
+}
+
+declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.maximumnum.v4f32(<4 x float>, <4 x float>)
+declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.maximumnum.v2f64(<2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll
new file mode 100644
index 0000000000000..460446574ecfb
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fmin.ll
@@ -0,0 +1,305 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mattr=+simd128,+relaxed-simd | FileCheck %s
+
+; Test that fminnum and fminimumnum get transformed to relaxed_min
+
+target triple = "wasm32"
+
+define <4 x float> @test_minnum_f32x4(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minnum_f32x4:
+; CHECK: .functype test_minnum_f32x4 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %result = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %result
+}
+
+define <4 x float> @test_minimumnum_f32x4(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_minimumnum_f32x4:
+; CHECK: .functype test_minimumnum_f32x4 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %result = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %result
+}
+
+define <2 x double> @test_minnum_f64x2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_minnum_f64x2:
+; CHECK: .functype test_minnum_f64x2 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %result = call <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %result
+}
+
+define <2 x double> @test_minimumnum_f64x2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_minimumnum_f64x2:
+; CHECK: .functype test_minimumnum_f64x2 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f64x2.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %result = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %a, <2 x double> %b)
+ ret <2 x double> %result
+}
+
+define <4 x float> @test_pmin_v4f32_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_olt:
+; CHECK: .functype test_pmin_v4f32_olt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp olt <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @test_pmin_v4f32_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_ole:
+; CHECK: .functype test_pmin_v4f32_ole (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp ole <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @test_pmin_v4f32_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_ogt:
+; CHECK: .functype test_pmin_v4f32_ogt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp ogt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+define <4 x float> @test_pmin_v4f32_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_oge:
+; CHECK: .functype test_pmin_v4f32_oge (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp oge <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+; For setlt
+define <4 x float> @pmin_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: pmin_v4f32_fast_olt:
+; CHECK: .functype pmin_v4f32_fast_olt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast olt <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+; For setle
+define <4 x float> @test_pmin_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_fast_ole:
+; CHECK: .functype test_pmin_v4f32_fast_ole (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast ole <4 x float> %y, %x
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+; For setgt
+define <4 x float> @test_pmin_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_fast_ogt:
+; CHECK: .functype test_pmin_v4f32_fast_ogt (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: f32x4.relaxed_min
+; CHECK-NEXT: # fallthrough-return
+ %c = fcmp fast ogt <4 x float> %x, %y
+ %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
+ ret <4 x float> %a
+}
+
+; For setge
+define <4 x float> @test_pmin_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: test_pmin_v4f32_fast_oge:
+; CHECK: .functype test_pmin_v4f32_fast_oge (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for doing this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/162/builds/33818 Here is the relevant piece of the build log for the reference |
…x-based patterns over v4f32 and v2f64 (llvm#164486) Related to llvm#55932
…x-based patterns over v4f32 and v2f64 (llvm#164486) Related to llvm#55932
…x-based patterns over v4f32 and v2f64 (llvm#164486) Related to llvm#55932
…x-based patterns over v4f32 and v2f64 (llvm#164486) Related to llvm#55932
Related to #55932
Follow up to and depends on #162948. First 5 commit is from said base PR.