[WebAssembly] SIMD min and max

Summary: Depends on D52324 and D52764. Reviewers: aheejin, dschuff Subscribers: sbc100, jgravelle-google, sunfish, llvm-commits Differential Revision: https://reviews.llvm.org/D52325 llvm-svn: 344438
llvm · Oct 13, 2018 · 3afc346 · 3afc346
1 parent 16c349d
commit 3afc346
Show file tree

Hide file tree

Showing 5 changed files with 223 additions and 19 deletions.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -699,21 +699,21 @@ defm "" : SIMDAbs<v2f64, "f64x2", 128>;
 // Floating-point min and max
 //===----------------------------------------------------------------------===//
 
+multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
+  defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
+  defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 1)>;
+}
+
 // NaN-propagating minimum: min
-// TODO
+defm MIN : SIMDBinaryFP<fminnan, "min", 129>;
 
 // NaN-propagating maximum: max
-// TODO
+defm MAX : SIMDBinaryFP<fmaxnan, "max", 131>;
 
 //===----------------------------------------------------------------------===//
 // Floating-point arithmetic
 //===----------------------------------------------------------------------===//
 
-multiclass SIMDBinaryFP<SDNode node, string name, bits<32> baseInst> {
-  defm "" : SIMDBinary<v4f32, "f32x4", node, name, baseInst>;
-  defm "" : SIMDBinary<v2f64, "f64x2", node, name, !add(baseInst, 1)>;
-}
-
 // Addition: add
 let isCommutable = 1 in
 defm ADD : SIMDBinaryFP<fadd, "add", 133>;

diff --git a/llvm/test/CodeGen/WebAssembly/f32.ll b/llvm/test/CodeGen/WebAssembly/f32.ll
@@ -123,12 +123,6 @@ define float @nearest32_via_rint(float %x) {
   ret float %a
 }
 
-; Min and max tests. LLVM currently only forms fminnan and fmaxnan nodes in
-; cases where there's a single fcmp with a select and it can prove that one
-; of the arms is never NaN, so we only test that case. In the future if LLVM
-; learns to form fminnan/fmaxnan in more cases, we can write more general
-; tests.
-
 ; CHECK-LABEL: fmin32:
 ; CHECK: f32.min $push1=, $pop{{[0-9]+}}, $pop[[LR]]{{$}}
 ; CHECK-NEXT: return $pop1{{$}}
@@ -147,6 +141,24 @@ define float @fmax32(float %x) {
   ret float %b
 }
 
+; CHECK-LABEL: fmin32_intrinsic:
+; CHECK: f32.min $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare float @llvm.minimum.f32(float, float)
+define float @fmin32_intrinsic(float %x, float %y) {
+  %a = call float @llvm.minimum.f32(float %x, float %y)
+  ret float %a
+}
+
+; CHECK-LABEL: fmax32_intrinsic:
+; CHECK: f32.max $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare float @llvm.maximum.f32(float, float)
+define float @fmax32_intrinsic(float %x, float %y) {
+  %a = call float @llvm.maximum.f32(float %x, float %y)
+  ret float %a
+}
+
 ; CHECK-LABEL: fma32:
 ; CHECK: {{^}} f32.call $push[[LR:[0-9]+]]=, fmaf@FUNCTION, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
 ; CHECK-NEXT: return $pop[[LR]]{{$}}

diff --git a/llvm/test/CodeGen/WebAssembly/f64.ll b/llvm/test/CodeGen/WebAssembly/f64.ll
@@ -123,12 +123,6 @@ define double @nearest64_via_rint(double %x) {
   ret double %a
 }
 
-; Min and max tests. LLVM currently only forms fminnan and fmaxnan nodes in
-; cases where there's a single fcmp with a select and it can prove that one
-; of the arms is never NaN, so we only test that case. In the future if LLVM
-; learns to form fminnan/fmaxnan in more cases, we can write more general
-; tests.
-
 ; CHECK-LABEL: fmin64:
 ; CHECK: f64.min $push1=, $pop{{[0-9]+}}, $pop[[LR]]{{$}}
 ; CHECK-NEXT: return $pop1{{$}}
@@ -147,6 +141,24 @@ define double @fmax64(double %x) {
   ret double %b
 }
 
+; CHECK-LABEL: fmin64_intrinsic:
+; CHECK: f64.min $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare double @llvm.minimum.f64(double, double)
+define double @fmin64_intrinsic(double %x, double %y) {
+  %a = call double @llvm.minimum.f64(double %x, double %y)
+  ret double %a
+}
+
+; CHECK-LABEL: fmax64_intrinsic:
+; CHECK: f64.max $push0=, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
+; CHECK-NEXT: return $pop0{{$}}
+declare double @llvm.maximum.f64(double, double)
+define double @fmax64_intrinsic(double %x, double %y) {
+  %a = call double @llvm.maximum.f64(double %x, double %y)
+  ret double %a
+}
+
 ; CHECK-LABEL: fma64:
 ; CHECK: {{^}} f64.call $push[[LR:[0-9]+]]=, fma@FUNCTION, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}}
 ; CHECK-NEXT: return $pop[[LR]]{{$}}

diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -765,6 +765,90 @@ define <4 x float> @abs_v4f32(<4 x float> %x) {
   ret <4 x float> %a
 }
 
+; CHECK-LABEL: min_unordered_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @min_unordered_v4f32(<4 x float> %x) {
+  %cmps = fcmp ule <4 x float> %x, <float 5., float 5., float 5., float 5.>
+  %a = select <4 x i1> %cmps, <4 x float> %x,
+    <4 x float> <float 5., float 5., float 5., float 5.>
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: max_unordered_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @max_unordered_v4f32(<4 x float> %x) {
+  %cmps = fcmp uge <4 x float> %x, <float 5., float 5., float 5., float 5.>
+  %a = select <4 x i1> %cmps, <4 x float> %x,
+    <4 x float> <float 5., float 5., float 5., float 5.>
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: min_ordered_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $pop[[L1]], $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @min_ordered_v4f32(<4 x float> %x) {
+  %cmps = fcmp ole <4 x float> <float 5., float 5., float 5., float 5.>, %x
+  %a = select <4 x i1> %cmps,
+    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: max_ordered_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $pop[[L1]], $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <4 x float> @max_ordered_v4f32(<4 x float> %x) {
+  %cmps = fcmp oge <4 x float> <float 5., float 5., float 5., float 5.>, %x
+  %a = select <4 x i1> %cmps,
+    <4 x float> <float 5., float 5., float 5., float 5.>, <4 x float> %x
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: min_intrinsic_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
+define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
+  %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %a
+}
+
+; CHECK-LABEL: max_intrinsic_v4f32:
+; NO-SIMD128-NOT: f32x4
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
+define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) {
+  %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
+  ret <4 x float> %a
+}
+
 ; CHECK-LABEL: add_v4f32:
 ; NO-SIMD128-NOT: f32x4
 ; SIMD128-NEXT: .param v128, v128{{$}}
@@ -848,6 +932,90 @@ define <2 x double> @abs_v2f64(<2 x double> %x) {
   ret <2 x double> %a
 }
 
+; CHECK-LABEL: min_unordered_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @min_unordered_v2f64(<2 x double> %x) {
+  %cmps = fcmp ule <2 x double> %x, <double 5., double 5.>
+  %a = select <2 x i1> %cmps, <2 x double> %x,
+    <2 x double> <double 5., double 5.>
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: max_unordered_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L1]]{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @max_unordered_v2f64(<2 x double> %x) {
+  %cmps = fcmp uge <2 x double> %x, <double 5., double 5.>
+  %a = select <2 x i1> %cmps, <2 x double> %x,
+    <2 x double> <double 5., double 5.>
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: min_ordered_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $pop[[L1]], $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @min_ordered_v2f64(<2 x double> %x) {
+  %cmps = fcmp ole <2 x double> <double 5., double 5.>, %x
+  %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
+    <2 x double> %x
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: max_ordered_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .param v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f64.const $push[[L0:[0-9]+]]=, 0x1.4p2
+; SIMD128-NEXT: f64x2.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
+; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $pop[[L1]], $0{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+define <2 x double> @max_ordered_v2f64(<2 x double> %x) {
+  %cmps = fcmp oge <2 x double> <double 5., double 5.>, %x
+  %a = select <2 x i1> %cmps, <2 x double> <double 5., double 5.>,
+    <2 x double> %x
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: min_intrinsic_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
+define <2 x double> @min_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
+  %a = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: max_intrinsic_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .param v128, v128{{$}}
+; SIMD128-NEXT: .result v128{{$}}
+; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $1{{$}}
+; SIMD128-NEXT: return $pop[[R]]{{$}}
+declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
+define <2 x double> @max_intrinsic_v2f64(<2 x double> %x, <2 x double> %y) {
+  %a = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> %y)
+  ret <2 x double> %a
+}
+
 ; CHECK-LABEL: add_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-VM-NOT: f62x2

diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -382,6 +382,18 @@
     # CHECK: f64x2.abs # encoding: [0xfd,0x80]
     f64x2.abs
 
+    # CHECK: f32x4.min # encoding: [0xfd,0x81]
+    f32x4.min
+
+    # CHECK: f64x2.min # encoding: [0xfd,0x82]
+    f64x2.min
+
+    # CHECK: f32x4.max # encoding: [0xfd,0x83]
+    f32x4.max
+
+    # CHECK: f64x2.max # encoding: [0xfd,0x84]
+    f64x2.max
+
     # CHECK: f32x4.add # encoding: [0xfd,0x85]
     f32x4.add