Skip to content

Commit

Permalink
[WebAssembly] Implement pseudo-min/max SIMD instructions
Browse files Browse the repository at this point in the history
Summary:
Implement the pseudo-minimum (pmin) and pseudo-maximum (pmax) SIMD
instructions as proposed in WebAssembly/simd#122. Since
these instructions are not yet merged into the SIMD spec proposal, this
patch makes them entirely opt-in by surfacing them only through LLVM
intrinsics and clang builtins. If these instructions are made
official, these intrinsics and builtins should be replaced with simple
instruction patterns.

Reviewers: aheejin

Subscribers: dschuff, sbc100, jgravelle-google, hiraditya, sunfish, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D79742
  • Loading branch information
tlively committed May 12, 2020
1 parent 25a95f4 commit 3d49d1c
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 0 deletions.
4 changes: 4 additions & 0 deletions clang/include/clang/Basic/BuiltinsWebAssembly.def
Expand Up @@ -139,8 +139,12 @@ TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_min_f32x4, "V4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_max_f32x4, "V4fV4fV4f", "nc", "simd128")
// Pseudo-min/max on f32x4 (proposed in WebAssembly/simd#122): two V4f operands,
// V4f result, declared with the "simd128" feature string.
TARGET_BUILTIN(__builtin_wasm_pmin_f32x4, "V4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_pmax_f32x4, "V4fV4fV4f", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_min_f64x2, "V2dV2dV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_max_f64x2, "V2dV2dV2d", "nc", "simd128")
// Pseudo-min/max on f64x2: same shape as the f32x4 pair above, using V2d.
TARGET_BUILTIN(__builtin_wasm_pmin_f64x2, "V2dV2dV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_pmax_f64x2, "V2dV2dV2d", "nc", "simd128")

TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")

Expand Down
16 changes: 16 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Expand Up @@ -15758,6 +15758,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
ConvertType(E->getType()));
return Builder.CreateCall(Callee, {LHS, RHS});
}
case WebAssembly::BI__builtin_wasm_pmin_f32x4:
case WebAssembly::BI__builtin_wasm_pmin_f64x2: {
  // Pseudo-minimum: the f32x4 and f64x2 builtins both map onto the single
  // overloaded wasm_pmin intrinsic, specialized on the call's result type.
  // The braced initializer evaluates argument 0 before argument 1, and both
  // are emitted before the intrinsic declaration is requested.
  Value *Operands[] = {EmitScalarExpr(E->getArg(0)),
                       EmitScalarExpr(E->getArg(1))};
  Function *PMin =
      CGM.getIntrinsic(Intrinsic::wasm_pmin, ConvertType(E->getType()));
  return Builder.CreateCall(PMin, Operands);
}
case WebAssembly::BI__builtin_wasm_pmax_f32x4:
case WebAssembly::BI__builtin_wasm_pmax_f64x2: {
  // Pseudo-maximum: the f32x4 and f64x2 builtins both map onto the single
  // overloaded wasm_pmax intrinsic, specialized on the call's result type.
  // The braced initializer evaluates argument 0 before argument 1, and both
  // are emitted before the intrinsic declaration is requested.
  Value *Operands[] = {EmitScalarExpr(E->getArg(0)),
                       EmitScalarExpr(E->getArg(1))};
  Function *PMax =
      CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
  return Builder.CreateCall(PMax, Operands);
}
case WebAssembly::BI__builtin_wasm_swizzle_v8x16: {
Value *Src = EmitScalarExpr(E->getArg(0));
Value *Indices = EmitScalarExpr(E->getArg(1));
Expand Down
20 changes: 20 additions & 0 deletions clang/lib/Headers/wasm_simd128.h
Expand Up @@ -937,6 +937,16 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t __a,
return (v128_t)__builtin_wasm_max_f32x4((__f32x4)__a, (__f32x4)__b);
}

/* Pseudo-minimum of two f32x4 vectors, forwarded to
 * __builtin_wasm_pmin_f32x4. Semantics follow WebAssembly/simd#122
 * (presumably `b < a ? b : a` per lane -- confirm against the proposal). */
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmin(v128_t __a,
                                                            v128_t __b) {
  __f32x4 __lhs = (__f32x4)__a;
  __f32x4 __rhs = (__f32x4)__b;
  return (v128_t)__builtin_wasm_pmin_f32x4(__lhs, __rhs);
}

/* Pseudo-maximum of two f32x4 vectors, forwarded to
 * __builtin_wasm_pmax_f32x4. Semantics follow WebAssembly/simd#122
 * (presumably `a < b ? b : a` per lane -- confirm against the proposal). */
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_pmax(v128_t __a,
                                                            v128_t __b) {
  __f32x4 __lhs = (__f32x4)__a;
  __f32x4 __rhs = (__f32x4)__b;
  return (v128_t)__builtin_wasm_pmax_f32x4(__lhs, __rhs);
}

/* Reinterprets the v128 as f64x2 and forwards it to __builtin_wasm_abs_f64x2
 * (absolute value of each lane, going by the builtin's name). */
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t __a) {
  __f64x2 __lanes = (__f64x2)__a;
  return (v128_t)__builtin_wasm_abs_f64x2(__lanes);
}
Expand Down Expand Up @@ -997,6 +1007,16 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t __a,
return (v128_t)__builtin_wasm_max_f64x2((__f64x2)__a, (__f64x2)__b);
}

/* Pseudo-minimum of two f64x2 vectors, forwarded to
 * __builtin_wasm_pmin_f64x2. Semantics follow WebAssembly/simd#122
 * (presumably `b < a ? b : a` per lane -- confirm against the proposal). */
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmin(v128_t __a,
                                                            v128_t __b) {
  __f64x2 __lhs = (__f64x2)__a;
  __f64x2 __rhs = (__f64x2)__b;
  return (v128_t)__builtin_wasm_pmin_f64x2(__lhs, __rhs);
}

/* Pseudo-maximum of two f64x2 vectors, forwarded to
 * __builtin_wasm_pmax_f64x2. Semantics follow WebAssembly/simd#122
 * (presumably `a < b ? b : a` per lane -- confirm against the proposal). */
static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_pmax(v128_t __a,
                                                            v128_t __b) {
  __f64x2 __lhs = (__f64x2)__a;
  __f64x2 __rhs = (__f64x2)__b;
  return (v128_t)__builtin_wasm_pmax_f64x2(__lhs, __rhs);
}

static __inline__ v128_t __DEFAULT_FN_ATTRS
wasm_i32x4_trunc_saturate_f32x4(v128_t __a) {
return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)__a);
Expand Down
28 changes: 28 additions & 0 deletions clang/test/CodeGen/builtins-wasm.c
Expand Up @@ -579,6 +579,20 @@ f32x4 max_f32x4(f32x4 x, f32x4 y) {
// WEBASSEMBLY-NEXT: ret
}

// The f32x4 pseudo-min builtin must be emitted as one call to the overloaded
// llvm.wasm.pmin intrinsic instantiated for <4 x float>, with the operands
// passed through in source order. The parameter names x/y are matched
// literally by the checks below, so do not rename them.
f32x4 pmin_f32x4(f32x4 x, f32x4 y) {
return __builtin_wasm_pmin_f32x4(x, y);
// WEBASSEMBLY: call <4 x float> @llvm.wasm.pmin.v4f32(
// WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
// WEBASSEMBLY-NEXT: ret
}

// The f32x4 pseudo-max builtin must be emitted as one call to the overloaded
// llvm.wasm.pmax intrinsic instantiated for <4 x float>, with the operands
// passed through in source order. The parameter names x/y are matched
// literally by the checks below, so do not rename them.
f32x4 pmax_f32x4(f32x4 x, f32x4 y) {
return __builtin_wasm_pmax_f32x4(x, y);
// WEBASSEMBLY: call <4 x float> @llvm.wasm.pmax.v4f32(
// WEBASSEMBLY-SAME: <4 x float> %x, <4 x float> %y)
// WEBASSEMBLY-NEXT: ret
}

f64x2 min_f64x2(f64x2 x, f64x2 y) {
return __builtin_wasm_min_f64x2(x, y);
// WEBASSEMBLY: call <2 x double> @llvm.minimum.v2f64(
Expand All @@ -593,6 +607,20 @@ f64x2 max_f64x2(f64x2 x, f64x2 y) {
// WEBASSEMBLY-NEXT: ret
}

// The f64x2 pseudo-min builtin must be emitted as one call to the overloaded
// llvm.wasm.pmin intrinsic instantiated for <2 x double>, with the operands
// passed through in source order. The parameter names x/y are matched
// literally by the checks below, so do not rename them.
f64x2 pmin_f64x2(f64x2 x, f64x2 y) {
return __builtin_wasm_pmin_f64x2(x, y);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.pmin.v2f64(
// WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
// WEBASSEMBLY-NEXT: ret
}

// The f64x2 pseudo-max builtin must be emitted as one call to the overloaded
// llvm.wasm.pmax intrinsic instantiated for <2 x double>, with the operands
// passed through in source order. The parameter names x/y are matched
// literally by the checks below, so do not rename them.
f64x2 pmax_f64x2(f64x2 x, f64x2 y) {
return __builtin_wasm_pmax_f64x2(x, y);
// WEBASSEMBLY: call <2 x double> @llvm.wasm.pmax.v2f64(
// WEBASSEMBLY-SAME: <2 x double> %x, <2 x double> %y)
// WEBASSEMBLY-NEXT: ret
}

f32x4 sqrt_f32x4(f32x4 x) {
return __builtin_wasm_sqrt_f32x4(x);
// WEBASSEMBLY: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
Expand Down
11 changes: 11 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsWebAssembly.td
Expand Up @@ -176,6 +176,17 @@ def int_wasm_widen_high_unsigned :
[llvm_anyvector_ty],
[IntrNoMem, IntrSpeculatable]>;

// TODO: Replace these intrinsics with normal ISel patterns once the
// pmin/pmax instructions are merged to the spec proposal.
// Pseudo-minimum. Overloaded on a single vector type: the result is any
// vector type and both operands must match it. Declared as not accessing
// memory and as speculatable.
def int_wasm_pmin :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
// Pseudo-maximum. Overloaded on a single vector type: the result is any
// vector type and both operands must match it. Declared as not accessing
// memory and as speculatable.
def int_wasm_pmax :
Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;

//===----------------------------------------------------------------------===//
// Bulk memory intrinsics
//===----------------------------------------------------------------------===//
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
Expand Up @@ -794,6 +794,12 @@ defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
// NaN-propagating maximum: max
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;

// Pseudo-minimum: pmin
// Matched from the int_wasm_pmin intrinsic (not a generic node such as the
// fminimum used by MIN); 234 is the opcode argument given to SIMDBinaryFP.
defm PMIN : SIMDBinaryFP<int_wasm_pmin, "pmin", 234>;

// Pseudo-maximum: pmax
// Matched from the int_wasm_pmax intrinsic (not a generic node such as the
// fmaximum used by MAX); 235 is the opcode argument given to SIMDBinaryFP.
defm PMAX : SIMDBinaryFP<int_wasm_pmax, "pmax", 235>;

//===----------------------------------------------------------------------===//
// Conversions
//===----------------------------------------------------------------------===//
Expand Down
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
Expand Up @@ -501,6 +501,26 @@ define <4 x float> @bitselect_v4f32(<4 x float> %v1, <4 x float> %v2, <4 x float
ret <4 x float> %a
}

; A call to the llvm.wasm.pmin intrinsic on <4 x float> is expected to select
; to a single f32x4.pmin whose result is returned directly.
; CHECK-LABEL: pmin_v4f32:
; SIMD128-NEXT: .functype pmin_v4f32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f32x4.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.wasm.pmin.v4f32(<4 x float>, <4 x float>)
define <4 x float> @pmin_v4f32(<4 x float> %a, <4 x float> %b) {
%v = call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> %a, <4 x float> %b)
ret <4 x float> %v
}

; A call to the llvm.wasm.pmax intrinsic on <4 x float> is expected to select
; to a single f32x4.pmax whose result is returned directly.
; CHECK-LABEL: pmax_v4f32:
; SIMD128-NEXT: .functype pmax_v4f32 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f32x4.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <4 x float> @llvm.wasm.pmax.v4f32(<4 x float>, <4 x float>)
define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) {
%v = call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> %a, <4 x float> %b)
ret <4 x float> %v
}

; CHECK-LABEL: qfma_v4f32:
; SIMD128-NEXT: .functype qfma_v4f32 (v128, v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f32x4.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
Expand Down Expand Up @@ -540,6 +560,26 @@ define <2 x double> @bitselect_v2f64(<2 x double> %v1, <2 x double> %v2, <2 x do
ret <2 x double> %a
}

; A call to the llvm.wasm.pmin intrinsic on <2 x double> is expected to select
; to a single f64x2.pmin whose result is returned directly.
; CHECK-LABEL: pmin_v2f64:
; SIMD128-NEXT: .functype pmin_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.pmin $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <2 x double> @llvm.wasm.pmin.v2f64(<2 x double>, <2 x double>)
define <2 x double> @pmin_v2f64(<2 x double> %a, <2 x double> %b) {
%v = call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> %a, <2 x double> %b)
ret <2 x double> %v
}

; A call to the llvm.wasm.pmax intrinsic on <2 x double> is expected to select
; to a single f64x2.pmax whose result is returned directly.
; CHECK-LABEL: pmax_v2f64:
; SIMD128-NEXT: .functype pmax_v2f64 (v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.pmax $push[[R:[0-9]+]]=, $0, $1{{$}}
; SIMD128-NEXT: return $pop[[R]]{{$}}
declare <2 x double> @llvm.wasm.pmax.v2f64(<2 x double>, <2 x double>)
define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) {
%v = call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> %a, <2 x double> %b)
ret <2 x double> %v
}

; CHECK-LABEL: qfma_v2f64:
; SIMD128-NEXT: .functype qfma_v2f64 (v128, v128, v128) -> (v128){{$}}
; SIMD128-NEXT: f64x2.qfma $push[[R:[0-9]+]]=, $0, $1, $2{{$}}
Expand Down
12 changes: 12 additions & 0 deletions llvm/test/MC/WebAssembly/simd-encodings.s
Expand Up @@ -535,6 +535,12 @@ main:
# CHECK: f32x4.max # encoding: [0xfd,0xe9,0x01]
f32x4.max

# f32x4 pseudo-min/max: expected to encode as the 0xfd prefix followed by
# the two bytes listed on each line below.
# CHECK: f32x4.pmin # encoding: [0xfd,0xea,0x01]
f32x4.pmin

# CHECK: f32x4.pmax # encoding: [0xfd,0xeb,0x01]
f32x4.pmax

# CHECK: f64x2.abs # encoding: [0xfd,0xec,0x01]
f64x2.abs

Expand Down Expand Up @@ -562,6 +568,12 @@ main:
# CHECK: f64x2.max # encoding: [0xfd,0xf5,0x01]
f64x2.max

# f64x2 pseudo-min/max: expected to encode as the 0xfd prefix followed by
# the two bytes listed on each line below.
# CHECK: f64x2.pmin # encoding: [0xfd,0xf6,0x01]
f64x2.pmin

# CHECK: f64x2.pmax # encoding: [0xfd,0xf7,0x01]
f64x2.pmax

# CHECK: i32x4.trunc_sat_f32x4_s # encoding: [0xfd,0xf8,0x01]
i32x4.trunc_sat_f32x4_s

Expand Down

0 comments on commit 3d49d1c

Please sign in to comment.