Skip to content

Commit

Permalink
[WebAssembly] Add wasm_simd128.h intrinsics for relaxed SIMD
Browse files Browse the repository at this point in the history
Add user-friendly intrinsic functions for all relaxed SIMD instructions
alongside the existing SIMD128 intrinsic functions in wasm_simd128.h. Test that
the new intrinsics lower to the expected instructions in the existing
cross-project-tests test file.

Reviewed By: aheejin, sbc100

Differential Revision: https://reviews.llvm.org/D150833
  • Loading branch information
tlively committed May 18, 2023
1 parent 35f9fd6 commit c672c3f
Show file tree
Hide file tree
Showing 3 changed files with 243 additions and 2 deletions.
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/BuiltinsWebAssembly.def
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4iV4i", "nc", "simd128
TARGET_BUILTIN(__builtin_wasm_trunc_sat_s_zero_f64x2_i32x4, "V4iV2d", "nc", "simd128")
TARGET_BUILTIN(__builtin_wasm_trunc_sat_u_zero_f64x2_i32x4, "V4UiV2d", "nc", "simd128")

// Relaxed SIMD builtins (experimental)
// Relaxed SIMD builtins
// Relaxed multiply-add builtins for f32x4 and f64x2 vectors.
// NOTE(review): the second string looks like Clang's encoded builtin
// signature (result followed by three operands of the same vector type) and
// the last string the required target feature -- confirm against the
// TARGET_BUILTIN macro documentation.
TARGET_BUILTIN(__builtin_wasm_relaxed_madd_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
TARGET_BUILTIN(__builtin_wasm_relaxed_nmadd_f32x4, "V4fV4fV4fV4f", "nc", "relaxed-simd")
TARGET_BUILTIN(__builtin_wasm_relaxed_madd_f64x2, "V2dV2dV2dV2d", "nc", "relaxed-simd")
Expand Down
120 changes: 120 additions & 0 deletions clang/lib/Headers/wasm_simd128.h
Original file line number Diff line number Diff line change
Expand Up @@ -1760,6 +1760,126 @@ wasm_u64x2_load_32x2(const void *__mem) {
__DEPRECATED_WASM_MACRO("wasm_v64x2_shuffle", "wasm_i64x2_shuffle") \
wasm_i64x2_shuffle(__a, __b, __c0, __c1)

// Relaxed SIMD intrinsics

// Attribute set applied to every relaxed SIMD intrinsic wrapper that follows.
// It combines __always_inline__ and __nodebug__ with a per-function
// __target__("relaxed-simd") and a __min_vector_width__ of 128.
// NOTE(review): the per-function target attribute presumably lets the
// wrappers call "relaxed-simd" builtins without the whole translation unit
// enabling that feature -- confirm against the Clang attribute reference.
#define __RELAXED_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("relaxed-simd"), \
__min_vector_width__(128)))

// f32x4 relaxed multiply-add: reinterprets the three v128_t operands as
// __f32x4 and forwards them, in order, to __builtin_wasm_relaxed_madd_f32x4.
// NOTE(review): presumably __a * __b + __c per lane -- confirm against the
// Relaxed SIMD specification.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f32x4_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) {
  __f32x4 __x = (__f32x4)__a;
  __f32x4 __y = (__f32x4)__b;
  __f32x4 __z = (__f32x4)__c;
  return (v128_t)__builtin_wasm_relaxed_madd_f32x4(__x, __y, __z);
}

// f32x4 relaxed negated multiply-add: reinterprets the three v128_t operands
// as __f32x4 and forwards them, in order, to
// __builtin_wasm_relaxed_nmadd_f32x4.
// NOTE(review): presumably -(__a * __b) + __c per lane -- confirm against the
// Relaxed SIMD specification.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f32x4_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) {
  __f32x4 __x = (__f32x4)__a;
  __f32x4 __y = (__f32x4)__b;
  __f32x4 __z = (__f32x4)__c;
  return (v128_t)__builtin_wasm_relaxed_nmadd_f32x4(__x, __y, __z);
}

// f64x2 relaxed multiply-add: reinterprets the three v128_t operands as
// __f64x2 and forwards them, in order, to __builtin_wasm_relaxed_madd_f64x2.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f64x2_relaxed_madd(v128_t __a, v128_t __b, v128_t __c) {
  __f64x2 __x = (__f64x2)__a;
  __f64x2 __y = (__f64x2)__b;
  __f64x2 __z = (__f64x2)__c;
  return (v128_t)__builtin_wasm_relaxed_madd_f64x2(__x, __y, __z);
}

// f64x2 relaxed negated multiply-add: reinterprets the three v128_t operands
// as __f64x2 and forwards them, in order, to
// __builtin_wasm_relaxed_nmadd_f64x2.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f64x2_relaxed_nmadd(v128_t __a, v128_t __b, v128_t __c) {
  __f64x2 __x = (__f64x2)__a;
  __f64x2 __y = (__f64x2)__b;
  __f64x2 __z = (__f64x2)__c;
  return (v128_t)__builtin_wasm_relaxed_nmadd_f64x2(__x, __y, __z);
}

// i8x16 relaxed lane select: reinterprets both inputs and the mask __m as
// __i8x16 and forwards them to __builtin_wasm_relaxed_laneselect_i8x16.
// NOTE(review): how mask bits choose between __a and __b is defined by the
// builtin, not visible here -- confirm against the Relaxed SIMD specification.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i8x16_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
  __i8x16 __first = (__i8x16)__a;
  __i8x16 __second = (__i8x16)__b;
  __i8x16 __mask = (__i8x16)__m;
  return (v128_t)__builtin_wasm_relaxed_laneselect_i8x16(__first, __second,
                                                         __mask);
}

// i16x8 relaxed lane select: reinterprets both inputs and the mask __m as
// __i16x8 and forwards them to __builtin_wasm_relaxed_laneselect_i16x8.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i16x8_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
  __i16x8 __first = (__i16x8)__a;
  __i16x8 __second = (__i16x8)__b;
  __i16x8 __mask = (__i16x8)__m;
  return (v128_t)__builtin_wasm_relaxed_laneselect_i16x8(__first, __second,
                                                         __mask);
}

// i32x4 relaxed lane select: reinterprets both inputs and the mask __m as
// __i32x4 and forwards them to __builtin_wasm_relaxed_laneselect_i32x4.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i32x4_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
  __i32x4 __first = (__i32x4)__a;
  __i32x4 __second = (__i32x4)__b;
  __i32x4 __mask = (__i32x4)__m;
  return (v128_t)__builtin_wasm_relaxed_laneselect_i32x4(__first, __second,
                                                         __mask);
}

// i64x2 relaxed lane select: reinterprets both inputs and the mask __m as
// __i64x2 and forwards them to __builtin_wasm_relaxed_laneselect_i64x2.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i64x2_relaxed_laneselect(v128_t __a, v128_t __b, v128_t __m) {
  __i64x2 __first = (__i64x2)__a;
  __i64x2 __second = (__i64x2)__b;
  __i64x2 __mask = (__i64x2)__m;
  return (v128_t)__builtin_wasm_relaxed_laneselect_i64x2(__first, __second,
                                                         __mask);
}

// i8x16 relaxed swizzle: reinterprets the data vector __a and the selector
// vector __s as __i8x16 and forwards them to
// __builtin_wasm_relaxed_swizzle_i8x16.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i8x16_relaxed_swizzle(v128_t __a, v128_t __s) {
  __i8x16 __bytes = (__i8x16)__a;
  __i8x16 __selectors = (__i8x16)__s;
  return (v128_t)__builtin_wasm_relaxed_swizzle_i8x16(__bytes, __selectors);
}

// f32x4 relaxed minimum: reinterprets both v128_t operands as __f32x4 and
// forwards them to __builtin_wasm_relaxed_min_f32x4.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f32x4_relaxed_min(v128_t __a, v128_t __b) {
  __f32x4 __lhs = (__f32x4)__a;
  __f32x4 __rhs = (__f32x4)__b;
  return (v128_t)__builtin_wasm_relaxed_min_f32x4(__lhs, __rhs);
}

// f32x4 relaxed maximum: reinterprets both v128_t operands as __f32x4 and
// forwards them to __builtin_wasm_relaxed_max_f32x4.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f32x4_relaxed_max(v128_t __a, v128_t __b) {
  __f32x4 __lhs = (__f32x4)__a;
  __f32x4 __rhs = (__f32x4)__b;
  return (v128_t)__builtin_wasm_relaxed_max_f32x4(__lhs, __rhs);
}

// f64x2 relaxed minimum: reinterprets both v128_t operands as __f64x2 and
// forwards them to __builtin_wasm_relaxed_min_f64x2.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f64x2_relaxed_min(v128_t __a, v128_t __b) {
  __f64x2 __lhs = (__f64x2)__a;
  __f64x2 __rhs = (__f64x2)__b;
  return (v128_t)__builtin_wasm_relaxed_min_f64x2(__lhs, __rhs);
}

// f64x2 relaxed maximum: reinterprets both v128_t operands as __f64x2 and
// forwards them to __builtin_wasm_relaxed_max_f64x2.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_f64x2_relaxed_max(v128_t __a, v128_t __b) {
  __f64x2 __lhs = (__f64x2)__a;
  __f64x2 __rhs = (__f64x2)__b;
  return (v128_t)__builtin_wasm_relaxed_max_f64x2(__lhs, __rhs);
}

// Signed relaxed truncation of f32x4 to i32x4: reinterprets __a as __f32x4
// and forwards it to __builtin_wasm_relaxed_trunc_s_i32x4_f32x4.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i32x4_relaxed_trunc_f32x4(v128_t __a) {
  __f32x4 __src = (__f32x4)__a;
  return (v128_t)__builtin_wasm_relaxed_trunc_s_i32x4_f32x4(__src);
}

// Unsigned relaxed truncation of f32x4 to i32x4: reinterprets __a as __f32x4
// and forwards it to __builtin_wasm_relaxed_trunc_u_i32x4_f32x4.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_u32x4_relaxed_trunc_f32x4(v128_t __a) {
  __f32x4 __src = (__f32x4)__a;
  return (v128_t)__builtin_wasm_relaxed_trunc_u_i32x4_f32x4(__src);
}

// Signed relaxed truncation of f64x2 to i32x4: reinterprets __a as __f64x2
// and forwards it to __builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2.
// NOTE(review): the "zero" suffix presumably means the two unused result
// lanes are zeroed -- confirm against the Relaxed SIMD specification.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i32x4_relaxed_trunc_f64x2_zero(v128_t __a) {
  __f64x2 __src = (__f64x2)__a;
  return (v128_t)__builtin_wasm_relaxed_trunc_s_zero_i32x4_f64x2(__src);
}

// Unsigned relaxed truncation of f64x2 to i32x4: reinterprets __a as __f64x2
// and forwards it to __builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_u32x4_relaxed_trunc_f64x2_zero(v128_t __a) {
  __f64x2 __src = (__f64x2)__a;
  return (v128_t)__builtin_wasm_relaxed_trunc_u_zero_i32x4_f64x2(__src);
}

// i16x8 relaxed Q15 rounding multiply: reinterprets both v128_t operands as
// __i16x8 and forwards them to __builtin_wasm_relaxed_q15mulr_s_i16x8.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i16x8_relaxed_q15mulr(v128_t __a, v128_t __b) {
  __i16x8 __lhs = (__i16x8)__a;
  __i16x8 __rhs = (__i16x8)__b;
  return (v128_t)__builtin_wasm_relaxed_q15mulr_s_i16x8(__lhs, __rhs);
}

// Relaxed i8x16 x i7x16 dot product producing i16x8: reinterprets both
// v128_t operands as __i8x16 and forwards them to
// __builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8.
// NOTE(review): the "i7x16" name suggests __b's lanes should fit in 7 bits --
// confirm against the Relaxed SIMD specification.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i16x8_relaxed_dot_i8x16_i7x16(v128_t __a, v128_t __b) {
  __i8x16 __lhs = (__i8x16)__a;
  __i8x16 __rhs = (__i8x16)__b;
  return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_s_i16x8(__lhs, __rhs);
}

// Relaxed i8x16 x i7x16 dot product with i32x4 accumulation: reinterprets
// __a and __b as __i8x16 and the accumulator __c as __i32x4, then forwards
// them to __builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4.
static __inline__ v128_t __RELAXED_FN_ATTRS
wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) {
  __i8x16 __lhs = (__i8x16)__a;
  __i8x16 __rhs = (__i8x16)__b;
  __i32x4 __acc = (__i32x4)__c;
  return (v128_t)__builtin_wasm_relaxed_dot_i8x16_i7x16_add_s_i32x4(
      __lhs, __rhs, __acc);
}

// Deprecated intrinsics

static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle")
wasm_v8x16_swizzle(v128_t __a, v128_t __b) {
return wasm_i8x16_swizzle(__a, __b);
Expand Down
123 changes: 122 additions & 1 deletion cross-project-tests/intrinsic-header-tests/wasm_simd128.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
// REQUIRES: webassembly-registered-target
// expected-no-diagnostics

// RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown -msimd128 -Wcast-qual -Werror | FileCheck %s
// RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown \
// RUN: -msimd128 -mrelaxed-simd -Wcast-qual -Werror | FileCheck %s

#include <wasm_simd128.h>

Expand Down Expand Up @@ -1264,3 +1265,123 @@ v128_t test_u64x2_extmul_high_u32x4(v128_t a, v128_t b) {
v128_t test_i16x8_q15mulr_sat(v128_t a, v128_t b) {
  // Hand both operands to the intrinsic under test unchanged.
  v128_t result = wasm_i16x8_q15mulr_sat(a, b);
  return result;
}

// CHECK-LABEL: test_f32x4_relaxed_madd:
// CHECK: f32x4.relaxed_madd{{$}}
v128_t test_f32x4_relaxed_madd(v128_t a, v128_t b, v128_t c) {
  // Hand all three operands to the intrinsic under test unchanged.
  v128_t result = wasm_f32x4_relaxed_madd(a, b, c);
  return result;
}

// CHECK-LABEL: test_f32x4_relaxed_nmadd:
// CHECK: f32x4.relaxed_nmadd{{$}}
v128_t test_f32x4_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
  // Hand all three operands to the intrinsic under test unchanged.
  v128_t result = wasm_f32x4_relaxed_nmadd(a, b, c);
  return result;
}

// CHECK-LABEL: test_f64x2_relaxed_madd:
// CHECK: f64x2.relaxed_madd{{$}}
v128_t test_f64x2_relaxed_madd(v128_t a, v128_t b, v128_t c) {
  // Hand all three operands to the intrinsic under test unchanged.
  v128_t result = wasm_f64x2_relaxed_madd(a, b, c);
  return result;
}

// CHECK-LABEL: test_f64x2_relaxed_nmadd:
// CHECK: f64x2.relaxed_nmadd{{$}}
v128_t test_f64x2_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
  // Hand all three operands to the intrinsic under test unchanged.
  v128_t result = wasm_f64x2_relaxed_nmadd(a, b, c);
  return result;
}

// CHECK-LABEL: test_i8x16_relaxed_laneselect:
// CHECK: i8x16.relaxed_laneselect{{$}}
v128_t test_i8x16_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
  // Hand both inputs and the mask to the intrinsic under test unchanged.
  v128_t result = wasm_i8x16_relaxed_laneselect(a, b, m);
  return result;
}

// CHECK-LABEL: test_i16x8_relaxed_laneselect:
// CHECK: i16x8.relaxed_laneselect{{$}}
v128_t test_i16x8_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
  // Hand both inputs and the mask to the intrinsic under test unchanged.
  v128_t result = wasm_i16x8_relaxed_laneselect(a, b, m);
  return result;
}

// CHECK-LABEL: test_i32x4_relaxed_laneselect:
// CHECK: i32x4.relaxed_laneselect{{$}}
v128_t test_i32x4_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
  // Hand both inputs and the mask to the intrinsic under test unchanged.
  v128_t result = wasm_i32x4_relaxed_laneselect(a, b, m);
  return result;
}

// CHECK-LABEL: test_i64x2_relaxed_laneselect:
// CHECK: i64x2.relaxed_laneselect{{$}}
v128_t test_i64x2_relaxed_laneselect(v128_t a, v128_t b, v128_t m) {
  // Hand both inputs and the mask to the intrinsic under test unchanged.
  v128_t result = wasm_i64x2_relaxed_laneselect(a, b, m);
  return result;
}

// CHECK-LABEL: test_i8x16_relaxed_swizzle:
// CHECK: i8x16.relaxed_swizzle{{$}}
v128_t test_i8x16_relaxed_swizzle(v128_t a, v128_t s) {
  // Hand the data and selector vectors to the intrinsic under test unchanged.
  v128_t result = wasm_i8x16_relaxed_swizzle(a, s);
  return result;
}

// CHECK-LABEL: test_f32x4_relaxed_min:
// CHECK: f32x4.relaxed_min{{$}}
v128_t test_f32x4_relaxed_min(v128_t a, v128_t b) {
  // Hand both operands to the intrinsic under test unchanged.
  v128_t result = wasm_f32x4_relaxed_min(a, b);
  return result;
}

// CHECK-LABEL: test_f32x4_relaxed_max:
// CHECK: f32x4.relaxed_max{{$}}
v128_t test_f32x4_relaxed_max(v128_t a, v128_t b) {
  // Hand both operands to the intrinsic under test unchanged.
  v128_t result = wasm_f32x4_relaxed_max(a, b);
  return result;
}

// CHECK-LABEL: test_f64x2_relaxed_min:
// CHECK: f64x2.relaxed_min{{$}}
v128_t test_f64x2_relaxed_min(v128_t a, v128_t b) {
  // Hand both operands to the intrinsic under test unchanged.
  v128_t result = wasm_f64x2_relaxed_min(a, b);
  return result;
}

// The mnemonic pattern below ends with the {{$}} end-of-line anchor, like the
// other relaxed SIMD tests in this file, so that a longer instruction name
// sharing this prefix cannot satisfy it.
// CHECK-LABEL: test_f64x2_relaxed_max:
// CHECK: f64x2.relaxed_max{{$}}
v128_t test_f64x2_relaxed_max(v128_t a, v128_t b) {
  // Hand both operands to the intrinsic under test unchanged.
  return wasm_f64x2_relaxed_max(a, b);
}

// CHECK-LABEL: test_i32x4_relaxed_trunc_f32x4:
// CHECK: i32x4.relaxed_trunc_f32x4_s{{$}}
v128_t test_i32x4_relaxed_trunc_f32x4(v128_t a) {
  // Hand the operand to the intrinsic under test unchanged.
  v128_t result = wasm_i32x4_relaxed_trunc_f32x4(a);
  return result;
}

// CHECK-LABEL: test_u32x4_relaxed_trunc_f32x4:
// CHECK: i32x4.relaxed_trunc_f32x4_u{{$}}
v128_t test_u32x4_relaxed_trunc_f32x4(v128_t a) {
  // Hand the operand to the intrinsic under test unchanged.
  v128_t result = wasm_u32x4_relaxed_trunc_f32x4(a);
  return result;
}

// CHECK-LABEL: test_i32x4_relaxed_trunc_f64x2_zero:
// CHECK: i32x4.relaxed_trunc_f64x2_s_zero{{$}}
v128_t test_i32x4_relaxed_trunc_f64x2_zero(v128_t a) {
  // Hand the operand to the intrinsic under test unchanged.
  v128_t result = wasm_i32x4_relaxed_trunc_f64x2_zero(a);
  return result;
}

// CHECK-LABEL: test_u32x4_relaxed_trunc_f64x2_zero:
// CHECK: i32x4.relaxed_trunc_f64x2_u_zero{{$}}
v128_t test_u32x4_relaxed_trunc_f64x2_zero(v128_t a) {
  // Hand the operand to the intrinsic under test unchanged.
  v128_t result = wasm_u32x4_relaxed_trunc_f64x2_zero(a);
  return result;
}

// CHECK-LABEL: test_i16x8_relaxed_q15mulr:
// CHECK: i16x8.relaxed_q15mulr_s{{$}}
v128_t test_i16x8_relaxed_q15mulr(v128_t a, v128_t b) {
  // Hand both operands to the intrinsic under test unchanged.
  v128_t result = wasm_i16x8_relaxed_q15mulr(a, b);
  return result;
}

// CHECK-LABEL: test_i16x8_relaxed_dot_i8x16_i7x16:
// CHECK: i16x8.relaxed_dot_i8x16_i7x16_s{{$}}
v128_t test_i16x8_relaxed_dot_i8x16_i7x16(v128_t a, v128_t b) {
  // Hand both operands to the intrinsic under test unchanged.
  v128_t result = wasm_i16x8_relaxed_dot_i8x16_i7x16(a, b);
  return result;
}

// CHECK-LABEL: test_i32x4_relaxed_dot_i8x16_i7x16_add:
// CHECK: i32x4.relaxed_dot_i8x16_i7x16_add_s{{$}}
v128_t test_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t a, v128_t b, v128_t c) {
  // Hand both factors and the accumulator to the intrinsic under test
  // unchanged.
  v128_t result = wasm_i32x4_relaxed_dot_i8x16_i7x16_add(a, b, c);
  return result;
}

0 comments on commit c672c3f

Please sign in to comment.