-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[CIR] Implement builtin reduce fadd/fmul/fmax/fmin #171633
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clangir Author: Jasmine Tang (badumbatish). Changes: New files are created to match the structure over at OGs. Patch is 22.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171633.diff 5 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 62836ce0f7537..9ffbbc03edb11 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1492,26 +1492,125 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vpshrdw128:
case X86::BI__builtin_ia32_vpshrdw256:
case X86::BI__builtin_ia32_vpshrdw512:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return {};
case X86::BI__builtin_ia32_reduce_fadd_pd512:
case X86::BI__builtin_ia32_reduce_fadd_ps512:
case X86::BI__builtin_ia32_reduce_fadd_ph512:
case X86::BI__builtin_ia32_reduce_fadd_ph256:
- case X86::BI__builtin_ia32_reduce_fadd_ph128:
+ case X86::BI__builtin_ia32_reduce_fadd_ph128: {
+ StringRef intrinsicName = "";
+ switch (builtinID) {
+ case X86::BI__builtin_ia32_reduce_fadd_pd512:
+ intrinsicName = "vector.reduce.fadd.v8f64";
+ break;
+ case X86::BI__builtin_ia32_reduce_fadd_ps512:
+ intrinsicName = "vector.reduce.fadd.v16f32";
+ break;
+ case X86::BI__builtin_ia32_reduce_fadd_ph512:
+ intrinsicName = "vector.reduce.fadd.v32f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fadd_ph256:
+ intrinsicName = "vector.reduce.fadd.v16f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fadd_ph128:
+ intrinsicName = "vector.reduce.fadd.v8f16";
+ break;
+ }
+ assert(!cir::MissingFeatures::fastMathFlags());
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ intrinsicName, ops[0].getType(),
+ mlir::ValueRange{ops[0], ops[1]});
+ }
case X86::BI__builtin_ia32_reduce_fmul_pd512:
case X86::BI__builtin_ia32_reduce_fmul_ps512:
case X86::BI__builtin_ia32_reduce_fmul_ph512:
case X86::BI__builtin_ia32_reduce_fmul_ph256:
- case X86::BI__builtin_ia32_reduce_fmul_ph128:
+ case X86::BI__builtin_ia32_reduce_fmul_ph128: {
+ StringRef intrinsicName = "";
+ switch (builtinID) {
+ case X86::BI__builtin_ia32_reduce_fmul_pd512:
+ intrinsicName = "vector.reduce.fmul.v8f64";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmul_ps512:
+ intrinsicName = "vector.reduce.fmul.v16f32";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmul_ph512:
+ intrinsicName = "vector.reduce.fmul.v32f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmul_ph256:
+ intrinsicName = "vector.reduce.fmul.v16f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmul_ph128:
+ intrinsicName = "vector.reduce.fmul.v8f16";
+ break;
+ }
+ assert(!cir::MissingFeatures::fastMathFlags());
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ intrinsicName, ops[0].getType(),
+ mlir::ValueRange{ops[0], ops[1]});
+ }
case X86::BI__builtin_ia32_reduce_fmax_pd512:
case X86::BI__builtin_ia32_reduce_fmax_ps512:
case X86::BI__builtin_ia32_reduce_fmax_ph512:
case X86::BI__builtin_ia32_reduce_fmax_ph256:
- case X86::BI__builtin_ia32_reduce_fmax_ph128:
+ case X86::BI__builtin_ia32_reduce_fmax_ph128: {
+ StringRef intrinsicName = "";
+ switch (builtinID) {
+ case X86::BI__builtin_ia32_reduce_fmax_pd512:
+ intrinsicName = "vector.reduce.fmax.v8f64";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmax_ps512:
+ intrinsicName = "vector.reduce.fmax.v16f32";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmax_ph512:
+ intrinsicName = "vector.reduce.fmax.v32f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmax_ph256:
+ intrinsicName = "vector.reduce.fmax.v16f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmax_ph128:
+ intrinsicName = "vector.reduce.fmax.v8f16";
+ break;
+ }
+ assert(!cir::MissingFeatures::fastMathFlags());
+ cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ intrinsicName, vecTy.getElementType(),
+ mlir::ValueRange{ops[0]});
+ }
case X86::BI__builtin_ia32_reduce_fmin_pd512:
case X86::BI__builtin_ia32_reduce_fmin_ps512:
case X86::BI__builtin_ia32_reduce_fmin_ph512:
case X86::BI__builtin_ia32_reduce_fmin_ph256:
- case X86::BI__builtin_ia32_reduce_fmin_ph128:
+ case X86::BI__builtin_ia32_reduce_fmin_ph128: {
+ StringRef intrinsicName = "";
+ switch (builtinID) {
+ case X86::BI__builtin_ia32_reduce_fmin_pd512:
+ intrinsicName = "vector.reduce.fmin.v8f64";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmin_ps512:
+ intrinsicName = "vector.reduce.fmin.v16f32";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmin_ph512:
+ intrinsicName = "vector.reduce.fmin.v32f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmin_ph256:
+ intrinsicName = "vector.reduce.fmin.v16f16";
+ break;
+ case X86::BI__builtin_ia32_reduce_fmin_ph128:
+ intrinsicName = "vector.reduce.fmin.v8f16";
+ break;
+ }
+
+ assert(!cir::MissingFeatures::fastMathFlags());
+ cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
+ return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
+ intrinsicName, vecTy.getElementType(),
+ mlir::ValueRange{ops[0]});
+ }
case X86::BI__builtin_ia32_rdrand16_step:
case X86::BI__builtin_ia32_rdrand32_step:
case X86::BI__builtin_ia32_rdrand64_step:
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
new file mode 100644
index 0000000000000..9b956b80adf8c
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
@@ -0,0 +1,71 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM
+// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){
+
+ // CIR-LABEL: _mm512_reduce_add_pd
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f64" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+
+ // CIR-LABEL: test_mm512_reduce_add_pd
+ // CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
+
+ // LLVM-LABEL: test_mm512_reduce_add_pd
+ // LLVM: call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_add_pd
+ // OGCG-NOT: reassoc
+ // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
+ // OGCG-NOT: reassoc
+ return _mm512_reduce_add_pd(__W) + ExtraAddOp;
+}
+
+double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
+ // CIR-LABEL: _mm512_reduce_mul_pd
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v8f64" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double
+
+ // CIR-LABEL: test_mm512_reduce_mul_pd
+ // CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
+
+ // LLVM-LABEL: test_mm512_reduce_mul_pd
+ // LLVM: call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_mul_pd
+ // OGCG-NOT: reassoc
+ // OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
+ // OGCG-NOT: reassoc
+ return _mm512_reduce_mul_pd(__W) * ExtraMulOp;
+}
+
+
+float test_mm512_reduce_add_ps(__m512 __W){
+ // CIR-LABEL: _mm512_reduce_add_ps
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f32" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+
+ // CIR-LABEL: test_mm512_reduce_add_ps
+ // CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
+
+ // LLVM-LABEL: test_mm512_reduce_add_ps
+ // LLVM: call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_add_ps
+ // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
+ return _mm512_reduce_add_ps(__W);
+}
+
+float test_mm512_reduce_mul_ps(__m512 __W){
+ // CIR-LABEL: _mm512_reduce_mul_ps
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f32" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float
+
+ // CIR-LABEL: test_mm512_reduce_mul_ps
+ // CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
+
+ // LLVM-LABEL: test_mm512_reduce_mul_ps
+ // LLVM: call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_mul_ps
+ // OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
+ return _mm512_reduce_mul_ps(__W);
+}
diff --git a/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
new file mode 100644
index 0000000000000..be2865f9a4934
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
@@ -0,0 +1,69 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM
+// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
+ // CIR-LABEL: _mm512_reduce_max_pd
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v8f64" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double
+
+ // CIR-LABEL: test_mm512_reduce_max_pd
+ // CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
+
+ // LLVM-LABEL: test_mm512_reduce_max_pd
+ // LLVM: call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_max_pd
+ // OGCG-NOT: nnan
+ // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
+ // OGCG-NOT: nnan
+ return _mm512_reduce_max_pd(__W) + ExtraAddOp;
+}
+
+double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
+ // CIR-LABEL: _mm512_reduce_min_pd
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v8f64" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double
+
+ // CIR-LABEL: test_mm512_reduce_min_pd
+ // CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double
+
+ // LLVM-LABEL: test_mm512_reduce_min_pd
+ // LLVM: call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_min_pd
+ // OGCG-NOT: nnan
+ // OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
+ // OGCG-NOT: nnan
+ return _mm512_reduce_min_pd(__W) * ExtraMulOp;
+}
+
+float test_mm512_reduce_max_ps(__m512 __W){
+ // CIR-LABEL: _mm512_reduce_max_ps
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f32" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float
+
+ // CIR-LABEL: test_mm512_reduce_max_ps
+ // CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
+
+ // LLVM-LABEL: test_mm512_reduce_max_ps
+ // LLVM: call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_max_ps
+ // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
+ return _mm512_reduce_max_ps(__W);
+}
+
+float test_mm512_reduce_min_ps(__m512 __W){
+ // CIR-LABEL: _mm512_reduce_min_ps
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f32" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float
+
+ // CIR-LABEL: test_mm512_reduce_min_ps
+ // CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float
+
+ // LLVM-LABEL: test_mm512_reduce_min_ps
+ // LLVM: call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_min_ps
+ // OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
+ return _mm512_reduce_min_ps(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
index 161fc45b2a32d..94a3834f09a01 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
@@ -63,4 +63,64 @@ __m512h test_mm512_undefined_ph(void) {
// OGCG-LABEL: test_mm512_undefined_ph
// OGCG: ret <32 x half> zeroinitializer
return _mm512_undefined_ph();
-}
\ No newline at end of file
+}
+
+_Float16 test_mm512_reduce_add_ph(__m512h __W) {
+ // CIR-LABEL: _mm512_reduce_add_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v32f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm512_reduce_add_ph
+ // CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm512_reduce_add_ph
+ // LLVM: call half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_add_ph
+ // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})
+ return _mm512_reduce_add_ph(__W);
+}
+
+_Float16 test_mm512_reduce_mul_ph(__m512h __W) {
+ // CIR-LABEL: _mm512_reduce_mul_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v32f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm512_reduce_mul_ph
+ // CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm512_reduce_mul_ph
+ // LLVM: call half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_mul_ph
+ // OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}})
+ return _mm512_reduce_mul_ph(__W);
+}
+
+_Float16 test_mm512_reduce_max_ph(__m512h __W) {
+ // CIR-LABEL: _mm512_reduce_max_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v32f16" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm512_reduce_max_ph
+ // CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm512_reduce_max_ph
+ // LLVM: call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_max_ph
+ // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})
+ return _mm512_reduce_max_ph(__W);
+}
+
+_Float16 test_mm512_reduce_min_ph(__m512h __W) {
+ // CIR-LABEL: _mm512_reduce_min_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v32f16" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm512_reduce_min_ph
+ // CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm512_reduce_min_ph
+ // LLVM: call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm512_reduce_min_ph
+ // OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})
+ return _mm512_reduce_min_ph(__W);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
new file mode 100644
index 0000000000000..f1dfe56861eec
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+#include <immintrin.h>
+
+_Float16 test_mm256_reduce_add_ph(__m256h __W) {
+ // CIR-LABEL: _mm256_reduce_add_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v16f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm256_reduce_add_ph
+ // CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm256_reduce_add_ph
+ // LLVM: call half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm256_reduce_add_ph
+ // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}})
+ return _mm256_reduce_add_ph(__W);
+}
+
+_Float16 test_mm256_reduce_mul_ph(__m256h __W) {
+ // CIR-LABEL: _mm256_reduce_mul_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmul.v16f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm256_reduce_mul_ph
+ // CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm256_reduce_mul_ph
+ // LLVM: call half @llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm256_reduce_mul_ph
+ // OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}})
+ return _mm256_reduce_mul_ph(__W);
+}
+
+_Float16 test_mm256_reduce_max_ph(__m256h __W) {
+ // CIR-LABEL: _mm256_reduce_max_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmax.v16f16" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm256_reduce_max_ph
+ // CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm256_reduce_max_ph
+ // LLVM: call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm256_reduce_max_ph
+ // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
+ return _mm256_reduce_max_ph(__W);
+}
+
+_Float16 test_mm256_reduce_min_ph(__m256h __W) {
+ // CIR-LABEL: _mm256_reduce_min_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fmin.v16f16" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm256_reduce_min_ph
+ // CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16
+
+ // LLVM-LABEL: test_mm256_reduce_min_ph
+ // LLVM: call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
+
+ // OGCG-LABEL: test_mm256_reduce_min_ph
+ // OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
+ return _mm256_reduce_min_ph(__W);
+}
+
+_Float16 test_mm_reduce_add_ph(__m128h __W) {
+ // CIR-LABEL: _mm_reduce_add_ph
+ // CIR: cir.call_llvm_intrinsic "vector.reduce.fadd.v8f16" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16
+
+ // CIR-LABEL: test_mm_redu...
[truncated]
|
🐧 Linux x64 Test Results
All executed tests passed, but another part of the build failed. Click on a failure below to see the details: tools/clang/lib/CIR/CodeGen/CMakeFiles/obj.clangCIR.dir/CIRGenStmt.cpp.o. If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
andykaylor
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm, I have just a small nit. feel free to commit without further review once CI is passing.
New files are created to match the structure over at OGs
New files are created to match the structure over at OGs