Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 30 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1552,26 +1552,52 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_vpshrdw128:
case X86::BI__builtin_ia32_vpshrdw256:
case X86::BI__builtin_ia32_vpshrdw512:
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented X86 builtin call: ") +
getContext().BuiltinInfo.getName(builtinID));
return {};
case X86::BI__builtin_ia32_reduce_fadd_pd512:
case X86::BI__builtin_ia32_reduce_fadd_ps512:
case X86::BI__builtin_ia32_reduce_fadd_ph512:
case X86::BI__builtin_ia32_reduce_fadd_ph256:
case X86::BI__builtin_ia32_reduce_fadd_ph128:
case X86::BI__builtin_ia32_reduce_fadd_ph128: {
assert(!cir::MissingFeatures::fastMathFlags());
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
"vector.reduce.fadd", ops[0].getType(),
mlir::ValueRange{ops[0], ops[1]});
}
case X86::BI__builtin_ia32_reduce_fmul_pd512:
case X86::BI__builtin_ia32_reduce_fmul_ps512:
case X86::BI__builtin_ia32_reduce_fmul_ph512:
case X86::BI__builtin_ia32_reduce_fmul_ph256:
case X86::BI__builtin_ia32_reduce_fmul_ph128:
case X86::BI__builtin_ia32_reduce_fmul_ph128: {
assert(!cir::MissingFeatures::fastMathFlags());
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
"vector.reduce.fmul", ops[0].getType(),
mlir::ValueRange{ops[0], ops[1]});
}
case X86::BI__builtin_ia32_reduce_fmax_pd512:
case X86::BI__builtin_ia32_reduce_fmax_ps512:
case X86::BI__builtin_ia32_reduce_fmax_ph512:
case X86::BI__builtin_ia32_reduce_fmax_ph256:
case X86::BI__builtin_ia32_reduce_fmax_ph128:
case X86::BI__builtin_ia32_reduce_fmax_ph128: {
assert(!cir::MissingFeatures::fastMathFlags());
cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
"vector.reduce.fmax", vecTy.getElementType(),
mlir::ValueRange{ops[0]});
}
case X86::BI__builtin_ia32_reduce_fmin_pd512:
case X86::BI__builtin_ia32_reduce_fmin_ps512:
case X86::BI__builtin_ia32_reduce_fmin_ph512:
case X86::BI__builtin_ia32_reduce_fmin_ph256:
case X86::BI__builtin_ia32_reduce_fmin_ph128:
case X86::BI__builtin_ia32_reduce_fmin_ph128: {
assert(!cir::MissingFeatures::fastMathFlags());
cir::VectorType vecTy = cast<cir::VectorType>(ops[0].getType());
return emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()),
"vector.reduce.fmin", vecTy.getElementType(),
mlir::ValueRange{ops[0]});
}
case X86::BI__builtin_ia32_rdrand16_step:
case X86::BI__builtin_ia32_rdrand32_step:
case X86::BI__builtin_ia32_rdrand64_step:
Expand Down
71 changes: 71 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512-reduceIntrin.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR
// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM
// RUN: %clang_cc1 -x c -ffreestanding %s -O2 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

// Exercises _mm512_reduce_add_pd. The ClangIR run expects a
// "vector.reduce.fadd" intrinsic call taking a scalar double and an
// 8 x double vector; the LLVM-emitting runs expect
// llvm.vector.reduce.fadd.v8f64 with a -0.0 start value. The classic-codegen
// run additionally requires that "reassoc" appears only on the reduction call
// (presumably so the extra "+ ExtraAddOp" is not tagged with it).
double test_mm512_reduce_add_pd(__m512d __W, double ExtraAddOp){

// CIR-LABEL: _mm512_reduce_add_pd
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double

// CIR-LABEL: test_mm512_reduce_add_pd
// CIR: cir.call @_mm512_reduce_add_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double

// LLVM-LABEL: test_mm512_reduce_add_pd
// LLVM: call double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_add_pd
// OGCG-NOT: reassoc
// OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fadd.v8f64(double -0.000000e+00, <8 x double> %{{.*}})
// OGCG-NOT: reassoc
return _mm512_reduce_add_pd(__W) + ExtraAddOp;
}

// Exercises _mm512_reduce_mul_pd. The ClangIR run expects a
// "vector.reduce.fmul" intrinsic call taking a scalar double and an
// 8 x double vector; the LLVM-emitting runs expect
// llvm.vector.reduce.fmul.v8f64 with a 1.0 start value. The classic-codegen
// run additionally requires that "reassoc" appears only on the reduction call
// (presumably so the extra "* ExtraMulOp" is not tagged with it).
double test_mm512_reduce_mul_pd(__m512d __W, double ExtraMulOp){
// CIR-LABEL: _mm512_reduce_mul_pd
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.double, !cir.vector<8 x !cir.double>) -> !cir.double

// CIR-LABEL: test_mm512_reduce_mul_pd
// CIR: cir.call @_mm512_reduce_mul_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double

// LLVM-LABEL: test_mm512_reduce_mul_pd
// LLVM: call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_mul_pd
// OGCG-NOT: reassoc
// OGCG: call reassoc {{.*}}double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %{{.*}})
// OGCG-NOT: reassoc
return _mm512_reduce_mul_pd(__W) * ExtraMulOp;
}


// Exercises _mm512_reduce_add_ps. The ClangIR run expects a
// "vector.reduce.fadd" intrinsic call taking a scalar float and a
// 16 x float vector; the LLVM-emitting runs expect
// llvm.vector.reduce.fadd.v16f32 with a -0.0 start value, and the
// classic-codegen run expects the call to carry "reassoc".
float test_mm512_reduce_add_ps(__m512 __W){
// CIR-LABEL: _mm512_reduce_add_ps
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float

// CIR-LABEL: test_mm512_reduce_add_ps
// CIR: cir.call @_mm512_reduce_add_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float

// LLVM-LABEL: test_mm512_reduce_add_ps
// LLVM: call float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_add_ps
// OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fadd.v16f32(float -0.000000e+00, <16 x float> %{{.*}})
return _mm512_reduce_add_ps(__W);
}

// Exercises _mm512_reduce_mul_ps. The ClangIR run expects a
// "vector.reduce.fmul" intrinsic call taking a scalar float and a
// 16 x float vector; the LLVM-emitting runs expect
// llvm.vector.reduce.fmul.v16f32 with a 1.0 start value, and the
// classic-codegen run expects the call to carry "reassoc".
float test_mm512_reduce_mul_ps(__m512 __W){
// CIR-LABEL: _mm512_reduce_mul_ps
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.float, !cir.vector<16 x !cir.float>) -> !cir.float

// CIR-LABEL: test_mm512_reduce_mul_ps
// CIR: cir.call @_mm512_reduce_mul_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float

// LLVM-LABEL: test_mm512_reduce_mul_ps
// LLVM: call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_mul_ps
// OGCG: call reassoc {{.*}}float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %{{.*}})
return _mm512_reduce_mul_ps(__W);
}
69 changes: 69 additions & 0 deletions clang/test/CIR/CodeGen/X86/avx512-reduceMinMaxIntrin.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-cir -o - -Wall -Werror | FileCheck %s --check-prefixes=CIR
// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -fclangir -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=LLVM
// RUN: %clang_cc1 -x c -ffreestanding %s -O0 -triple=x86_64-apple-darwin -target-cpu skylake-avx512 -emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG

#include <immintrin.h>

// Exercises _mm512_reduce_max_pd. The ClangIR run expects a
// "vector.reduce.fmax" intrinsic call whose only operand is an 8 x double
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmax.v8f64. The
// classic-codegen run additionally requires that "nnan" appears only on the
// reduction call (presumably so the extra "+ ExtraAddOp" is not tagged).
double test_mm512_reduce_max_pd(__m512d __W, double ExtraAddOp){
// CIR-LABEL: _mm512_reduce_max_pd
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double

// CIR-LABEL: test_mm512_reduce_max_pd
// CIR: cir.call @_mm512_reduce_max_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double

// LLVM-LABEL: test_mm512_reduce_max_pd
// LLVM: call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_max_pd
// OGCG-NOT: nnan
// OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmax.v8f64(<8 x double> %{{.*}})
// OGCG-NOT: nnan
return _mm512_reduce_max_pd(__W) + ExtraAddOp;
}

// Exercises _mm512_reduce_min_pd. The ClangIR run expects a
// "vector.reduce.fmin" intrinsic call whose only operand is an 8 x double
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmin.v8f64. The
// classic-codegen run additionally requires that "nnan" appears only on the
// reduction call (presumably so the extra "* ExtraMulOp" is not tagged).
double test_mm512_reduce_min_pd(__m512d __W, double ExtraMulOp){
// CIR-LABEL: _mm512_reduce_min_pd
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.double>) -> !cir.double

// CIR-LABEL: test_mm512_reduce_min_pd
// CIR: cir.call @_mm512_reduce_min_pd(%[[VEC:.*]]) : (!cir.vector<8 x !cir.double>) -> !cir.double

// LLVM-LABEL: test_mm512_reduce_min_pd
// LLVM: call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_min_pd
// OGCG-NOT: nnan
// OGCG: call nnan {{.*}}double @llvm.vector.reduce.fmin.v8f64(<8 x double> %{{.*}})
// OGCG-NOT: nnan
return _mm512_reduce_min_pd(__W) * ExtraMulOp;
}

// Exercises _mm512_reduce_max_ps. The ClangIR run expects a
// "vector.reduce.fmax" intrinsic call whose only operand is a 16 x float
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmax.v16f32, and
// the classic-codegen run expects the call to carry "nnan".
float test_mm512_reduce_max_ps(__m512 __W){
// CIR-LABEL: _mm512_reduce_max_ps
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float

// CIR-LABEL: test_mm512_reduce_max_ps
// CIR: cir.call @_mm512_reduce_max_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float

// LLVM-LABEL: test_mm512_reduce_max_ps
// LLVM: call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_max_ps
// OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmax.v16f32(<16 x float> %{{.*}})
return _mm512_reduce_max_ps(__W);
}

// Exercises _mm512_reduce_min_ps. The ClangIR run expects a
// "vector.reduce.fmin" intrinsic call whose only operand is a 16 x float
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmin.v16f32, and
// the classic-codegen run expects the call to carry "nnan".
float test_mm512_reduce_min_ps(__m512 __W){
// CIR-LABEL: _mm512_reduce_min_ps
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.float>) -> !cir.float

// CIR-LABEL: test_mm512_reduce_min_ps
// CIR: cir.call @_mm512_reduce_min_ps(%[[VEC:.*]]) : (!cir.vector<16 x !cir.float>) -> !cir.float

// LLVM-LABEL: test_mm512_reduce_min_ps
// LLVM: call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_min_ps
// OGCG: call nnan {{.*}}float @llvm.vector.reduce.fmin.v16f32(<16 x float> %{{.*}})
return _mm512_reduce_min_ps(__W);
}
62 changes: 61 additions & 1 deletion clang/test/CIR/CodeGenBuiltins/X86/avx512fp16-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,64 @@ __m512h test_mm512_undefined_ph(void) {
// OGCG-LABEL: test_mm512_undefined_ph
// OGCG: ret <32 x half> zeroinitializer
return _mm512_undefined_ph();
}
}

// Exercises _mm512_reduce_add_ph. The ClangIR run expects a
// "vector.reduce.fadd" intrinsic call taking a scalar f16 and a 32 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fadd.v32f16 with
// start value 0xH8000, and the classic-codegen run expects the call to carry
// "reassoc".
_Float16 test_mm512_reduce_add_ph(__m512h __W) {
// CIR-LABEL: _mm512_reduce_add_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm512_reduce_add_ph
// CIR: cir.call @_mm512_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm512_reduce_add_ph
// LLVM: call half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_add_ph
// OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fadd.v32f16(half 0xH8000, <32 x half> %{{.*}})
return _mm512_reduce_add_ph(__W);
}

// Exercises _mm512_reduce_mul_ph. The ClangIR run expects a
// "vector.reduce.fmul" intrinsic call taking a scalar f16 and a 32 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmul.v32f16 with
// start value 0xH3C00, and the classic-codegen run expects the call to carry
// "reassoc".
_Float16 test_mm512_reduce_mul_ph(__m512h __W) {
// CIR-LABEL: _mm512_reduce_mul_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<32 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm512_reduce_mul_ph
// CIR: cir.call @_mm512_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm512_reduce_mul_ph
// LLVM: call half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_mul_ph
// OGCG: call reassoc {{.*}}half @llvm.vector.reduce.fmul.v32f16(half 0xH3C00, <32 x half> %{{.*}})
return _mm512_reduce_mul_ph(__W);
}

// Exercises _mm512_reduce_max_ph. The ClangIR run expects a
// "vector.reduce.fmax" intrinsic call whose only operand is a 32 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmax.v32f16, and
// the classic-codegen run expects the call to carry "nnan".
// The intrinsic check spells out the " : " separator before the type list so
// the operand capture does not have to absorb it, matching the sibling
// min/add/mul checks.
_Float16 test_mm512_reduce_max_ph(__m512h __W) {
// CIR-LABEL: _mm512_reduce_max_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm512_reduce_max_ph
// CIR: cir.call @_mm512_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm512_reduce_max_ph
// LLVM: call half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_max_ph
// OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmax.v32f16(<32 x half> %{{.*}})
return _mm512_reduce_max_ph(__W);
}

// Exercises _mm512_reduce_min_ph. The ClangIR run expects a
// "vector.reduce.fmin" intrinsic call whose only operand is a 32 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmin.v32f16, and
// the classic-codegen run expects the call to carry "nnan".
// The intrinsic check spells out the " : " separator before the type list so
// the operand capture does not have to absorb it, matching the sibling
// add/mul checks.
_Float16 test_mm512_reduce_min_ph(__m512h __W) {
// CIR-LABEL: _mm512_reduce_min_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<32 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm512_reduce_min_ph
// CIR: cir.call @_mm512_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<32 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm512_reduce_min_ph
// LLVM: call half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})

// OGCG-LABEL: test_mm512_reduce_min_ph
// OGCG: call nnan {{.*}}half @llvm.vector.reduce.fmin.v32f16(<32 x half> %{{.*}})
return _mm512_reduce_min_ph(__W);
}
129 changes: 129 additions & 0 deletions clang/test/CIR/CodeGenBuiltins/X86/avx512vlfp16-builtins.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-cir -o %t.cir -Wall -Werror
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -fclangir -emit-llvm -o %t.ll -Wall -Werror
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512fp16 -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
#include <immintrin.h>

// Exercises _mm256_reduce_add_ph. The ClangIR run expects a
// "vector.reduce.fadd" intrinsic call taking a scalar f16 and a 16 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fadd.v16f16 with
// start value 0xH8000, and the classic-codegen runs expect the call to carry
// "reassoc".
_Float16 test_mm256_reduce_add_ph(__m256h __W) {
// CIR-LABEL: _mm256_reduce_add_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm256_reduce_add_ph
// CIR: cir.call @_mm256_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm256_reduce_add_ph
// LLVM: call half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}})

// OGCG-LABEL: test_mm256_reduce_add_ph
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> %{{.*}})
return _mm256_reduce_add_ph(__W);
}

// Exercises _mm256_reduce_mul_ph. The ClangIR run expects a
// "vector.reduce.fmul" intrinsic call taking a scalar f16 and a 16 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmul.v16f16 with
// start value 0xH3C00, and the classic-codegen runs expect the call to carry
// "reassoc".
_Float16 test_mm256_reduce_mul_ph(__m256h __W) {
// CIR-LABEL: _mm256_reduce_mul_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<16 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm256_reduce_mul_ph
// CIR: cir.call @_mm256_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm256_reduce_mul_ph
// LLVM: call half @llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}})

// OGCG-LABEL: test_mm256_reduce_mul_ph
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v16f16(half 0xH3C00, <16 x half> %{{.*}})
return _mm256_reduce_mul_ph(__W);
}

// Exercises _mm256_reduce_max_ph. The ClangIR run expects a
// "vector.reduce.fmax" intrinsic call whose only operand is a 16 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmax.v16f16, and
// the classic-codegen runs expect the call to carry "nnan".
// The intrinsic check spells out the " : " separator before the type list so
// the operand capture does not have to absorb it, matching the sibling
// min/add/mul checks.
_Float16 test_mm256_reduce_max_ph(__m256h __W) {
// CIR-LABEL: _mm256_reduce_max_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm256_reduce_max_ph
// CIR: cir.call @_mm256_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm256_reduce_max_ph
// LLVM: call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})

// OGCG-LABEL: test_mm256_reduce_max_ph
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v16f16(<16 x half> %{{.*}})
return _mm256_reduce_max_ph(__W);
}

// Exercises _mm256_reduce_min_ph. The ClangIR run expects a
// "vector.reduce.fmin" intrinsic call whose only operand is a 16 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmin.v16f16, and
// the classic-codegen runs expect the call to carry "nnan".
_Float16 test_mm256_reduce_min_ph(__m256h __W) {
// CIR-LABEL: _mm256_reduce_min_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<16 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm256_reduce_min_ph
// CIR: cir.call @_mm256_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<16 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm256_reduce_min_ph
// LLVM: call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})

// OGCG-LABEL: test_mm256_reduce_min_ph
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v16f16(<16 x half> %{{.*}})
return _mm256_reduce_min_ph(__W);
}

// Exercises _mm_reduce_add_ph. The ClangIR run expects a
// "vector.reduce.fadd" intrinsic call taking a scalar f16 and an 8 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fadd.v8f16 with
// start value 0xH8000, and the classic-codegen runs expect the call to carry
// "reassoc".
_Float16 test_mm_reduce_add_ph(__m128h __W) {
// CIR-LABEL: _mm_reduce_add_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fadd" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm_reduce_add_ph
// CIR: cir.call @_mm_reduce_add_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm_reduce_add_ph
// LLVM: call half @llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}})

// OGCG-LABEL: test_mm_reduce_add_ph
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fadd.v8f16(half 0xH8000, <8 x half> %{{.*}})
return _mm_reduce_add_ph(__W);
}

// Exercises _mm_reduce_mul_ph. The ClangIR run expects a
// "vector.reduce.fmul" intrinsic call taking a scalar f16 and an 8 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmul.v8f16 with
// start value 0xH3C00, and the classic-codegen runs expect the call to carry
// "reassoc".
_Float16 test_mm_reduce_mul_ph(__m128h __W) {
// CIR-LABEL: _mm_reduce_mul_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmul" %[[R:.*]], %[[V:.*]] : (!cir.f16, !cir.vector<8 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm_reduce_mul_ph
// CIR: cir.call @_mm_reduce_mul_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm_reduce_mul_ph
// LLVM: call half @llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}})

// OGCG-LABEL: test_mm_reduce_mul_ph
// OGCG: call reassoc {{.*}}@llvm.vector.reduce.fmul.v8f16(half 0xH3C00, <8 x half> %{{.*}})
return _mm_reduce_mul_ph(__W);
}

// Exercises _mm_reduce_max_ph. The ClangIR run expects a
// "vector.reduce.fmax" intrinsic call whose only operand is an 8 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmax.v8f16, and
// the classic-codegen runs expect the call to carry "nnan".
// The intrinsic check spells out the " : " separator before the type list so
// the operand capture does not have to absorb it, matching the sibling
// min/add/mul checks.
_Float16 test_mm_reduce_max_ph(__m128h __W) {
// CIR-LABEL: _mm_reduce_max_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmax" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm_reduce_max_ph
// CIR: cir.call @_mm_reduce_max_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm_reduce_max_ph
// LLVM: call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})

// OGCG-LABEL: test_mm_reduce_max_ph
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmax.v8f16(<8 x half> %{{.*}})
return _mm_reduce_max_ph(__W);
}

// Exercises _mm_reduce_min_ph. The ClangIR run expects a
// "vector.reduce.fmin" intrinsic call whose only operand is an 8 x f16
// vector; the LLVM-emitting runs expect llvm.vector.reduce.fmin.v8f16, and
// the classic-codegen runs expect the call to carry "nnan".
_Float16 test_mm_reduce_min_ph(__m128h __W) {
// CIR-LABEL: _mm_reduce_min_ph
// CIR: cir.call_llvm_intrinsic "vector.reduce.fmin" %[[V:.*]] : (!cir.vector<8 x !cir.f16>) -> !cir.f16

// CIR-LABEL: test_mm_reduce_min_ph
// CIR: cir.call @_mm_reduce_min_ph(%[[VEC:.*]]) : (!cir.vector<8 x !cir.f16>) -> !cir.f16

// LLVM-LABEL: test_mm_reduce_min_ph
// LLVM: call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})

// OGCG-LABEL: test_mm_reduce_min_ph
// OGCG: call nnan {{.*}}@llvm.vector.reduce.fmin.v8f16(<8 x half> %{{.*}})
return _mm_reduce_min_ph(__W);
}

Loading