diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index e7aa8a234efd9..4cbc8dcffdce1 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -89,6 +89,14 @@ static mlir::Value getMaskVecValue(CIRGenFunction &cgf, const CallExpr *expr, } return maskVec; } +static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf, const CallExpr *expr,ArrayRef ops, bool IsCompress, const std::string &ID){ + auto ResultTy = cast(ops[1].getType()); + mlir::Value MaskValue = getMaskVecValue(cgf, expr, ops[2], cast(ResultTy).getSize()); + llvm::SmallVector op{ops[0], ops[1], MaskValue}; + + return emitIntrinsicCallOp(cgf,expr, ID, ResultTy, op); + +} mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { @@ -454,7 +462,9 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, case X86::BI__builtin_ia32_compresshi512_mask: case X86::BI__builtin_ia32_compressqi128_mask: case X86::BI__builtin_ia32_compressqi256_mask: - case X86::BI__builtin_ia32_compressqi512_mask: + case X86::BI__builtin_ia32_compressqi512_mask:{ + return emitX86CompressExpand(*this, expr, ops, true, "x86_avx512_mask_compress"); + } case X86::BI__builtin_ia32_gather3div2df: case X86::BI__builtin_ia32_gather3div2di: case X86::BI__builtin_ia32_gather3div4df: diff --git a/clang/test/CIR/CodeGen/X86/builtin_mask_compress.c b/clang/test/CIR/CodeGen/X86/builtin_mask_compress.c new file mode 100644 index 0000000000000..30c0553dfc8f8 --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/builtin_mask_compress.c @@ -0,0 +1,7 @@ +#include +///home/cs25resch11005/myFiles/off_contrib/llvm-project/clang/lib/Headers/avx512vlvbmi2intrin.h + +__m128i test_mm_mask_compress(__m128i __S, __mmask8 __U, __m128i __D){ + + return (__m128i)_mm_mask_compress_epi16(__S, __U, __D); +} \ No newline at end of file diff --git a/clang/test/CIR/CodeGen/X86/builtin_mask_compress.cir b/clang/test/CIR/CodeGen/X86/builtin_mask_compress.cir new file mode 100644 index 0000000000000..fd0d35305f115 --- /dev/null +++ b/clang/test/CIR/CodeGen/X86/builtin_mask_compress.cir @@ -0,0 +1,83 @@ +!s16i = !cir.int +!s64i = !cir.int +!u8i = !cir.int +#loc3 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:25) +#loc4 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:33) +#loc5 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:38) +#loc6 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:47) +#loc7 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:52) +#loc8 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":36:60) +#loc18 = loc("builtin_mask_compress.c":4:31) +#loc19 = loc("builtin_mask_compress.c":4:39) +#loc20 = loc("builtin_mask_compress.c":4:44) +#loc21 = loc("builtin_mask_compress.c":4:53) +#loc22 = loc("builtin_mask_compress.c":4:58) +#loc23 = loc("builtin_mask_compress.c":4:66) +#loc32 = loc(fused[#loc3, #loc4]) +#loc33 = loc(fused[#loc5, #loc6]) +#loc34 = loc(fused[#loc7, #loc8]) +#loc38 = loc(fused[#loc18, #loc19]) +#loc39 = loc(fused[#loc20, #loc21]) +#loc40 = loc(fused[#loc22, #loc23]) +module @"/home/cs25resch11005/myFiles/off_contrib/llvm-project/clang/test/CIR/CodeGen/X86/builtin_mask_compress.c" attributes {cir.lang = #cir.lang, cir.module_asm = [], cir.triple = "x86_64-unknown-linux-gnu", dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>} { + cir.func internal private dso_local @_mm_mask_compress_epi16(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc3, #loc4]), %arg1: !u8i loc(fused[#loc5, #loc6]), %arg2: !cir.vector<2 x !s64i> loc(fused[#loc7, #loc8])) -> !cir.vector<2 x !s64i> inline(always) { + %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__S", init] {alignment = 16 : i64} loc(#loc32) + %1 = cir.alloca !u8i, !cir.ptr, ["__U", init] {alignment = 1 : i64} loc(#loc33) + %2 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__D", init] {alignment = 16 : i64} loc(#loc34) + %3 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc2) + cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc9) + cir.store %arg1, %1 : !u8i, !cir.ptr loc(#loc9) + cir.store %arg2, %2 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc9) + %4 = cir.load align(16) %2 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc10) + %5 = cir.cast bitcast %4 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc10) + %6 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc11) + %7 = cir.cast bitcast %6 : !cir.vector<2 x !s64i> -> !cir.vector<8 x !s16i> loc(#loc11) + %8 = cir.load align(1) %1 : !cir.ptr, !u8i loc(#loc12) + %9 = cir.cast bitcast %8 : !u8i -> !cir.vector<8 x !cir.int> loc(#loc12) + %10 = cir.call_llvm_intrinsic "x86_avx512_mask_compress" %5, %7, %9 : (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int>) -> !cir.vector<8 x !s16i> loc(#loc13) + %11 = cir.cast bitcast %10 : !cir.vector<8 x !s16i> -> !cir.vector<2 x !s64i> loc(#loc35) + cir.store %11, %3 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc36) + %12 = cir.load %3 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc36) + cir.return %12 : !cir.vector<2 x !s64i> loc(#loc36) + } loc(#loc31) + cir.func dso_local @test_mm_mask_compress(%arg0: !cir.vector<2 x !s64i> loc(fused[#loc18, #loc19]), %arg1: !u8i loc(fused[#loc20, #loc21]), %arg2: !cir.vector<2 x !s64i> loc(fused[#loc22, #loc23])) -> !cir.vector<2 x !s64i> inline(never) { + %0 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__S", init] {alignment = 16 : i64} loc(#loc38) + %1 = cir.alloca !u8i, !cir.ptr, ["__U", init] {alignment = 1 : i64} loc(#loc39) + %2 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__D", init] {alignment = 16 : i64} loc(#loc40) + %3 = cir.alloca !cir.vector<2 x !s64i>, !cir.ptr>, ["__retval"] {alignment = 16 : i64} loc(#loc17) + cir.store %arg0, %0 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc24) + cir.store %arg1, %1 : !u8i, !cir.ptr loc(#loc24) + cir.store %arg2, %2 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc24) + %4 = cir.load align(16) %0 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc25) + %5 = cir.load align(1) %1 : !cir.ptr, !u8i loc(#loc26) + %6 = cir.load align(16) %2 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc27) + %7 = cir.call @_mm_mask_compress_epi16(%4, %5, %6) : (!cir.vector<2 x !s64i>, !u8i, !cir.vector<2 x !s64i>) -> !cir.vector<2 x !s64i> loc(#loc28) + cir.store %7, %3 : !cir.vector<2 x !s64i>, !cir.ptr> loc(#loc41) + %8 = cir.load %3 : !cir.ptr>, !cir.vector<2 x !s64i> loc(#loc41) + cir.return %8 : !cir.vector<2 x !s64i> loc(#loc41) + } loc(#loc37) +} loc(#loc) +#loc = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/clang/test/CIR/CodeGen/X86/builtin_mask_compress.c":0:0) +#loc1 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":35:1) +#loc2 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":41:1) +#loc9 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":37:1) +#loc10 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":38:64) +#loc11 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":39:24) +#loc12 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":40:15) +#loc13 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":38:20) +#loc14 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":40:18) +#loc15 = loc("/home/cs25resch11005/myFiles/off_contrib/llvm-project/build/lib/clang/22/include/avx512vlvbmi2intrin.h":38:3) +#loc16 = loc("builtin_mask_compress.c":4:1) +#loc17 = loc("builtin_mask_compress.c":7:1) +#loc24 = loc("builtin_mask_compress.c":4:70) +#loc25 = loc("builtin_mask_compress.c":6:45) +#loc26 = loc("builtin_mask_compress.c":6:50) +#loc27 = loc("builtin_mask_compress.c":6:55) +#loc28 = loc("builtin_mask_compress.c":6:21) +#loc29 = loc("builtin_mask_compress.c":6:5) +#loc30 = loc("builtin_mask_compress.c":6:58) +#loc31 = loc(fused[#loc1, #loc2]) +#loc35 = loc(fused[#loc13, #loc14]) +#loc36 = loc(fused[#loc15, #loc14]) +#loc37 = loc(fused[#loc16, #loc17]) +#loc41 = loc(fused[#loc29, #loc30])